diff --git a/Dockerfile b/Dockerfile index 279c54190e289e2b2cb353826351a9f5983d2d83..75e396f126fd1f76adb97643a5d0a91a506ab58f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,6 @@ FROM python:3.11 WORKDIR /code COPY ./requirements.txt /code/requirements.txt COPY ./pre-requirements.txt /code/pre-requirements.txt -COPY ./pytorch3d /code/pytorch3d RUN pip install --no-cache-dir -r /code/pre-requirements.txt RUN pip install --no-cache-dir -r /code/requirements.txt diff --git a/detectron2/.clang-format b/detectron2/.clang-format deleted file mode 100644 index 39b1b3d603ed0cf6b7f94c9c08067f148f35613f..0000000000000000000000000000000000000000 --- a/detectron2/.clang-format +++ /dev/null @@ -1,85 +0,0 @@ -AccessModifierOffset: -1 -AlignAfterOpenBracket: AlwaysBreak -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: false -AlignTrailingComments: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: false -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ] -IncludeCategories: - - Regex: '^<.*\.h(pp)?>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IndentCaseLabels: true -IndentWidth: 2 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp11 -TabWidth: 8 -UseTab: Never diff --git a/detectron2/.flake8 b/detectron2/.flake8 deleted file mode 100644 index 28881e488263c5693835063be9455f2fb1fdc849..0000000000000000000000000000000000000000 --- a/detectron2/.flake8 +++ /dev/null @@ -1,15 +0,0 @@ -# This is an example .flake8 config, used when developing *Black* itself. -# Keep in sync with setup.cfg which is used for source packages. 
- -[flake8] -ignore = W503, E203, E221, C901, C408, E741, C407, B017, F811, C101, EXE001, EXE002 -max-line-length = 100 -max-complexity = 18 -select = B,C,E,F,W,T4,B9 -exclude = build -per-file-ignores = - **/__init__.py:F401,F403,E402 - **/configs/**.py:F401,E402 - configs/**.py:F401,E402 - **/tests/config/**.py:F401,E402 - tests/config/**.py:F401,E402 diff --git a/detectron2/.github/CODE_OF_CONDUCT.md b/detectron2/.github/CODE_OF_CONDUCT.md deleted file mode 100644 index 0f7ad8bfc173eac554f0b6ef7c684861e8014bbe..0000000000000000000000000000000000000000 --- a/detectron2/.github/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,5 +0,0 @@ -# Code of Conduct - -Facebook has adopted a Code of Conduct that we expect project participants to adhere to. -Please read the [full text](https://code.fb.com/codeofconduct/) -so that you can understand what actions will and will not be tolerated. diff --git a/detectron2/.github/CONTRIBUTING.md b/detectron2/.github/CONTRIBUTING.md deleted file mode 100644 index 9bab709cae689ba3b92dd52f7fbcc0c6926f4a38..0000000000000000000000000000000000000000 --- a/detectron2/.github/CONTRIBUTING.md +++ /dev/null @@ -1,68 +0,0 @@ -# Contributing to detectron2 - -## Issues -We use GitHub issues to track public bugs and questions. -Please make sure to follow one of the -[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose) -when reporting any issues. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## Pull Requests -We actively welcome pull requests. - -However, if you're adding any significant features (e.g. > 50 lines), please -make sure to discuss with maintainers about your motivation and proposals in an issue -before sending a PR. This is to save your time so you don't spend time on a PR that we'll not accept. - -We do not always accept new features, and we take the following -factors into consideration: - -1. Whether the same feature can be achieved without modifying detectron2. - Detectron2 is designed so that you can implement many extensions from the outside, e.g. - those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects). - * If some part of detectron2 is not extensible enough, you can also bring up a more general issue to - improve it. Such feature request may be useful to more users. -2. Whether the feature is potentially useful to a large audience (e.g. an impactful detection paper, a popular dataset, - a significant speedup, a widely useful utility), - or only to a small portion of users (e.g., a less-known paper, an improvement not in the object - detection field, a trick that's not very popular in the community, code to handle a non-standard type of data) - * Adoption of additional models, datasets, new task are by default not added to detectron2 before they - receive significant popularity in the community. - We sometimes accept such features in `projects/`, or as a link in `projects/README.md`. -3. Whether the proposed solution has a good design / interface. This can be discussed in the issue prior to PRs, or - in the form of a draft PR. -4. Whether the proposed solution adds extra mental/practical overhead to users who don't - need such feature. -5. Whether the proposed solution breaks existing APIs. 
- -To add a feature to an existing function/class `Func`, there are always two approaches: -(1) add new arguments to `Func`; (2) write a new `Func_with_new_feature`. -To meet the above criteria, we often prefer approach (2), because: - -1. It does not involve modifying or potentially breaking existing code. -2. It does not add overhead to users who do not need the new feature. -3. Adding new arguments to a function/class is not scalable w.r.t. all the possible new research ideas in the future. - -When sending a PR, please do: - -1. If a PR contains multiple orthogonal changes, split it to several PRs. -2. If you've added code that should be tested, add tests. -3. For PRs that need experiments (e.g. adding a new model or new methods), - you don't need to update model zoo, but do provide experiment results in the description of the PR. -4. If APIs are changed, update the documentation. -5. We use the [Google style docstrings](https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) in python. -6. Make sure your code lints with `./dev/linter.sh`. - - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## License -By contributing to detectron2, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. diff --git a/detectron2/.github/Detectron2-Logo-Horz.svg b/detectron2/.github/Detectron2-Logo-Horz.svg deleted file mode 100644 index eb2d643ddd940cd8bdb5eaad093029969ff2364c..0000000000000000000000000000000000000000 --- a/detectron2/.github/Detectron2-Logo-Horz.svg +++ /dev/null @@ -1 +0,0 @@ -Detectron2-Logo-Horz \ No newline at end of file diff --git a/detectron2/.github/ISSUE_TEMPLATE.md b/detectron2/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 5e8aaa2d3722e7e73a3d94b2b7dfc4f751d7a240..0000000000000000000000000000000000000000 --- a/detectron2/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,5 +0,0 @@ - -Please select an issue template from -https://github.com/facebookresearch/detectron2/issues/new/choose . - -Otherwise your issue will be closed. diff --git a/detectron2/.github/ISSUE_TEMPLATE/bugs.md b/detectron2/.github/ISSUE_TEMPLATE/bugs.md deleted file mode 100644 index d0235c708ab6b0cdadb5865110e9e8c22ca313aa..0000000000000000000000000000000000000000 --- a/detectron2/.github/ISSUE_TEMPLATE/bugs.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: "πŸ› Bugs" -about: Report bugs in detectron2 -title: Please read & provide the following - ---- - -## Instructions To Reproduce the πŸ› Bug: -1. Full runnable code or full changes you made: -``` -If making changes to the project itself, please use output of the following command: -git rev-parse HEAD; git diff - - -``` -2. What exact command you run: -3. __Full logs__ or other relevant observations: -``` - -``` -4. please simplify the steps as much as possible so they do not require additional resources to - run, such as a private dataset. - -## Expected behavior: - -If there are no obvious error in "full logs" provided above, -please tell us the expected behavior. 
- -## Environment: - -Provide your environment information using the following command: -``` -wget -nc -q https://github.com/facebookresearch/detectron2/raw/main/detectron2/utils/collect_env.py && python collect_env.py -``` - -If your issue looks like an installation issue / environment issue, -please first try to solve it yourself with the instructions in -https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues diff --git a/detectron2/.github/ISSUE_TEMPLATE/config.yml b/detectron2/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index c60c2e14309be9a93293a64e7481f2a91385f76a..0000000000000000000000000000000000000000 --- a/detectron2/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,17 +0,0 @@ -# require an issue template to be chosen -blank_issues_enabled: false - -contact_links: - - name: How-To / All Other Questions - url: https://github.com/facebookresearch/detectron2/discussions - about: Use "github discussions" for community support on general questions that don't belong to the above issue categories - - name: Detectron2 Documentation - url: https://detectron2.readthedocs.io/index.html - about: Check if your question is answered in tutorials or API docs - -# Unexpected behaviors & bugs are split to two templates. -# When they are one template, users think "it's not a bug" and don't choose the template. -# -# But the file name is still "unexpected-problems-bugs.md" so that old references -# to this issue template still works. -# It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs) diff --git a/detectron2/.github/ISSUE_TEMPLATE/documentation.md b/detectron2/.github/ISSUE_TEMPLATE/documentation.md deleted file mode 100644 index 88214d62e5228639491e019c78bb4171d535cdd1..0000000000000000000000000000000000000000 --- a/detectron2/.github/ISSUE_TEMPLATE/documentation.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: "\U0001F4DA Documentation Issue" -about: Report a problem about existing documentation, comments, website or tutorials. -labels: documentation - ---- - -## πŸ“š Documentation Issue - -This issue category is for problems about existing documentation, not for asking how-to questions. - -* Provide a link to an existing documentation/comment/tutorial: - -* How should the above documentation/comment/tutorial improve: diff --git a/detectron2/.github/ISSUE_TEMPLATE/feature-request.md b/detectron2/.github/ISSUE_TEMPLATE/feature-request.md deleted file mode 100644 index 03a1e93d7293948042120b875af8be0c6964e59c..0000000000000000000000000000000000000000 --- a/detectron2/.github/ISSUE_TEMPLATE/feature-request.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -name: "\U0001F680Feature Request" -about: Suggest an improvement or new feature -labels: enhancement - ---- - -## πŸš€ Feature -A clear and concise description of the feature proposal. - -## Motivation & Examples - -Tell us why the feature is useful. - -Describe what the feature would look like, if it is implemented. -Best demonstrated using **code examples** in addition to words. - -## Note - -We only consider adding new features if they are relevant to many users. - -If you request implementation of research papers -- we only consider papers that have enough significance and prevalance in the object detection field. - -We do not take requests for most projects in the `projects/` directory, because they are research code release that is mainly for other researchers to reproduce results. - -"Make X faster/accurate" is not a valid feature request. 
"Implement a concrete feature that can make X faster/accurate" can be a valid feature request. - -Instead of adding features inside detectron2, -you can implement many features by [extending detectron2](https://detectron2.readthedocs.io/tutorials/extend.html). -The [projects/](https://github.com/facebookresearch/detectron2/tree/main/projects/) directory contains many of such examples. - diff --git a/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md b/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md deleted file mode 100644 index 5db8f22415ff5c857ce83fb0d3de68211f775080..0000000000000000000000000000000000000000 --- a/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -name: "😩 Unexpected behaviors" -about: Report unexpected behaviors when using detectron2 -title: Please read & provide the following - ---- - -If you do not know the root cause of the problem, please post according to this template: - -## Instructions To Reproduce the Issue: - -Check https://stackoverflow.com/help/minimal-reproducible-example for how to ask good questions. -Simplify the steps to reproduce the issue using suggestions from the above link, and provide them below: - -1. Full runnable code or full changes you made: -``` -If making changes to the project itself, please use output of the following command: -git rev-parse HEAD; git diff - - -``` -2. What exact command you run: -3. __Full logs__ or other relevant observations: -``` - -``` - -## Expected behavior: - -If there are no obvious crash in "full logs" provided above, -please tell us the expected behavior. - -If you expect a model to converge / work better, we do not help with such issues, unless -a model fails to reproduce the results in detectron2 model zoo, or proves existence of bugs. 
- -## Environment: - -Paste the output of the following command: -``` -wget -nc -nv https://github.com/facebookresearch/detectron2/raw/main/detectron2/utils/collect_env.py && python collect_env.py -``` - -If your issue looks like an installation issue / environment issue, -please first check common issues in https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues diff --git a/detectron2/.github/actions/install_detectron2/action.yml b/detectron2/.github/actions/install_detectron2/action.yml deleted file mode 100644 index 7da3e2e11ace018aa0c021b8a85152c5582aae61..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/install_detectron2/action.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: "Install Detectron2" -runs: - using: composite - steps: - - name: Install Detectron2 - shell: bash - run: | - # Remove first, in case it's in the CI cache - pip uninstall -y detectron2 - - pip install --progress-bar off -e .[all] - python -m detectron2.utils.collect_env - ./datasets/prepare_for_tests.sh diff --git a/detectron2/.github/actions/install_detectron2_win/action.yml b/detectron2/.github/actions/install_detectron2_win/action.yml deleted file mode 100644 index d7f796509a336cd1b31d46a5944abdf714d2bdfd..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/install_detectron2_win/action.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: "Install Detectron2" -runs: - using: composite - steps: - - name: Install Detectron2 - shell: bash - run: | - # Remove first, in case it's in the CI cache - pip uninstall -y detectron2 - - pip install --progress-bar off -e .[all] - python -m detectron2.utils.collect_env - - # TODO: this command fails because windows does not have wget - # ./datasets/prepare_for_tests.sh diff --git a/detectron2/.github/actions/install_linux_dep/action.yml b/detectron2/.github/actions/install_linux_dep/action.yml deleted file mode 100644 index 59bb8b22518877e633014ce18b569bce7fc667d7..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/install_linux_dep/action.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: "Install Dependencies" -inputs: - torch-version: - description: torch version to install - torchvision-version: - description: torch vision version to install, version number or "master" - pytorch-index: - description: where to install torch from - required: false - default: "https://download.pytorch.org/whl/torch_stable.html" - # use test wheels index to have access to RC wheels - # https://download.pytorch.org/whl/test/torch_test.html -runs: - using: composite - steps: - - name: Install Dependencies - shell: bash - run: | - # disable crash coredump, so unittests fail fast - sudo systemctl stop apport.service || true - - pip install -U pip - - # install from github to get latest; install iopath first since fvcore depends on it - pip install --progress-bar off -U 'git+https://github.com/facebookresearch/iopath' - pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore' - - # Don't use pytest-xdist: cuda tests are unstable under multi-process workers. 
- # Don't use opencv 4.7.0.68: https://github.com/opencv/opencv-python/issues/765 - pip install --progress-bar off ninja opencv-python-headless!=4.7.0.68 pytest tensorboard pycocotools onnx - pip install --progress-bar off torch==${{inputs.torch-version}} -f ${{inputs.pytorch-index}} - if [[ "${{inputs.torchvision-version}}" == "master" ]]; then - pip install git+https://github.com/pytorch/vision.git - else - pip install --progress-bar off torchvision==${{inputs.torchvision-version}} -f ${{inputs.pytorch-index}} - fi - - echo python install path: $pythonLocation - python -c 'import torch; print("CUDA:", torch.cuda.is_available())' - gcc --version diff --git a/detectron2/.github/actions/install_linux_gpu_dep/action.yml b/detectron2/.github/actions/install_linux_gpu_dep/action.yml deleted file mode 100644 index 058ccf119eaddacac9561fe94fd6b73a454072c7..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/install_linux_gpu_dep/action.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: "Install GPU Dependencies" -inputs: - cuda-version: - description: version of cuda to install, ie "12-5" for 12.5 -runs: - using: composite - steps: - - name: Install GPU Dependencies - shell: bash - run: | - uname -r - - # https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#network-repo-installation-for-ubuntu - # Installing the keyring seems to be unnecessary - - sudo apt-get update - sudo apt-get --yes install cuda-toolkit-${{inputs.cuda-version}} - sudo apt-get --yes install nvidia-gds-${{inputs.cuda-version}} diff --git a/detectron2/.github/actions/install_windows_dep/action.yml b/detectron2/.github/actions/install_windows_dep/action.yml deleted file mode 100644 index ca5a224c9aa4deb168037ddeb9ff36bca6462cb6..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/install_windows_dep/action.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: "Install Dependencies" -inputs: - torch-version: - description: torch version to install - torchvision-version: - description: torch vision version to install, version number or "master" - pytorch-index: - description: where to install torch from - required: false - default: "https://download.pytorch.org/whl/torch_stable.html" - # use test wheels index to have access to RC wheels - # https://download.pytorch.org/whl/test/torch_test.html -runs: - using: composite - steps: - - name: Install Dependencies - shell: bash - run: | - pip install certifi --ignore-installed # required on windows to workaround some cert issue - pip install numpy cython # required on windows before pycocotools - pip install opencv-python-headless pytest-xdist pycocotools tensorboard onnx - pip install -U git+https://github.com/facebookresearch/iopath - pip install -U git+https://github.com/facebookresearch/fvcore - pip install torch==${{inputs.torch-version}} torchvision==${{inputs.torchvision-version}} -f ${{inputs.pytorch-index}} diff --git a/detectron2/.github/actions/run_unittests/action.yml b/detectron2/.github/actions/run_unittests/action.yml deleted file mode 100644 index 1922efca976eb809a7e9f135bb8b55ef92389b6d..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/run_unittests/action.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: "Run Unit Tests" -runs: - using: composite - steps: - - name: Run Unit Tests - shell: bash - run: | - python -m pytest -sv --durations=15 tests # parallel causes some random failures diff --git a/detectron2/.github/actions/run_unittests_win/action.yml b/detectron2/.github/actions/run_unittests_win/action.yml 
deleted file mode 100644 index 0f8543a415b8b757c9eaf5b330a5a59a6ce51fd1..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/run_unittests_win/action.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: "Run Unit Tests" -runs: - using: composite - steps: - - name: Run Unit Tests - shell: bash - run: | - echo TODO: unittest fails for now diff --git a/detectron2/.github/actions/uninstall_tests/action.yml b/detectron2/.github/actions/uninstall_tests/action.yml deleted file mode 100644 index fee990460dddec9fb7f3f93e3cbaf3f31fc18a55..0000000000000000000000000000000000000000 --- a/detectron2/.github/actions/uninstall_tests/action.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: "Run Tests After Uninstalling" -runs: - using: composite - steps: - - name: "Run Tests After Uninstalling" - shell: bash - run: | - pip uninstall -y detectron2 - # Remove built binaries - rm -rf build/ detectron2/*.so - # Tests that code is importable without installation - . ./.github/import-tests.sh diff --git a/detectron2/.github/import-tests.sh b/detectron2/.github/import-tests.sh deleted file mode 100644 index 8e8deb6ad699fd673fea0f66b91aa3ec6e3c7c7c..0000000000000000000000000000000000000000 --- a/detectron2/.github/import-tests.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -# Test that import works without building detectron2. - -# Check that _C is not importable -python -c "from detectron2 import _C" > /dev/null 2>&1 && { - echo "This test should be run without building detectron2." - exit 1 -} - -# Check that other modules are still importable, even when _C is not importable -python -c "from detectron2 import modeling" -python -c "from detectron2 import modeling, data" -python -c "from detectron2 import evaluation, export, checkpoint" -python -c "from detectron2 import utils, engine" diff --git a/detectron2/.github/pull_request_template.md b/detectron2/.github/pull_request_template.md deleted file mode 100644 index d71729baee1ec324ab9db6e7562965cf9e2a091b..0000000000000000000000000000000000000000 --- a/detectron2/.github/pull_request_template.md +++ /dev/null @@ -1,10 +0,0 @@ -Thanks for your contribution! - -If you're sending a large PR (e.g., >100 lines), -please open an issue first about the feature / bug, and indicate how you want to contribute. - -We do not always accept features. -See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests about how we handle PRs. - -Before submitting a PR, please run `dev/linter.sh` to lint the code. 
- diff --git a/detectron2/.github/workflows/check-template.yml b/detectron2/.github/workflows/check-template.yml deleted file mode 100644 index 3caed9df3caa50c0d3b606e4a56a1959c463b710..0000000000000000000000000000000000000000 --- a/detectron2/.github/workflows/check-template.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: Check issue template - -on: - issues: - types: [opened] - -jobs: - check-template: - runs-on: ubuntu-latest - # comment this out when testing with https://github.com/nektos/act - if: ${{ github.repository_owner == 'facebookresearch' }} - steps: - - uses: actions/checkout@v2 - - uses: actions/github-script@v3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - // Arguments available: - // - github: A pre-authenticated octokit/rest.js client - // - context: An object containing the context of the workflow run - // - core: A reference to the @actions/core package - // - io: A reference to the @actions/io package - const fs = require('fs'); - const editDistance = require(`${process.env.GITHUB_WORKSPACE}/.github/workflows/levenshtein.js`).getEditDistance - issue = await github.issues.get({ - owner: context.issue.owner, - repo: context.issue.repo, - issue_number: context.issue.number, - }); - const hasLabel = issue.data.labels.length > 0; - if (hasLabel || issue.state === "closed") { - // don't require template on them - core.debug("Issue " + issue.data.title + " was skipped."); - return; - } - - sameAsTemplate = function(filename, body) { - let tmpl = fs.readFileSync(`.github/ISSUE_TEMPLATE/${filename}`, 'utf8'); - tmpl = tmpl.toLowerCase().split("---").slice(2).join("").trim(); - tmpl = tmpl.replace(/(\r\n|\n|\r)/gm, ""); - let bodyr = body.replace(/(\r\n|\n|\r)/gm, ""); - let dist = editDistance(tmpl, bodyr); - return dist < 8; - }; - - checkFail = async function(msg) { - core.info("Processing '" + issue.data.title + "' with message: " + msg); - await github.issues.addLabels({ - owner: context.issue.owner, - repo: context.issue.repo, - issue_number: context.issue.number, - labels: ["needs-more-info"], - }); - await github.issues.createComment({ - owner: context.issue.owner, - repo: context.issue.repo, - issue_number: context.issue.number, - body: msg, - }); - }; - - const body = issue.data.body.toLowerCase().trim(); - - if (sameAsTemplate("bugs.md", body) || sameAsTemplate("unexpected-problems-bugs.md", body)) { - await checkFail(` - We found that not enough information is provided about this issue. - Please provide details following the [issue template](https://github.com/facebookresearch/detectron2/issues/new/choose).`) - return; - } - - const hasInstructions = body.indexOf("reproduce") != -1; - const hasEnvironment = (body.indexOf("environment") != -1) || (body.indexOf("colab") != -1) || (body.indexOf("docker") != -1); - if (hasInstructions && hasEnvironment) { - core.debug("Issue " + issue.data.title + " follows template."); - return; - } - - let message = "You've chosen to report an unexpected problem or bug. 
Unless you already know the root cause of it, please include details about it by filling the [issue template](https://github.com/facebookresearch/detectron2/issues/new/choose).\n"; - message += "The following information is missing: "; - if (!hasInstructions) { - message += "\"Instructions To Reproduce the Issue and __Full__ Logs\"; "; - } - if (!hasEnvironment) { - message += "\"Your Environment\"; "; - } - await checkFail(message); diff --git a/detectron2/.github/workflows/levenshtein.js b/detectron2/.github/workflows/levenshtein.js deleted file mode 100644 index 67a5e3613c0072d124035ee8933a23de2105cfe3..0000000000000000000000000000000000000000 --- a/detectron2/.github/workflows/levenshtein.js +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright (c) 2011 Andrei Mackenzie - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -// Compute the edit distance between the two given strings -exports.getEditDistance = function(a, b){ - if(a.length == 0) return b.length; - if(b.length == 0) return a.length; - - var matrix = []; - - // increment along the first column of each row - var i; - for(i = 0; i <= b.length; i++){ - matrix[i] = [i]; - } - - // increment each column in the first row - var j; - for(j = 0; j <= a.length; j++){ - matrix[0][j] = j; - } - - // Fill in the rest of the matrix - for(i = 1; i <= b.length; i++){ - for(j = 1; j <= a.length; j++){ - if(b.charAt(i-1) == a.charAt(j-1)){ - matrix[i][j] = matrix[i-1][j-1]; - } else { - matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution - Math.min(matrix[i][j-1] + 1, // insertion - matrix[i-1][j] + 1)); // deletion - } - } - } - - return matrix[b.length][a.length]; -}; diff --git a/detectron2/.github/workflows/needs-reply.yml b/detectron2/.github/workflows/needs-reply.yml deleted file mode 100644 index 4b113d8fa9ecbb490ce5d2ab49b046d089a5ce5d..0000000000000000000000000000000000000000 --- a/detectron2/.github/workflows/needs-reply.yml +++ /dev/null @@ -1,98 +0,0 @@ -name: Close/Lock issues after inactivity - -on: - schedule: - - cron: "42 1 * * *" - -jobs: - close-issues-needs-more-info: - runs-on: ubuntu-latest - if: ${{ github.repository_owner == 'facebookresearch' }} - steps: - - name: Close old issues that need reply - uses: actions/github-script@v3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - # Modified from https://github.com/dwieeb/needs-reply - script: | - // Arguments available: - // - github: A pre-authenticated octokit/rest.js client - // - context: An object containing the context of the workflow run - // - core: A reference to the @actions/core package - // - io: A reference to the @actions/io package - const kLabelToCheck = "needs-more-info"; - const kInvalidLabel = "invalid/unrelated"; - const kDaysBeforeClose = 7; - const kMessage = "Requested information was not provided in 7 days, so we're closing this issue.\n\nPlease open new issue if information becomes available. Otherwise, use [github discussions](https://github.com/facebookresearch/detectron2/discussions) for free-form discussions." - - issues = await github.issues.listForRepo({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'open', - labels: kLabelToCheck, - sort: 'updated', - direction: 'asc', - per_page: 30, - page: 1, - }); - issues = issues.data; - if (issues.length === 0) { - core.info('No more issues found to process. 
Exiting.'); - return; - } - for (const issue of issues) { - if (!!issue.pull_request) - continue; - core.info(`Processing issue #${issue.number}`); - - let updatedAt = new Date(issue.updated_at).getTime(); - const numComments = issue.comments; - const comments = await github.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issue.number, - per_page: 30, - page: Math.floor((numComments - 1) / 30) + 1, // the last page - }); - const lastComments = comments.data - .map(l => new Date(l.created_at).getTime()) - .sort(); - if (lastComments.length > 0) { - updatedAt = lastComments[lastComments.length - 1]; - } - - const now = new Date().getTime(); - const daysSinceUpdated = (now - updatedAt) / 1000 / 60 / 60 / 24; - - if (daysSinceUpdated < kDaysBeforeClose) { - core.info(`Skipping #${issue.number} because it has been updated in the last ${daysSinceUpdated} days`); - continue; - } - core.info(`Closing #${issue.number} because it has not been updated in the last ${daysSinceUpdated} days`); - await github.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issue.number, - body: kMessage, - }); - const newLabels = numComments <= 2 ? [kInvalidLabel, kLabelToCheck] : issue.labels; - await github.issues.update({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: issue.number, - labels: newLabels, - state: 'closed', - }); - } - - lock-issues-after-closed: - runs-on: ubuntu-latest - if: ${{ github.repository_owner == 'facebookresearch' }} - steps: - - name: Lock closed issues that have no activity for a while - uses: dessant/lock-threads@v2 - with: - github-token: ${{ github.token }} - issue-lock-inactive-days: '300' - process-only: 'issues' - issue-exclude-labels: 'enhancement,bug,documentation' diff --git a/detectron2/.github/workflows/remove-needs-reply.yml b/detectron2/.github/workflows/remove-needs-reply.yml deleted file mode 100644 index 1f000b28ca27ef9c219d197f95251be1cb8c0979..0000000000000000000000000000000000000000 --- a/detectron2/.github/workflows/remove-needs-reply.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Remove needs-more-info label - -on: - issue_comment: - types: [created] - issues: - types: [edited] - -jobs: - remove-needs-more-info-label: - runs-on: ubuntu-latest - # 1. issue_comment events could include PR comment, filter them out - # 2. Only trigger action if event was produced by the original author - if: ${{ !github.event.issue.pull_request && github.event.sender.login == github.event.issue.user.login }} - steps: - - name: Remove needs-more-info label - uses: octokit/request-action@v2.x - continue-on-error: true - with: - route: DELETE /repos/:repository/issues/:issue/labels/:label - repository: ${{ github.repository }} - issue: ${{ github.event.issue.number }} - label: needs-more-info - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/detectron2/.github/workflows/workflow.yml b/detectron2/.github/workflows/workflow.yml deleted file mode 100644 index 2b64a1c7800739bf08d36d9c51891b8ee98284a5..0000000000000000000000000000000000000000 --- a/detectron2/.github/workflows/workflow.yml +++ /dev/null @@ -1,241 +0,0 @@ -name: CI -on: - push: - pull_request: - schedule: - - cron: "0 0 * * *" # @daily - -# Run linter with github actions for quick feedbacks. 
-jobs: - linter: - runs-on: ubuntu-latest - # run on PRs, or commits to facebookresearch (not internal) - if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.9 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install dependencies - # flake8-bugbear flake8-comprehensions are useful but not available internally - run: | - python -m pip install --upgrade pip - python -m pip install flake8==6.1.0 isort==4.3.21 - python -m pip install black==24.4.2 - flake8 --version - - name: Lint - run: | - echo "Running isort" - isort -c -sp . - echo "Running black" - black -l 100 --check . - echo "Running flake8" - flake8 . - - macos_tests: - runs-on: macos-latest - # run on PRs, or commits to facebookresearch (not internal) - if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }} - strategy: - fail-fast: false - matrix: - torch: ["1.13.1", "2.2.2"] - include: - - torch: "1.13.1" - torchvision: "0.14.1" - - torch: "2.2.2" - torchvision: "0.17.2" - env: - # point datasets to ~/.torch so it's cached by CI - DETECTRON2_DATASETS: ~/.torch/datasets - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Cache dependencies - uses: actions/cache@v4 - with: - path: | - ${{ env.pythonLocation }}/lib/python3.8/site-packages - ~/.torch - key: ${{ runner.os }}-torch${{ matrix.torch }}-${{ hashFiles('setup.py') }}-20220119 - - - name: Install dependencies - run: | - python -m pip install -U pip - python -m pip install wheel ninja opencv-python-headless onnx pytest-xdist - python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html - # install from github to get latest; install iopath first since fvcore depends on it - python -m pip install -U 'git+https://github.com/facebookresearch/iopath' - python -m pip install -U 'git+https://github.com/facebookresearch/fvcore' - wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py - python collect_env.py - - - name: Build and install - run: | - CC=clang CXX=clang++ python -m pip install -e .[all] - python -m detectron2.utils.collect_env - ./datasets/prepare_for_tests.sh - - name: Run unittests - run: python -m pytest -n 4 --durations=15 -sv tests/ - - linux_gpu_tests: - runs-on: 4-core-ubuntu-gpu-t4 - # run on PRs, or commits to facebookresearch (not internal) - if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }} - strategy: - fail-fast: false - matrix: - torch: ["1.13.1", "2.2.2"] - include: - - torch: "1.13.1" - torchvision: "0.14.1" - cuda: "11-7" - - torch: "2.2.2" - torchvision: "0.17.2" - cuda: "12-5" - env: - PYTORCH_INDEX: "https://download.pytorch.org/whl/cu118" - DETECTRON2_DATASETS: ~/.torch/datasets - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - - name: Cache dependencies - uses: actions/cache@v4 - id: load-cache - with: - path: | - ${{ env.pythonLocation }}/lib/python3.8/site-packages - ~/.torch - key: ${{ runner.os }}-torch-gpu${{ matrix.torch }}-${{ hashFiles('setup.py') }}-20210827 - - - name: Install GPU Dependencies - uses: ./.github/actions/install_linux_gpu_dep - with: - cuda-version: ${{matrix.cuda}} - - - name: Install Dependencies - 
uses: ./.github/actions/install_linux_dep - if: steps.load-cache.outputs.cache-hit != 'true' - with: - torch-version: ${{matrix.torch}} - torchvision-version: ${{matrix.torchvision}} - pytorch-index: $PYTORCH_INDEX - - - name: Install Detectron2 - uses: ./.github/actions/install_detectron2 - - - name: Run Unit Tests - uses: ./.github/actions/run_unittests - - - name: Run Tests After Uninstalling - uses: ./.github/actions/uninstall_tests - - linux_cpu_tests: - runs-on: ubuntu-latest - # run on PRs, or commits to facebookresearch (not internal) - if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }} - strategy: - fail-fast: false - matrix: - torch: ["1.13.1", "2.2.2"] - include: - - torch: "1.13.1" - torchvision: "0.14.1" - - torch: "2.2.2" - torchvision: "0.17.2" - env: - PYTORCH_INDEX: "https://download.pytorch.org/whl/cu118" - DETECTRON2_DATASETS: ~/.torch/datasets - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - - name: Cache dependencies - uses: actions/cache@v4 - id: load-cache - with: - path: | - ${{ env.pythonLocation }}/lib/python3.8/site-packages - ~/.torch - key: ${{ runner.os }}-torch${{ matrix.torch }}-${{ hashFiles('setup.py') }}-20210827 - - - name: Install Dependencies - uses: ./.github/actions/install_linux_dep - if: steps.load-cache.outputs.cache-hit != 'true' - with: - torch-version: ${{matrix.torch}} - torchvision-version: ${{matrix.torchvision}} - pytorch-index: $PYTORCH_INDEX - - - name: Install Detectron2 - uses: ./.github/actions/install_detectron2 - - - name: Run Unit Tests - uses: ./.github/actions/run_unittests - - - name: Run Tests After Uninstalling - uses: ./.github/actions/uninstall_tests - - windows_cpu_tests: - runs-on: windows-latest - # run on PRs, or commits to facebookresearch (not internal) - if: ${{ github.repository_owner == 'facebookresearch' || github.event_name == 'pull_request' }} - strategy: - fail-fast: false - matrix: - torch: ["1.13.1", "2.2.2"] - include: - - torch: "1.13.1" - torchvision: "0.14.1" - - torch: "2.2.2" - torchvision: "0.17.2" - env: - PYTORCH_INDEX: "https://download.pytorch.org/whl/cu118" - DETECTRON2_DATASETS: ~/.torch/datasets - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - - name: Cache dependencies - uses: actions/cache@v4 - id: load-cache - with: - path: | - ${{ env.pythonLocation }}\Lib\site-packages - ~\.torch - key: ${{ runner.os }}-torch${{ matrix.torch }}-${{ hashFiles('setup.py') }}-20210404 - - - name: Install Dependencies - uses: ./.github/actions/install_windows_dep - if: steps.load-cache.outputs.cache-hit != 'true' - with: - torch-version: ${{matrix.torch}} - torchvision-version: ${{matrix.torchvision}} - pytorch-index: $PYTORCH_INDEX - - - name: Install Detectron2 - uses: ./.github/actions/install_detectron2_win - - - name: Run Unit Tests - uses: ./.github/actions/run_unittests_win diff --git a/detectron2/.gitignore b/detectron2/.gitignore deleted file mode 100644 index 9953d9b49bd150ffb251886f755b7a4150c4e35d..0000000000000000000000000000000000000000 --- a/detectron2/.gitignore +++ /dev/null @@ -1,53 +0,0 @@ -# output dir -output -instant_test_output -inference_test_output - - -*.png -*.json -*.diff -*.jpg -!/projects/DensePose/doc/images/*.jpg - -# compilation and distribution -__pycache__ -_ext -*.pyc -*.pyd -*.so -*.dll -*.egg-info/ -build/ -dist/ -wheels/ - -# 
pytorch/python/numpy formats -*.pth -*.pkl -*.npy -*.ts -model_ts*.txt - -# ipython/jupyter notebooks -*.ipynb -**/.ipynb_checkpoints/ - -# Editor temporaries -*.swn -*.swo -*.swp -*~ - -# editor settings -.idea -.vscode -_darcs - -# project dirs -/detectron2/model_zoo/configs -/datasets/* -!/datasets/*.* -/projects/*/datasets -/models -/snippet diff --git a/detectron2/GETTING_STARTED.md b/detectron2/GETTING_STARTED.md deleted file mode 100644 index 404b0c8f467264d1adf61e8274e5f864e24018e8..0000000000000000000000000000000000000000 --- a/detectron2/GETTING_STARTED.md +++ /dev/null @@ -1,79 +0,0 @@ -## Getting Started with Detectron2 - -This document provides a brief intro of the usage of builtin command-line tools in detectron2. - -For a tutorial that involves actual coding with the API, -see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -which covers how to run inference with an -existing model, and how to train a builtin model on a custom dataset. - - -### Inference Demo with Pre-trained Models - -1. Pick a model and its config file from - [model zoo](MODEL_ZOO.md), - for example, `mask_rcnn_R_50_FPN_3x.yaml`. -2. We provide `demo.py` that is able to demo builtin configs. Run it with: -``` -cd demo/ -python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --input input1.jpg input2.jpg \ - [--other-options] - --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl -``` -The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation. -This command will run the inference and show visualizations in an OpenCV window. - -For details of the command line arguments, see `demo.py -h` or look at its source code -to understand its behavior. Some common arguments are: -* To run __on your webcam__, replace `--input files` with `--webcam`. -* To run __on a video__, replace `--input files` with `--video-input video.mp4`. -* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`. -* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. - - -### Training & Evaluation in Command Line - -We provide two scripts in "tools/plain_train_net.py" and "tools/train_net.py", -that are made to train all the configs provided in detectron2. You may want to -use it as a reference to write your own training script. - -Compared to "train_net.py", "plain_train_net.py" supports fewer default -features. It also includes fewer abstraction, therefore is easier to add custom -logic. - -To train a model with "train_net.py", first -setup the corresponding datasets following -[datasets/README.md](./datasets/README.md), -then run: -``` -cd tools/ -./train_net.py --num-gpus 8 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml -``` - -The configs are made for 8-GPU training. -To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.: -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -To evaluate a model's performance, use -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --eval-only MODEL.WEIGHTS /path/to/checkpoint_file -``` -For more options, see `./train_net.py -h`. 
- -### Use Detectron2 APIs in Your Code - -See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -to learn how to use detectron2 APIs to: -1. run inference with an existing model -2. train a builtin model on a custom dataset - -See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/main/projects) -for more ways to build your project on detectron2. diff --git a/detectron2/INSTALL.md b/detectron2/INSTALL.md deleted file mode 100644 index 97864ad923582eb5e63644976a8a51b3c223213d..0000000000000000000000000000000000000000 --- a/detectron2/INSTALL.md +++ /dev/null @@ -1,226 +0,0 @@ -## Installation - -### Requirements -- Linux or macOS with Python β‰₯ 3.7 -- PyTorch β‰₯ 1.8 and [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. - Install them together at [pytorch.org](https://pytorch.org) to make sure of this -- OpenCV is optional but needed by demo and visualization - - -### Build Detectron2 from Source - -gcc & g++ β‰₯ 5.4 are required. [ninja](https://ninja-build.org/) is optional but recommended for faster build. -After having them, run: -``` -python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -# (add --user if you don't have permission) - -# Or, to install it from a local clone: -git clone https://github.com/facebookresearch/detectron2.git -python -m pip install -e detectron2 - -# On macOS, you may need to prepend the above commands with a few environment variables: -CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install ... -``` - -To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the -old build first. You often need to rebuild detectron2 after reinstalling PyTorch. - -### Common Installation Issues - -Click each issue for its solutions: - -
- -Undefined symbols that look like "TH..","at::Tensor...","torch..." -
- -This usually happens when detectron2 or torchvision is not -compiled with the version of PyTorch you're running. - -If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them -following [pytorch.org](http://pytorch.org) so that the versions match. - -If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases), -uninstall and reinstall the correct pre-built detectron2 that matches your pytorch version. - -If the error comes from detectron2 or torchvision that you built manually from source, -remove the files you built (`build/`, `**/*.so`) and rebuild it so that it picks up the version of pytorch currently in your environment. - -If the above instructions do not resolve this problem, please provide an environment (e.g. a dockerfile) that can reproduce the issue. -
- -
- -Missing torch dynamic libraries, OR segmentation fault immediately when using detectron2. - -This usually happens when detectron2 or torchvision is not -compiled with the version of PyTorch you're running. See the previous common issue for the solution. -
- -
- -Undefined C++ symbols (e.g. "GLIBCXX..") or C++ symbols not found. - -
-Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime. - -This often happens with old anaconda. -It may help to run `conda update libgcc` to upgrade its runtime. - -The fundamental solution is to avoid the mismatch, either by compiling with an older version of the C++ -compiler, or by running the code with the proper C++ runtime. -To run the code with a specific C++ runtime, you can use the environment variable `LD_PRELOAD=/path/to/libstdc++.so`. -
- -
- -"nvcc not found" or "Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available". - -
-CUDA is not found when building detectron2. -You should make sure - -``` -python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)' -``` - -prints `(True, a directory with cuda)` at the time you build detectron2. - -Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config. -
- -
- -"invalid device function" or "no kernel image is available for execution". - -
-Two possibilities: - -* You build detectron2 with one version of CUDA but run it with a different version. - - To check whether it is the case, - use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. - In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" - to contain cuda libraries of the same version. - - When they are inconsistent, - you need to either install a different build of PyTorch (or build by yourself) - to match your local CUDA installation, or install a different version of CUDA to match PyTorch. - -* PyTorch/torchvision/Detectron2 is not built for the correct GPU SM architecture (aka. compute capability). - - The architecture included by PyTorch/detectron2/torchvision is available in the "architecture flags" in - `python -m detectron2.utils.collect_env`. It must include - the architecture of your GPU, which can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus). - - If you're using pre-built PyTorch/detectron2/torchvision, they have included support for most popular GPUs already. - If not supported, you need to build them from source. - - When building detectron2/torchvision from source, they detect the GPU device and build for only the device. - This means the compiled code may not work on a different GPU device. - To recompile them for the correct architecture, remove all installed/compiled files, - and rebuild them with the `TORCH_CUDA_ARCH_LIST` environment variable set properly. - For example, `export TORCH_CUDA_ARCH_LIST="6.0;7.0"` makes it compile for both P100s and V100s. -
- -
- -Undefined CUDA symbols; Cannot open libcudart.so - -
-The version of NVCC you use to build detectron2 or torchvision does -not match the version of CUDA you are running with. -This often happens when using anaconda's CUDA runtime. - -Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. -In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" -to contain cuda libraries of the same version. - -When they are inconsistent, -you need to either install a different build of PyTorch (or build by yourself) -to match your local CUDA installation, or install a different version of CUDA to match PyTorch. -
- - -
- -C++ compilation errors from NVCC / NVRTC, or "Unsupported gpu architecture" - -
-A few possibilities: - -1. Local CUDA/NVCC version has to match the CUDA version of your PyTorch. Both can be found in `python collect_env.py` - (download from [here](./detectron2/utils/collect_env.py)). - When they are inconsistent, you need to either install a different build of PyTorch (or build by yourself) - to match your local CUDA installation, or install a different version of CUDA to match PyTorch. - -2. Local CUDA/NVCC version must support the SM architecture (a.k.a. compute capability) of your GPU. - The capability of your GPU can be found at [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus). - The capability supported by NVCC is listed [here](https://gist.github.com/ax3l/9489132). - If your NVCC version is too old, this can be worked around by setting the environment variable - `TORCH_CUDA_ARCH_LIST` to a lower, supported capability. - -3. The combination of NVCC and GCC you use is incompatible. You need to change one of their versions. - See [here](https://gist.github.com/ax3l/9489132) for some valid combinations. - Notably, CUDA<=10.1.105 doesn't support GCC>7.3. - - The CUDA/GCC version used by PyTorch can be found by `print(torch.__config__.show())`. - -
- - -
- -"ImportError: cannot import name '_C'". - -
-Please build and install detectron2 following the instructions above. - -Or, if you are running code from detectron2's root directory, `cd` to a different one. -Otherwise you may not import the code that you installed. -
- - -
- -Any issue on Windows. -
- -Detectron2 is continuously built on Windows with [CircleCI](https://app.circleci.com/pipelines/github/facebookresearch/detectron2?branch=main). -However, we do not provide official support for it. -PRs that improve code compatibility on Windows are welcome. -
- -
- -ONNX conversion segfault after some "TraceWarning". - -
-The ONNX package was compiled with a compiler that is too old. - -Please build and install ONNX from its source code using a compiler -whose version is closer to the one used by PyTorch (available in `torch.__config__.show()`). -
- - -
- -"library not found for -lstdc++" on older version of MacOS - -
- -See [this stackoverflow answer](https://stackoverflow.com/questions/56083725/macos-build-issues-lstdc-not-found-while-building-python-package). - -
- - -### Installation inside specific environments: - -* __Colab__: see our [Colab Tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) - which has step-by-step instructions. - -* __Docker__: The official [Dockerfile](docker) installs detectron2 with a few simple commands. diff --git a/detectron2/LICENSE b/detectron2/LICENSE deleted file mode 100644 index cd1b070674331757508398d99c830664dce6eaec..0000000000000000000000000000000000000000 --- a/detectron2/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, -and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by -the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all -other entities that control, are controlled by, or are under common -control with that entity. For the purposes of this definition, -"control" means (i) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or -otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity -exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, -including but not limited to software source code, documentation -source, and configuration files. - -"Object" form shall mean any form resulting from mechanical -transformation or translation of a Source form, including but -not limited to compiled object code, generated documentation, -and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or -Object form, made available under the License, as indicated by a -copyright notice that is included in or attached to the work -(an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object -form, that is based on (or derived from) the Work and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. For the purposes -of this License, Derivative Works shall not include works that remain -separable from, or merely link (or bind by name) to the interfaces of, -the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including -the original version of the Work and any modifications or additions -to that Work or Derivative Works thereof, that is intentionally -submitted to Licensor for inclusion in the Work by the copyright owner -or by an individual or Legal Entity authorized to submit on behalf of -the copyright owner. For the purposes of this definition, "submitted" -means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, -and issue tracking systems that are managed by, or on behalf of, the -Licensor for the purpose of discussing and improving the Work, but -excluding communication that is conspicuously marked or otherwise -designated in writing by the copyright owner as "Not a Contribution." 
- -"Contributor" shall mean Licensor and any individual or Legal Entity -on behalf of whom a Contribution has been received by Licensor and -subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the -Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -(except as stated in this section) patent license to make, have made, -use, offer to sell, sell, import, and otherwise transfer the Work, -where such license applies only to those patent claims licensable -by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) -with the Work to which such Contribution(s) was submitted. If You -institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work -or a Contribution incorporated within the Work constitutes direct -or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate -as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the -Work or Derivative Works thereof in any medium, with or without -modifications, and in Source or Object form, provided that You -meet the following conditions: - -(a) You must give any other recipients of the Work or -Derivative Works a copy of this License; and - -(b) You must cause any modified files to carry prominent notices -stating that You changed the files; and - -(c) You must retain, in the Source form of any Derivative Works -that You distribute, all copyright, patent, trademark, and -attribution notices from the Source form of the Work, -excluding those notices that do not pertain to any part of -the Derivative Works; and - -(d) If the Work includes a "NOTICE" text file as part of its -distribution, then any Derivative Works that You distribute must -include a readable copy of the attribution notices contained -within such NOTICE file, excluding those notices that do not -pertain to any part of the Derivative Works, in at least one -of the following places: within a NOTICE text file distributed -as part of the Derivative Works; within the Source form or -documentation, if provided along with the Derivative Works; or, -within a display generated by the Derivative Works, if and -wherever such third-party notices normally appear. The contents -of the NOTICE file are for informational purposes only and -do not modify the License. You may add Your own attribution -notices within Derivative Works that You distribute, alongside -or as an addendum to the NOTICE text from the Work, provided -that such additional attribution notices cannot be construed -as modifying the License. 
- -You may add Your own copyright statement to Your modifications and -may provide additional or different license terms and conditions -for use, reproduction, or distribution of Your modifications, or -for any such Derivative Works as a whole, provided Your use, -reproduction, and distribution of the Work otherwise complies with -the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, -any Contribution intentionally submitted for inclusion in the Work -by You to the Licensor shall be under the terms and conditions of -this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify -the terms of any separate license agreement you may have executed -with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade -names, trademarks, service marks, or product names of the Licensor, -except as required for reasonable and customary use in describing the -origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or -agreed to in writing, Licensor provides the Work (and each -Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied, including, without limitation, any warranties or conditions -of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any -risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, -whether in tort (including negligence), contract, or otherwise, -unless required by applicable law (such as deliberate and grossly -negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, -incidental, or consequential damages of any character arising as a -result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, -work stoppage, computer failure or malfunction, or any and all -other commercial damages or losses), even if such Contributor -has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing -the Work or Derivative Works thereof, You may choose to offer, -and charge a fee for, acceptance of support, warranty, indemnity, -or other liability obligations and/or rights consistent with this -License. However, in accepting such obligations, You may act only -on Your own behalf and on Your sole responsibility, not on behalf -of any other Contributor, and only if You agree to indemnify, -defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason -of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following -boilerplate notice, with the fields enclosed by brackets "[]" -replaced with your own identifying information. (Don't include -the brackets!) The text should be enclosed in the appropriate -comment syntax for the file format. 
We also recommend that a -file or class name and description of purpose be included on the -same "printed page" as the copyright notice for easier -identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/detectron2/MODEL_ZOO.md b/detectron2/MODEL_ZOO.md deleted file mode 100644 index 69db2728563c680e89a0d5d3e6ba272b8d78bdbd..0000000000000000000000000000000000000000 --- a/detectron2/MODEL_ZOO.md +++ /dev/null @@ -1,1052 +0,0 @@ -# Detectron2 Model Zoo and Baselines - -## Introduction - -This file documents a large collection of baselines trained -with detectron2 in Sep-Oct, 2019. -All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/) -servers with 8 NVIDIA V100 GPUs & NVLink. The speed numbers are periodically updated with latest PyTorch/CUDA/cuDNN versions. -You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs. - -In addition to these official baseline models, you can find more models in [projects/](projects/). - -#### How to Read the Tables -* The "Name" column contains a link to the config file. Models can be reproduced using `tools/train_net.py` with the corresponding yaml config file, - or `tools/lazyconfig_train_net.py` for python config files. -* Training speed is averaged across the entire training. - We keep updating the speed with latest version of detectron2/pytorch/etc., - so they might be different from the `metrics` file. - Training speed for multi-machine jobs is not provided. -* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset), - with batch size 1 in detectron2 directly. - Measuring it with custom code may introduce other overhead. - Actual deployment in production should in general be faster than the given inference - speed due to more optimizations. -* The *model id* column is provided for ease of reference. - To check downloaded file integrity, any model on this page contains its md5 prefix in its file name. -* Training curves and other statistics can be found in `metrics` for each model. - -#### Common Settings for COCO Models -* All COCO models were trained on `train2017` and evaluated on `val2017`. -* The default settings are __not directly comparable__ with Detectron's standard settings. - For example, our default training data augmentation uses scale jittering in addition to horizontal flipping. - - To make fair comparisons with Detectron's settings, see - [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison, - and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html) - for speed comparison. 
-* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__: - * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction, - respectively. It obtains the best - speed/accuracy tradeoff, but the other two are still useful for research. - * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper. - * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads - for mask and box prediction, respectively. - This is used by the Deformable ConvNet paper. -* Most models are trained with the 3x schedule (~37 COCO epochs). - Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs) - training schedule for comparison when doing quick research iteration. - -#### ImageNet Pretrained Models - -It's common to initialize from backbone models pre-trained on ImageNet classification tasks. The following backbone models are available: - -* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model. -* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model. -* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB. -* [R-50.pkl (torchvision)](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/torchvision/R-50.pkl): converted copy of [torchvision's ResNet-50](https://pytorch.org/docs/stable/torchvision/models.html#torchvision.models.resnet50) model. - More details can be found in [the conversion script](tools/convert-torchvision-to-d2.py). - -Note that the above models have __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer. -Pretrained models in Detectron's format can still be used. For example: -* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): - ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k). -* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): - ResNet-50 with Group Normalization. -* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): - ResNet-101 with Group Normalization. - -These models require slightly different settings regarding normalization and architecture. See the model zoo configs for reference. - -#### License - -All models available for download through this document are licensed under the -[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/). - -### COCO Object Detection Baselines - -#### Faster R-CNN: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|----------|----------|
| R50-C4 | 1x | 0.551 | 0.102 | 4.8 | 35.7 | 137257644 | model \| metrics |
| R50-DC5 | 1x | 0.380 | 0.068 | 5.0 | 37.3 | 137847829 | model \| metrics |
| R50-FPN | 1x | 0.210 | 0.038 | 3.0 | 37.9 | 137257794 | model \| metrics |
| R50-C4 | 3x | 0.543 | 0.104 | 4.8 | 38.4 | 137849393 | model \| metrics |
| R50-DC5 | 3x | 0.378 | 0.070 | 5.0 | 39.0 | 137849425 | model \| metrics |
| R50-FPN | 3x | 0.209 | 0.038 | 3.0 | 40.2 | 137849458 | model \| metrics |
| R101-C4 | 3x | 0.619 | 0.139 | 5.9 | 41.1 | 138204752 | model \| metrics |
| R101-DC5 | 3x | 0.452 | 0.086 | 6.1 | 40.6 | 138204841 | model \| metrics |
| R101-FPN | 3x | 0.286 | 0.051 | 4.1 | 42.0 | 137851257 | model \| metrics |
| X101-FPN | 3x | 0.638 | 0.098 | 6.7 | 43.0 | 139173657 | model \| metrics |
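The introduction above notes that these models can be loaded from code via the `detectron2.model_zoo` APIs. As a minimal sketch (not part of the original zoo documentation; the config path and score threshold here are only illustrative), one of the Faster R-CNN baselines can be used for inference like this:

```python
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

# Any yaml name from the tables works here; this is the R50-FPN 3x baseline.
CONFIG = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(CONFIG))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(CONFIG)  # the "model" link in the table
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5               # illustrative confidence threshold

predictor = DefaultPredictor(cfg)
# outputs = predictor(bgr_image)  # bgr_image: HxWx3 uint8 numpy array in BGR channel order
```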
#### RetinaNet:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|----------|----------|
| R50 | 1x | 0.205 | 0.041 | 4.1 | 37.4 | 190397773 | model \| metrics |
| R50 | 3x | 0.205 | 0.041 | 4.1 | 38.7 | 190397829 | model \| metrics |
| R101 | 3x | 0.291 | 0.054 | 5.2 | 40.4 | 190397697 | model \| metrics |
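The inference times in these tables are measured with batch size 1 through `inference_on_dataset()`. A rough sketch of reproducing that measurement for the RetinaNet R50 3x baseline (assuming the `coco_2017_val` dataset is downloaded and registered; the output directory is arbitrary):

```python
from detectron2 import model_zoo
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

CONFIG = "COCO-Detection/retinanet_R_50_FPN_3x.yaml"
model = model_zoo.get(CONFIG, trained=True)      # build the model and load its released weights
cfg = model_zoo.get_config(CONFIG)

val_loader = build_detection_test_loader(cfg, "coco_2017_val")   # batch size 1
evaluator = COCOEvaluator("coco_2017_val", output_dir="./retinanet_eval")
print(inference_on_dataset(model, val_loader, evaluator))        # logs AP and per-image time
```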
#### RPN & Fast R-CNN:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | prop. AR | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|----------|----------|----------|
| RPN R50-C4 | 1x | 0.130 | 0.034 | 1.5 | | 51.6 | 137258005 | model \| metrics |
| RPN R50-FPN | 1x | 0.186 | 0.032 | 2.7 | | 58.0 | 137258492 | model \| metrics |
| Fast R-CNN R50-FPN | 1x | 0.140 | 0.029 | 2.6 | 37.8 | | 137635226 | model \| metrics |
### COCO Instance Segmentation Baselines with Mask R-CNN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|---------|----------|----------|
| R50-C4 | 1x | 0.584 | 0.110 | 5.2 | 36.8 | 32.2 | 137259246 | model \| metrics |
| R50-DC5 | 1x | 0.471 | 0.076 | 6.5 | 38.3 | 34.2 | 137260150 | model \| metrics |
| R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| R50-C4 | 3x | 0.575 | 0.111 | 5.2 | 39.8 | 34.4 | 137849525 | model \| metrics |
| R50-DC5 | 3x | 0.470 | 0.076 | 6.5 | 40.0 | 35.9 | 137849551 | model \| metrics |
| R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| R101-C4 | 3x | 0.652 | 0.145 | 6.3 | 42.6 | 36.7 | 138363239 | model \| metrics |
| R101-DC5 | 3x | 0.545 | 0.092 | 7.6 | 41.9 | 37.3 | 138363294 | model \| metrics |
| R101-FPN | 3x | 0.340 | 0.056 | 4.6 | 42.9 | 38.6 | 138205316 | model \| metrics |
| X101-FPN | 3x | 0.690 | 0.103 | 7.2 | 44.3 | 39.5 | 139653917 | model \| metrics |
#### New baselines using Large-Scale Jitter and Longer Training Schedule

The following baselines of COCO Instance Segmentation with Mask R-CNN are generated using a longer training schedule and large-scale jitter as described in Google's [Simple Copy-Paste Data Augmentation](https://arxiv.org/pdf/2012.07177.pdf) paper. These models are trained from scratch using random initialization. These baselines exceed the previous Mask R-CNN baselines.

In the following table, one epoch consists of training on 118000 COCO images.

| Name | epochs | train time (s/im) | inference time (s/im) | box AP | mask AP | model id | download |
|------|--------|-------------------|-----------------------|--------|---------|----------|----------|
| R50-FPN | 100 | 0.376 | 0.069 | 44.6 | 40.3 | 42047764 | model \| metrics |
| R50-FPN | 200 | 0.376 | 0.069 | 46.3 | 41.7 | 42047638 | model \| metrics |
| R50-FPN | 400 | 0.376 | 0.069 | 47.4 | 42.5 | 42019571 | model \| metrics |
| R101-FPN | 100 | 0.518 | 0.073 | 46.4 | 41.6 | 42025812 | model \| metrics |
| R101-FPN | 200 | 0.518 | 0.073 | 48.0 | 43.1 | 42131867 | model \| metrics |
| R101-FPN | 400 | 0.518 | 0.073 | 48.9 | 43.7 | 42073830 | model \| metrics |
| regnetx_4gf_dds_FPN | 100 | 0.474 | 0.071 | 46.0 | 41.3 | 42047771 | model \| metrics |
| regnetx_4gf_dds_FPN | 200 | 0.474 | 0.071 | 48.1 | 43.1 | 42132721 | model \| metrics |
| regnetx_4gf_dds_FPN | 400 | 0.474 | 0.071 | 48.6 | 43.5 | 42025447 | model \| metrics |
| regnety_4gf_dds_FPN | 100 | 0.487 | 0.073 | 46.1 | 41.6 | 42047784 | model \| metrics |
| regnety_4gf_dds_FPN | 200 | 0.487 | 0.072 | 47.8 | 43.0 | 42047642 | model \| metrics |
| regnety_4gf_dds_FPN | 400 | 0.487 | 0.072 | 48.2 | 43.3 | 42045954 | model \| metrics |
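Unlike the yaml baselines above, these new baselines are defined as python "lazy" configs and trained with `tools/lazyconfig_train_net.py`. A sketch of loading one of them programmatically (the exact file name under `configs/new_baselines/` is an assumption here):

```python
from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load("configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py")

model = instantiate(cfg.model)            # build the Mask R-CNN architecture
cfg.optimizer.params.model = model        # the optimizer config collects params from the model
optimizer = instantiate(cfg.optimizer)
train_loader = instantiate(cfg.dataloader.train)
```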
### COCO Person Keypoint Detection Baselines with Keypoint R-CNN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | kp. AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|--------|----------|----------|
| R50-FPN | 1x | 0.315 | 0.072 | 5.0 | 53.6 | 64.0 | 137261548 | model \| metrics |
| R50-FPN | 3x | 0.316 | 0.066 | 5.0 | 55.4 | 65.5 | 137849621 | model \| metrics |
| R101-FPN | 3x | 0.390 | 0.076 | 6.1 | 56.4 | 66.1 | 138363331 | model \| metrics |
| X101-FPN | 3x | 0.738 | 0.121 | 8.7 | 57.3 | 66.0 | 139686956 | model \| metrics |
### COCO Panoptic Segmentation Baselines with Panoptic FPN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|---------|----|----------|----------|
| R50-FPN | 1x | 0.304 | 0.053 | 4.8 | 37.6 | 34.7 | 39.4 | 139514544 | model \| metrics |
| R50-FPN | 3x | 0.302 | 0.053 | 4.8 | 40.0 | 36.5 | 41.5 | 139514569 | model \| metrics |
| R101-FPN | 3x | 0.392 | 0.066 | 6.0 | 42.4 | 38.5 | 43.0 | 139514519 | model \| metrics |
### LVIS Instance Segmentation Baselines with Mask R-CNN

Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).

NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines.
They are roughly 24 epochs of LVISv0.5 data.
The final results of these configs have large variance across different runs.

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|---------|----------|----------|
| R50-FPN | 1x | 0.292 | 0.107 | 7.1 | 23.6 | 24.4 | 144219072 | model \| metrics |
| R101-FPN | 1x | 0.371 | 0.114 | 7.8 | 25.6 | 25.9 | 144219035 | model \| metrics |
| X101-FPN | 1x | 0.712 | 0.151 | 10.2 | 26.7 | 27.1 | 144219108 | model \| metrics |
### Cityscapes & Pascal VOC Baselines

Simple baselines for
* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)

| Name | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | box AP50 | mask AP | model id | download |
|------|---------------------|-----------------------|----------------|--------|----------|---------|----------|----------|
| R50-FPN, Cityscapes | 0.240 | 0.078 | 4.4 | | | 36.5 | 142423278 | model \| metrics |
| R50-C4, VOC | 0.537 | 0.081 | 4.8 | 51.9 | 80.3 | | 142202221 | model \| metrics |
### Other Settings

Ablations for Deformable Conv and Cascade R-CNN:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|---------|----------|----------|
| Baseline R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| Deformable Conv | 1x | 0.342 | 0.048 | 3.5 | 41.5 | 37.5 | 138602867 | model \| metrics |
| Cascade R-CNN | 1x | 0.317 | 0.052 | 4.0 | 42.1 | 36.4 | 138602847 | model \| metrics |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| Deformable Conv | 3x | 0.349 | 0.047 | 3.5 | 42.7 | 38.5 | 144998336 | model \| metrics |
| Cascade R-CNN | 3x | 0.328 | 0.053 | 4.0 | 44.3 | 38.5 | 144998488 | model \| metrics |
Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
(Note: The baseline uses `2fc` head while the others use [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
|------|----------|---------------------|-----------------------|----------------|--------|---------|----------|----------|
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| GN | 3x | 0.309 | 0.060 | 5.6 | 42.6 | 38.6 | 138602888 | model \| metrics |
| SyncBN | 3x | 0.345 | 0.053 | 5.5 | 41.9 | 37.8 | 169527823 | model \| metrics |
| GN (from scratch) | 3x | 0.338 | 0.061 | 7.2 | 39.9 | 36.6 | 138602908 | model \| metrics |
| GN (from scratch) | 9x | N/A | 0.061 | 7.2 | 43.7 | 39.6 | 183808979 | model \| metrics |
| SyncBN (from scratch) | 9x | N/A | 0.055 | 7.2 | 43.6 | 39.3 | 184226666 | model \| metrics |
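The `2fc` vs. `4conv1fc` distinction in the note above maps to a handful of box-head config keys. A sketch of the kind of overrides the GN variants use (values assumed from the published GN configs rather than quoted from this repository):

```python
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.ROI_BOX_HEAD.NUM_CONV = 4    # "4conv1fc" head instead of the default "2fc" head
cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 1
cfg.MODEL.ROI_BOX_HEAD.NORM = "GN"     # GroupNorm in the box head
cfg.MODEL.ROI_MASK_HEAD.NORM = "GN"    # GroupNorm in the mask head
```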
A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:

| Name | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
|------|-----------------------|----------------|--------|---------|----|----------|----------|
| Panoptic FPN R101 | 0.098 | 11.4 | 47.4 | 41.3 | 46.1 | 139797668 | model \| metrics |
| Mask R-CNN X152 | 0.234 | 15.1 | 50.2 | 44.0 | | 18131413 | model \| metrics |
| above + test-time aug. | | | 51.9 | 45.9 | | | |
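The "test-time aug." row reports the same X152 model evaluated with multi-scale and flip augmentation. In detectron2 this is usually done by wrapping a trained model; a sketch (the config name here is illustrative and smaller than the X152 model in the table):

```python
from detectron2 import model_zoo
from detectron2.modeling import GeneralizedRCNNWithTTA

CONFIG = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
cfg = model_zoo.get_config(CONFIG)
model = model_zoo.get(CONFIG, trained=True)

# Scales and horizontal flip are taken from the cfg.TEST.AUG defaults.
tta_model = GeneralizedRCNNWithTTA(cfg, model)
```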
diff --git a/detectron2/README.md b/detectron2/README.md deleted file mode 100644 index 7d99b46e415ad06b7dd69e4b6a6b302ffea1d303..0000000000000000000000000000000000000000 --- a/detectron2/README.md +++ /dev/null @@ -1,60 +0,0 @@ - - -Detectron2 is Facebook AI Research's next generation library -that provides state-of-the-art detection and segmentation algorithms. -It is the successor of -[Detectron](https://github.com/facebookresearch/Detectron/) -and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/). -It supports a number of computer vision research projects and production applications in Facebook. - -
- -## Learn More about Detectron2 - -* Includes new capabilities such as panoptic segmentation, Densepose, Cascade R-CNN, rotated bounding boxes, PointRend, - DeepLab, ViTDet, MViTv2 etc. -* Used as a library to support building [research projects](projects/) on top of it. -* Models can be exported to TorchScript format or Caffe2 format for deployment. -* It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html). - -See our [blog post](https://ai.meta.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/) -to see more demos. -See this [interview](https://ai.meta.com/blog/detectron-everingham-prize/) to learn more about the stories behind detectron2. - -## Installation - -See [installation instructions](https://detectron2.readthedocs.io/tutorials/install.html). - -## Getting Started - -See [Getting Started with Detectron2](https://detectron2.readthedocs.io/tutorials/getting_started.html), -and the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -to learn about basic usage. - -Learn more at our [documentation](https://detectron2.readthedocs.org). -And see [projects/](projects/) for some projects that are built on top of detectron2. - -## Model Zoo and Baselines - -We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md). - -## License - -Detectron2 is released under the [Apache 2.0 license](LICENSE). - -## Citing Detectron2 - -If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry. - -```BibTeX -@misc{wu2019detectron2, - author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and - Wan-Yen Lo and Ross Girshick}, - title = {Detectron2}, - howpublished = {\url{https://github.com/facebookresearch/detectron2}}, - year = {2019} -} -``` diff --git a/detectron2/configs/Base-RCNN-C4.yaml b/detectron2/configs/Base-RCNN-C4.yaml deleted file mode 100644 index fbf34a0ea57a587e09997edd94c4012d69d0b6ad..0000000000000000000000000000000000000000 --- a/detectron2/configs/Base-RCNN-C4.yaml +++ /dev/null @@ -1,18 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - RPN: - PRE_NMS_TOPK_TEST: 6000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "Res5ROIHeads" -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/detectron2/configs/Base-RCNN-DilatedC5.yaml b/detectron2/configs/Base-RCNN-DilatedC5.yaml deleted file mode 100644 index c0d6d16bdaf532f09e4976f0aa240a49e748da27..0000000000000000000000000000000000000000 --- a/detectron2/configs/Base-RCNN-DilatedC5.yaml +++ /dev/null @@ -1,31 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - RESNETS: - OUT_FEATURES: ["res5"] - RES5_DILATION: 2 - RPN: - IN_FEATURES: ["res5"] - PRE_NMS_TOPK_TEST: 6000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["res5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git 
a/detectron2/configs/Base-RCNN-FPN.yaml b/detectron2/configs/Base-RCNN-FPN.yaml deleted file mode 100644 index 3e020f2e7b2f26765be317f907126a1556621abf..0000000000000000000000000000000000000000 --- a/detectron2/configs/Base-RCNN-FPN.yaml +++ /dev/null @@ -1,42 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. - POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/detectron2/configs/Base-RetinaNet.yaml b/detectron2/configs/Base-RetinaNet.yaml deleted file mode 100644 index 8b45b982bbf84b34d2a6a172ab0a946b1029f7c8..0000000000000000000000000000000000000000 --- a/detectron2/configs/Base-RetinaNet.yaml +++ /dev/null @@ -1,25 +0,0 @@ -MODEL: - META_ARCHITECTURE: "RetinaNet" - BACKBONE: - NAME: "build_retinanet_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] - FPN: - IN_FEATURES: ["res3", "res4", "res5"] - RETINANET: - IOU_THRESHOLDS: [0.4, 0.5] - IOU_LABELS: [0, -1, 1] - SMOOTH_L1_LOSS_BETA: 0.0 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 773ac10e87c626760d00d831bf664ce9ff073c49..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - LOAD_PROPOSALS: True - RESNETS: - DEPTH: 50 - PROPOSAL_GENERATOR: - NAME: "PrecomputedProposals" -DATASETS: - TRAIN: ("coco_2017_train",) - PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) - TEST: ("coco_2017_val",) - PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) -DATALOADER: - # proposals are part of the dataset_dicts, and take a lot of RAM - NUM_WORKERS: 2 diff --git 
a/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml deleted file mode 100644 index db142cd671c1841b4f64cf130bee7f7954ecdd28..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml deleted file mode 100644 index bceb6b343618d8cd9a6c414ff9eb86ab31cc230a..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 57a098f53ee8c54ecfa354cc96efefd890dc1b72..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml deleted file mode 100644 index f96130105c3ba6ab393e0932870903875f5cb732..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml deleted file mode 100644 index bc51bce390a85ee3529ffdcebde05748e1646be0..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml deleted file mode 100644 index 0fe96f57febdac5790ea4cec168fa4b97ac4807a..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml deleted file mode 100644 index 33fadeb87d1ef67ab2b55926b9a652ab4ac4a27d..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: 
"../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 3262019a1211b910d3b371569199ed1afaacf6a4..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index 41395182bf5c9dd8ab1241c4414068817298d554..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index 9c9b5ab77157baa581d90d9847c045c19ed6ffa3..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: False - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/fcos_R_50_FPN_1x.py b/detectron2/configs/COCO-Detection/fcos_R_50_FPN_1x.py deleted file mode 100644 index 86f83c68786f5995c462ade5f3067072d69f047e..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/fcos_R_50_FPN_1x.py +++ /dev/null @@ -1,11 +0,0 @@ -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco import dataloader -from ..common.models.fcos import model -from ..common.train import train - -dataloader.train.mapper.use_instance_mask = False -optimizer.lr = 0.01 - -model.backbone.bottom_up.freeze_at = 2 -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" diff --git a/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml deleted file mode 100644 index 4abb1b9a547957aa6afc0b29129e00f89cf98d59..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.py b/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.py deleted file mode 100644 index 43057a8eeed38c78183e26d21b74261eb4dbc1b9..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.py +++ /dev/null @@ -1,11 +0,0 @@ 
-from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco import dataloader -from ..common.models.retinanet import model -from ..common.train import train - -dataloader.train.mapper.use_instance_mask = False -model.backbone.bottom_up.freeze_at = 2 -optimizer.lr = 0.01 - -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" diff --git a/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml deleted file mode 100644 index 4a24ce3a9a108a8792e18c8aabfb7b712f0d3725..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml deleted file mode 100644 index 3b5412d4a7aef1d6c3f7c1e34f94007de639b833..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml deleted file mode 100644 index e04821156b0376ba5215d5ce5b7010a36b43e6a1..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - META_ARCHITECTURE: "ProposalNetwork" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - RPN: - PRE_NMS_TOPK_TEST: 12000 - POST_NMS_TOPK_TEST: 2000 diff --git a/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml deleted file mode 100644 index dc9c95203b1c3c9cd9bb9876bb8d9a5dd9b31d9a..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "ProposalNetwork" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - RPN: - POST_NMS_TOPK_TEST: 2000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml deleted file mode 100644 index 1a94cc45a0f2aaa8c92e14871c553b736545e327..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml deleted file mode 100644 index 67b70cf4be8c19f5dc735b6f55a8690698f34b69..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 1935a302d2d0fa7f69553b3fd50b5a7082c6c0d1..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py deleted file mode 100644 index 22016be150df4abbe912700d7ca29f8b7b72554a..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.py +++ /dev/null @@ -1,8 +0,0 @@ -from ..common.train import train -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco import dataloader -from ..common.models.mask_rcnn_c4 import model - -model.backbone.freeze_at = 2 -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml deleted file mode 100644 index a9aeb4eac38026dbb867e799f9fd3a8d8eb3af80..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml deleted file mode 100644 index 38ed867d897dfec839cbcf11a2e2dc8abb92f07c..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml deleted file mode 100644 index b13eefab2a049c48d94d5051c82ceb6dbde40579..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml deleted file mode 100644 index d401016358f967f6619d88b1c9bd5673a1cdeba8..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True 
- RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py deleted file mode 100644 index 40844ddeb8d47ff58a6af49ab35bad84e14f5721..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py +++ /dev/null @@ -1,8 +0,0 @@ -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco import dataloader -from ..common.models.mask_rcnn_fpn import model -from ..common.train import train - -model.backbone.bottom_up.freeze_at = 2 -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index d50fb866ca7811a87b42555c7213f88e00bf6df1..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml deleted file mode 100644 index bec680ee17a474fefe527b7b79d26266e75c09f0..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - RPN: - BBOX_REG_LOSS_TYPE: "giou" - BBOX_REG_LOSS_WEIGHT: 2.0 - ROI_BOX_HEAD: - BBOX_REG_LOSS_TYPE: "giou" - BBOX_REG_LOSS_WEIGHT: 10.0 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index be7d06b8e0f032ee7fcaabd7c122158518489fd2..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index d14c63f74383bfc308750f51d51344398b02a239..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py deleted file mode 100644 index 
d7bbdd7d00505f1e51154379c99ab621cb648a6d..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py +++ /dev/null @@ -1,34 +0,0 @@ -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco import dataloader -from ..common.models.mask_rcnn_fpn import model -from ..common.train import train - -from detectron2.config import LazyCall as L -from detectron2.modeling.backbone import RegNet -from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock - - -# Replace default ResNet with RegNetX-4GF from the DDS paper. Config source: -# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnetx/RegNetX-4.0GF_dds_8gpu.yaml#L4-L9 # noqa -model.backbone.bottom_up = L(RegNet)( - stem_class=SimpleStem, - stem_width=32, - block_class=ResBottleneckBlock, - depth=23, - w_a=38.65, - w_0=96, - w_m=2.43, - group_width=40, - freeze_at=2, - norm="FrozenBN", - out_features=["s1", "s2", "s3", "s4"], -) -model.pixel_std = [57.375, 57.120, 58.395] - -optimizer.weight_decay = 5e-5 -train.init_checkpoint = ( - "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906383/RegNetX-4.0GF_dds_8gpu.pyth" -) -# RegNets benefit from enabling cudnn benchmark mode -train.cudnn_benchmark = True diff --git a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py b/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py deleted file mode 100644 index 72c6b7a5c8939970bd0e1e4a3c1155695943b19a..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py +++ /dev/null @@ -1,35 +0,0 @@ -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco import dataloader -from ..common.models.mask_rcnn_fpn import model -from ..common.train import train - -from detectron2.config import LazyCall as L -from detectron2.modeling.backbone import RegNet -from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock - - -# Replace default ResNet with RegNetY-4GF from the DDS paper. 
Config source: -# https://github.com/facebookresearch/pycls/blob/2c152a6e5d913e898cca4f0a758f41e6b976714d/configs/dds_baselines/regnety/RegNetY-4.0GF_dds_8gpu.yaml#L4-L10 # noqa -model.backbone.bottom_up = L(RegNet)( - stem_class=SimpleStem, - stem_width=32, - block_class=ResBottleneckBlock, - depth=22, - w_a=31.41, - w_0=96, - w_m=2.24, - group_width=64, - se_ratio=0.25, - freeze_at=2, - norm="FrozenBN", - out_features=["s1", "s2", "s3", "s4"], -) -model.pixel_std = [57.375, 57.120, 58.395] - -optimizer.weight_decay = 5e-5 -train.init_checkpoint = ( - "https://dl.fbaipublicfiles.com/pycls/dds_baselines/160906838/RegNetY-4.0GF_dds_8gpu.pyth" -) -# RegNets benefit from enabling cudnn benchmark mode -train.cudnn_benchmark = True diff --git a/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml deleted file mode 100644 index 4e03944a42d2e497da5ceca17c8fda797dac3f82..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - KEYPOINT_ON: True - ROI_HEADS: - NUM_CLASSES: 1 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss - RPN: - # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. - # 1000 proposals per-image is found to hurt box AP. - # Therefore we increase it to 1500 per-image. - POST_NMS_TOPK_TRAIN: 1500 -DATASETS: - TRAIN: ("keypoints_coco_2017_train",) - TEST: ("keypoints_coco_2017_val",) diff --git a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 9309535c57a1aa7d23297aac80a9bd78a6c79fcc..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py b/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py deleted file mode 100644 index 1aad53bfef62fb584d5022585d567e346f671a55..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.py +++ /dev/null @@ -1,8 +0,0 @@ -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco_keypoint import dataloader -from ..common.models.keypoint_rcnn_fpn import model -from ..common.train import train - -model.backbone.bottom_up.freeze_at = 2 -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" diff --git a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 7bf85cf745b53b3e7ab28fe94b7f4f9e7fe6e335..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index 
a07f243f650a497b9372501e3face75194cf0941..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index d4bfa20a98c0a65c6bd60e93b07e8f4b7d92a867..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml deleted file mode 100644 index f00d54b760c2b9271c75643e0a1ab1ffc0d9543a..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - MASK_ON: True - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_train_panoptic_separated",) - TEST: ("coco_2017_val_panoptic_separated",) -DATALOADER: - FILTER_EMPTY_ANNOTATIONS: False diff --git a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml deleted file mode 100644 index 0e01f6fb31e9b00b1857b7de3b5074184d1f4a21..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py b/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py deleted file mode 100644 index 40cf18131810307157a9a7d1f6d5922b00fd73d5..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.py +++ /dev/null @@ -1,8 +0,0 @@ -from ..common.optim import SGD as optimizer -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.data.coco_panoptic_separated import dataloader -from ..common.models.panoptic_fpn import model -from ..common.train import train - -model.backbone.bottom_up.freeze_at = 2 -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" diff --git a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml deleted file mode 100644 index 6afa2c1cc92495309ed1553a17359fe5d7d6566e..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git 
a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml deleted file mode 100644 index b956b3f673e78649184fe2c50e2700b3f1f14794..0000000000000000000000000000000000000000 --- a/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml deleted file mode 100644 index 1a7aaeb961581ed9492c4cfe5a69a1eb60495b3e..0000000000000000000000000000000000000000 --- a/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +++ /dev/null @@ -1,27 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - # For better, more stable performance initialize from COCO - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" - MASK_ON: True - ROI_HEADS: - NUM_CLASSES: 8 -# This is similar to the setting used in Mask R-CNN paper, Appendix A -# But there are some differences, e.g., we did not initialize the output -# layer using the corresponding classes from COCO -INPUT: - MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 2048 - MAX_SIZE_TEST: 2048 -DATASETS: - TRAIN: ("cityscapes_fine_instance_seg_train",) - TEST: ("cityscapes_fine_instance_seg_val",) -SOLVER: - BASE_LR: 0.01 - STEPS: (18000,) - MAX_ITER: 24000 - IMS_PER_BATCH: 8 -TEST: - EVAL_PERIOD: 8000 diff --git a/detectron2/configs/Detectron1-Comparisons/README.md b/detectron2/configs/Detectron1-Comparisons/README.md deleted file mode 100644 index 924fd00af642ddf1a4ff4c4f5947f676134eb7de..0000000000000000000000000000000000000000 --- a/detectron2/configs/Detectron1-Comparisons/README.md +++ /dev/null @@ -1,84 +0,0 @@ - -Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. - -The differences in implementation details are shared in -[Compatibility with Other Libraries](../../docs/notes/compatibility.md). - -The differences in model zoo's experimental settings include: -* Use scale augmentation during training. This improves AP with lower training cost. -* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may - affect other AP. -* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. -* Use `ROIAlignV2`. This does not significantly affect AP. - -In this directory, we provide a few configs that __do not__ have the above changes. -They mimic Detectron's behavior as close as possible, -and provide a fair comparison of accuracy and speed against Detectron. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
- -## Comparisons: - -* Faster R-CNN: Detectron's AP is 36.7, similar to ours. -* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's - [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be - compensated back by some parameter tuning. -* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation. - See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details. - -For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). diff --git a/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml deleted file mode 100644 index 6ce77f137fa2c4e5254a62b58c18b8b76096f2aa..0000000000000000000000000000000000000000 --- a/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. - RPN: - SMOOTH_L1_BETA: 0.1111 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index aacf868ba5290c752031c130a2081af48afc0808..0000000000000000000000000000000000000000 --- a/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,27 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - RPN: - SMOOTH_L1_BETA: 0.1111 - # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 - # 1000 proposals per-image is found to hurt box AP. - # Therefore we increase it to 1500 per-image. - POST_NMS_TOPK_TRAIN: 1500 -DATASETS: - TRAIN: ("keypoints_coco_2017_train",) - TEST: ("keypoints_coco_2017_val",) diff --git a/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml deleted file mode 100644 index 4ea86a8d8e2cd3e51cbc7311b0d00710c07d01f6..0000000000000000000000000000000000000000 --- a/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. 
- RPN: - SMOOTH_L1_BETA: 0.1111 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_MASK_HEAD: - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml deleted file mode 100644 index f0c3a1bbc0a09e1384de522f30c443ba1e36fafa..0000000000000000000000000000000000000000 --- a/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 64b4caa4ef2b284782367ea702e1ae6653472630..0000000000000000000000000000000000000000 --- a/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml deleted file mode 100644 index c8b822c6c006ba642f4caf9b55e7983f6797427a..0000000000000000000000000000000000000000 --- a/detectron2/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml deleted file mode 100644 index ca4dd97144561276ecaabbb6c254e3a7737ac157..0000000000000000000000000000000000000000 --- a/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +++ /dev/null @@ -1,22 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 - ROI_HEADS: - 
NUM_CLASSES: 1203 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v1_train",) - TEST: ("lvis_v1_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -SOLVER: - STEPS: (120000, 160000) - MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index f313295ee5f0d553d394ce2efe003810c79af47d..0000000000000000000000000000000000000000 --- a/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,22 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1203 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v1_train",) - TEST: ("lvis_v1_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -SOLVER: - STEPS: (120000, 160000) - MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml deleted file mode 100644 index f6528f7c31c8cfbf139c14fd0cae598592d8e898..0000000000000000000000000000000000000000 --- a/detectron2/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +++ /dev/null @@ -1,26 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1203 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v1_train",) - TEST: ("lvis_v1_val",) -SOLVER: - STEPS: (120000, 160000) - MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index abb33b618932e94b66239945ac892f4c84a6e8f8..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - RPN: - POST_NMS_TOPK_TRAIN: 2000 diff --git a/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index e2201ad5c46ded91ccfa47b7698a521625c5e447..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NAME: 
CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml deleted file mode 100644 index fc117f6b5e3e51558ec2f01b73c5365622e5ce25..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +++ /dev/null @@ -1,36 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 152 - DEFORM_ON_PER_STAGE: [False, True, True, True] - ROI_HEADS: - NAME: "CascadeROIHeads" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NUM_CONV: 8 - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - IMS_PER_BATCH: 128 - STEPS: (35000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.16 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 2500 diff --git a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml deleted file mode 100644 index 4c3b767ff473bbab7225cc8a4a92608543d78246..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - CLS_AGNOSTIC_MASK: True diff --git a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml deleted file mode 100644 index 04ff988d073ef9169ee4ca2cbce0d6f030c15232..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 - DEFORM_MODULATED: False diff --git a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml deleted file mode 100644 index 68c0ca58d7df97ca728c339da0ca9828fe6be318..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 - DEFORM_MODULATED: False -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml deleted file mode 100644 index 74d274e5a529b5a8afe186940868f9d48c6112b3..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" - MASK_ON: True - 
RESNETS: - DEPTH: 50 - NORM: "GN" - STRIDE_IN_1X1: False - FPN: - NORM: "GN" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - ROI_MASK_HEAD: - NORM: "GN" -SOLVER: - # 3x schedule - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml deleted file mode 100644 index 11ebb076ba529f26c71a0d972e96ca4c2d6a830b..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - STRIDE_IN_1X1: True - FPN: - NORM: "SyncBN" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "SyncBN" - ROI_MASK_HEAD: - NORM: "SyncBN" -SOLVER: - # 3x schedule - STEPS: (210000, 250000) - MAX_ITER: 270000 -TEST: - PRECISE_BN: - ENABLED: True diff --git a/detectron2/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py b/detectron2/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py deleted file mode 100644 index bdd49a4566d1d0c79d0613c34a8cffd616f74fd2..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py +++ /dev/null @@ -1,152 +0,0 @@ -# An example config to train a mmdetection model using detectron2. - -from ..common.data.coco import dataloader -from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier -from ..common.optim import SGD as optimizer -from ..common.train import train -from ..common.data.constants import constants - -from detectron2.modeling.mmdet_wrapper import MMDetDetector -from detectron2.config import LazyCall as L - -model = L(MMDetDetector)( - detector=dict( - type="MaskRCNN", - pretrained="torchvision://resnet50", - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - ), - neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), - rpn_head=dict( - type="RPNHead", - in_channels=256, - feat_channels=256, - anchor_generator=dict( - type="AnchorGenerator", - scales=[8], - ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64], - ), - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[1.0, 1.0, 1.0, 1.0], - ), - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type="L1Loss", loss_weight=1.0), - ), - roi_head=dict( - type="StandardRoIHead", - bbox_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - bbox_head=dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.1, 0.1, 0.2, 0.2], - ), - reg_class_agnostic=False, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="L1Loss", loss_weight=1.0), - ), - mask_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - mask_head=dict( - type="FCNMaskHead", - num_convs=4, - in_channels=256, - 
conv_out_channels=256, - num_classes=80, - loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), - ), - ), - # model training and testing settings - train_cfg=dict( - rpn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False, - ), - allowed_border=-1, - pos_weight=-1, - debug=False, - ), - rpn_proposal=dict( - nms_pre=2000, - max_per_img=1000, - nms=dict(type="nms", iou_threshold=0.7), - min_bbox_size=0, - ), - rcnn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - ), - test_cfg=dict( - rpn=dict( - nms_pre=1000, - max_per_img=1000, - nms=dict(type="nms", iou_threshold=0.7), - min_bbox_size=0, - ), - rcnn=dict( - score_thr=0.05, - nms=dict(type="nms", iou_threshold=0.5), - max_per_img=100, - mask_thr_binary=0.5, - ), - ), - ), - pixel_mean=constants.imagenet_rgb256_mean, - pixel_std=constants.imagenet_rgb256_std, -) - -dataloader.train.mapper.image_format = "RGB" # torchvision pretrained model -train.init_checkpoint = None # pretrained model is loaded inside backbone diff --git a/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml deleted file mode 100644 index 34016cea3ca9d7fb69ef4fe01d6b47ee8690a13b..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# A large PanopticFPN for demo purposes. -# Use GN on backbone to support semantic seg. -# Use Cascade + Deform Conv to improve localization. -_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" - RESNETS: - DEPTH: 101 - NORM: "GN" - DEFORM_ON_PER_STAGE: [False, True, True, True] - STRIDE_IN_1X1: False - FPN: - NORM: "GN" - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - STEPS: (105000, 125000) - MAX_ITER: 135000 - IMS_PER_BATCH: 32 - BASE_LR: 0.04 diff --git a/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml deleted file mode 100644 index f3400288cde242fcf66eef7f63b5a9165ca663c5..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" -MODEL: - # Train from random initialization. - WEIGHTS: "" - # It makes sense to divide by STD when training from scratch - # But it seems to make no difference on the results and C2's models didn't do this. - # So we keep things consistent with C2. - # PIXEL_STD: [57.375, 57.12, 58.395] - MASK_ON: True - BACKBONE: - FREEZE_AT: 0 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. 
diff --git a/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml deleted file mode 100644 index d90c9ff0ef4573252ee165b4c958ec5f74178176..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" -MODEL: - PIXEL_STD: [57.375, 57.12, 58.395] - WEIGHTS: "" - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False - BACKBONE: - FREEZE_AT: 0 -SOLVER: - # 9x schedule - IMS_PER_BATCH: 64 # 4x the standard - STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k - MAX_ITER: 202500 # 90k * 9 / 4 - BASE_LR: 0.08 -TEST: - EVAL_PERIOD: 2500 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. diff --git a/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml deleted file mode 100644 index 60d4e42330e396a1901437df8e17b262d5ad547a..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" -MODEL: - PIXEL_STD: [57.375, 57.12, 58.395] - WEIGHTS: "" - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False - BACKBONE: - FREEZE_AT: 0 -SOLVER: - # 9x schedule - IMS_PER_BATCH: 64 # 4x the standard - STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k - MAX_ITER: 202500 # 90k * 9 / 4 - BASE_LR: 0.08 -TEST: - EVAL_PERIOD: 2500 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. diff --git a/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml b/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml deleted file mode 100644 index ac256e1372770ab3d9ae522c962de0fd0dbceeb5..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_train_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/detectron2/configs/Misc/torchvision_imagenet_R_50.py b/detectron2/configs/Misc/torchvision_imagenet_R_50.py deleted file mode 100644 index 20c1f85d6661ef85529904afdc6491b2c889b98f..0000000000000000000000000000000000000000 --- a/detectron2/configs/Misc/torchvision_imagenet_R_50.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -An example config file to train a ImageNet classifier with detectron2. -Model and dataloader both come from torchvision. -This shows how to use detectron2 as a general engine for any new models and tasks. 
- -To run, use the following command: - -python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \ - --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/ - -""" - -import torch -from torch import nn -from torch.nn import functional as F -from omegaconf import OmegaConf -import torchvision -from torchvision.transforms import transforms as T -from torchvision.models.resnet import ResNet, Bottleneck -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2.solver import WarmupParamScheduler -from detectron2.solver.build import get_default_optimizer_params -from detectron2.config import LazyCall as L -from detectron2.model_zoo import get_config -from detectron2.data.samplers import TrainingSampler, InferenceSampler -from detectron2.evaluation import DatasetEvaluator -from detectron2.utils import comm - - -""" -Note: Here we put reusable code (models, evaluation, data) together with configs just as a -proof-of-concept, to easily demonstrate what's needed to train a ImageNet classifier in detectron2. -Writing code in configs offers extreme flexibility but is often not a good engineering practice. -In practice, you might want to put code in your project and import them instead. -""" - - -def build_data_loader(dataset, batch_size, num_workers, training=True): - return torch.utils.data.DataLoader( - dataset, - sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)), - batch_size=batch_size, - num_workers=num_workers, - pin_memory=True, - ) - - -class ClassificationNet(nn.Module): - def __init__(self, model: nn.Module): - super().__init__() - self.model = model - - @property - def device(self): - return list(self.model.parameters())[0].device - - def forward(self, inputs): - image, label = inputs - pred = self.model(image.to(self.device)) - if self.training: - label = label.to(self.device) - return F.cross_entropy(pred, label) - else: - return pred - - -class ClassificationAcc(DatasetEvaluator): - def reset(self): - self.corr = self.total = 0 - - def process(self, inputs, outputs): - image, label = inputs - self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item() - self.total += len(label) - - def evaluate(self): - all_corr_total = comm.all_gather([self.corr, self.total]) - corr = sum(x[0] for x in all_corr_total) - total = sum(x[1] for x in all_corr_total) - return {"accuracy": corr / total} - - -# --- End of code that could be in a project and be imported - - -dataloader = OmegaConf.create() -dataloader.train = L(build_data_loader)( - dataset=L(torchvision.datasets.ImageNet)( - root="/path/to/imagenet", - split="train", - transform=L(T.Compose)( - transforms=[ - L(T.RandomResizedCrop)(size=224), - L(T.RandomHorizontalFlip)(), - T.ToTensor(), - L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), - ] - ), - ), - batch_size=256 // 8, - num_workers=4, - training=True, -) - -dataloader.test = L(build_data_loader)( - dataset=L(torchvision.datasets.ImageNet)( - root="${...train.dataset.root}", - split="val", - transform=L(T.Compose)( - transforms=[ - L(T.Resize)(size=256), - L(T.CenterCrop)(size=224), - T.ToTensor(), - L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), - ] - ), - ), - batch_size=256 // 8, - num_workers=4, - training=False, -) - -dataloader.evaluator = L(ClassificationAcc)() - -model = L(ClassificationNet)( - model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True) -) - - -optimizer = L(torch.optim.SGD)( - 
params=L(get_default_optimizer_params)(), - lr=0.1, - momentum=0.9, - weight_decay=1e-4, -) - -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100] - ), - warmup_length=1 / 100, - warmup_factor=0.1, -) - - -train = get_config("common/train.py").train -train.init_checkpoint = None -train.max_iter = 100 * 1281167 // 256 diff --git a/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml deleted file mode 100644 index ea2a6baaebd1a186db18f2904430ffb25901898e..0000000000000000000000000000000000000000 --- a/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 20 -INPUT: - MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) - MIN_SIZE_TEST: 800 -DATASETS: - TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') - TEST: ('voc_2007_test',) -SOLVER: - STEPS: (12000, 16000) - MAX_ITER: 18000 # 17.4 epochs - WARMUP_ITERS: 100 diff --git a/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml deleted file mode 100644 index e554cab18a358a27b630c1ab0c2359666b0e1514..0000000000000000000000000000000000000000 --- a/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 20 -INPUT: - MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) - MIN_SIZE_TEST: 800 -DATASETS: - TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') - TEST: ('voc_2007_test',) -SOLVER: - STEPS: (12000, 16000) - MAX_ITER: 18000 # 17.4 epochs - WARMUP_ITERS: 100 diff --git a/detectron2/configs/common/README.md b/detectron2/configs/common/README.md deleted file mode 100644 index 912cc29927542bfe4258d3208cf52d73cb0ea477..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/README.md +++ /dev/null @@ -1,6 +0,0 @@ -This directory provides definitions for a few common models, dataloaders, scheduler, -and optimizers that are often used in training. -The definition of these objects are provided in the form of lazy instantiation: -their arguments can be edited by users before constructing the objects. - -They can be imported, or loaded by `model_zoo.get_config` API in users' own configs. diff --git a/detectron2/configs/common/coco_schedule.py b/detectron2/configs/common/coco_schedule.py deleted file mode 100644 index 355e66a1d213cb599a7ffe55089d854089c8ead2..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/coco_schedule.py +++ /dev/null @@ -1,47 +0,0 @@ -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler - - -def default_X_scheduler(num_X): - """ - Returns the config for a default multi-step LR scheduler such as "1x", "3x", - commonly referred to in papers, where every 1x has the total length of 1440k - training images (~12 COCO epochs). LR is decayed twice at the end of training - following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4. 
- - Args: - num_X: a positive real number - - Returns: - DictConfig: configs that define the multiplier for LR during training - """ - # total number of iterations assuming 16 batch size, using 1440000/16=90000 - total_steps_16bs = num_X * 90000 - - if num_X <= 2: - scheduler = L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - # note that scheduler is scale-invariant. This is equivalent to - # milestones=[6, 8, 9] - milestones=[60000, 80000, 90000], - ) - else: - scheduler = L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs], - ) - return L(WarmupParamScheduler)( - scheduler=scheduler, - warmup_length=1000 / total_steps_16bs, - warmup_method="linear", - warmup_factor=0.001, - ) - - -lr_multiplier_1x = default_X_scheduler(1) -lr_multiplier_2x = default_X_scheduler(2) -lr_multiplier_3x = default_X_scheduler(3) -lr_multiplier_6x = default_X_scheduler(6) -lr_multiplier_9x = default_X_scheduler(9) diff --git a/detectron2/configs/common/data/coco.py b/detectron2/configs/common/data/coco.py deleted file mode 100644 index 703c4385c7ddc7eb0759c98d102ab2384d6a9e3e..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/data/coco.py +++ /dev/null @@ -1,48 +0,0 @@ -from omegaconf import OmegaConf - -import detectron2.data.transforms as T -from detectron2.config import LazyCall as L -from detectron2.data import ( - DatasetMapper, - build_detection_test_loader, - build_detection_train_loader, - get_detection_dataset_dicts, -) -from detectron2.evaluation import COCOEvaluator - -dataloader = OmegaConf.create() - -dataloader.train = L(build_detection_train_loader)( - dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), - mapper=L(DatasetMapper)( - is_train=True, - augmentations=[ - L(T.ResizeShortestEdge)( - short_edge_length=(640, 672, 704, 736, 768, 800), - sample_style="choice", - max_size=1333, - ), - L(T.RandomFlip)(horizontal=True), - ], - image_format="BGR", - use_instance_mask=True, - ), - total_batch_size=16, - num_workers=4, -) - -dataloader.test = L(build_detection_test_loader)( - dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), - mapper=L(DatasetMapper)( - is_train=False, - augmentations=[ - L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), - ], - image_format="${...train.mapper.image_format}", - ), - num_workers=4, -) - -dataloader.evaluator = L(COCOEvaluator)( - dataset_name="${..test.dataset.names}", -) diff --git a/detectron2/configs/common/data/coco_keypoint.py b/detectron2/configs/common/data/coco_keypoint.py deleted file mode 100644 index b4ceb066faf696954244205dc75376b767071217..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/data/coco_keypoint.py +++ /dev/null @@ -1,13 +0,0 @@ -from detectron2.data.detection_utils import create_keypoint_hflip_indices - -from .coco import dataloader - -dataloader.train.dataset.min_keypoints = 1 -dataloader.train.dataset.names = "keypoints_coco_2017_train" -dataloader.test.dataset.names = "keypoints_coco_2017_val" - -dataloader.train.mapper.update( - use_instance_mask=False, - use_keypoint=True, - keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names), -) diff --git a/detectron2/configs/common/data/coco_panoptic_separated.py b/detectron2/configs/common/data/coco_panoptic_separated.py deleted file mode 100644 index 5ccbc77e64d1c92c99cbd7158d047bab54cb9f3d..0000000000000000000000000000000000000000 --- 
a/detectron2/configs/common/data/coco_panoptic_separated.py +++ /dev/null @@ -1,26 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.evaluation import ( - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - SemSegEvaluator, -) - -from .coco import dataloader - -dataloader.train.dataset.names = "coco_2017_train_panoptic_separated" -dataloader.train.dataset.filter_empty = False -dataloader.test.dataset.names = "coco_2017_val_panoptic_separated" - - -dataloader.evaluator = [ - L(COCOEvaluator)( - dataset_name="${...test.dataset.names}", - ), - L(SemSegEvaluator)( - dataset_name="${...test.dataset.names}", - ), - L(COCOPanopticEvaluator)( - dataset_name="${...test.dataset.names}", - ), -] diff --git a/detectron2/configs/common/data/constants.py b/detectron2/configs/common/data/constants.py deleted file mode 100644 index be11cb5ac7c32a260af96ed27c32ed767b2f2bcd..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/data/constants.py +++ /dev/null @@ -1,9 +0,0 @@ -constants = dict( - imagenet_rgb256_mean=[123.675, 116.28, 103.53], - imagenet_rgb256_std=[58.395, 57.12, 57.375], - imagenet_bgr256_mean=[103.530, 116.280, 123.675], - # When using pre-trained models in Detectron1 or any MSRA models, - # std has been absorbed into its conv1 weights, so the std needs to be set 1. - # Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) - imagenet_bgr256_std=[1.0, 1.0, 1.0], -) diff --git a/detectron2/configs/common/models/cascade_rcnn.py b/detectron2/configs/common/models/cascade_rcnn.py deleted file mode 100644 index c7372a801dc00d7fec4db8cda8c2612ce281d48a..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/cascade_rcnn.py +++ /dev/null @@ -1,36 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads - -from .mask_rcnn_fpn import model - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] - -model.roi_heads.update( - _target_=CascadeROIHeads, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[], - fc_dims=[1024, 1024], - ) - for k in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - test_score_thresh=0.05, - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - cls_agnostic_bbox_reg=True, - num_classes="${...num_classes}", - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) diff --git a/detectron2/configs/common/models/fcos.py b/detectron2/configs/common/models/fcos.py deleted file mode 100644 index 1c752029b7fc64ec375a55182e5342c9eb48bb33..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/fcos.py +++ /dev/null @@ -1,23 +0,0 @@ -from detectron2.modeling.meta_arch.fcos import FCOS, FCOSHead - -from .retinanet import model - -model._target_ = FCOS - -del model.anchor_generator -del model.box2box_transform -del model.anchor_matcher -del model.input_format - -# Use P5 instead of C5 to compute P6/P7 -# (Sec 2.2 of https://arxiv.org/abs/2006.09214) -model.backbone.top_block.in_feature = "p5" 
-model.backbone.top_block.in_channels = 256 - -# New score threshold determined based on sqrt(cls_score * centerness) -model.test_score_thresh = 0.2 -model.test_nms_thresh = 0.6 - -model.head._target_ = FCOSHead -del model.head.num_anchors -model.head.norm = "GN" diff --git a/detectron2/configs/common/models/keypoint_rcnn_fpn.py b/detectron2/configs/common/models/keypoint_rcnn_fpn.py deleted file mode 100644 index 56b3994df249884d4816fc9a5c7f553a9ab6f400..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/keypoint_rcnn_fpn.py +++ /dev/null @@ -1,33 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.poolers import ROIPooler -from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead - -from .mask_rcnn_fpn import model - -[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]] - -model.roi_heads.update( - num_classes=1, - keypoint_in_features=["p2", "p3", "p4", "p5"], - keypoint_pooler=L(ROIPooler)( - output_size=14, - scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), - sampling_ratio=0, - pooler_type="ROIAlignV2", - ), - keypoint_head=L(KRCNNConvDeconvUpsampleHead)( - input_shape=ShapeSpec(channels=256, width=14, height=14), - num_keypoints=17, - conv_dims=[512] * 8, - loss_normalizer="visible", - ), -) - -# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. -# 1000 proposals per-image is found to hurt box AP. -# Therefore we increase it to 1500 per-image. -model.proposal_generator.post_nms_topk = (1500, 1000) - -# Keypoint AP degrades (though box AP improves) when using plain L1 loss -model.roi_heads.box_predictor.smooth_l1_beta = 0.5 diff --git a/detectron2/configs/common/models/mask_rcnn_c4.py b/detectron2/configs/common/models/mask_rcnn_c4.py deleted file mode 100644 index 902d5b195f66881c67a37ec0fe606101a6812260..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/mask_rcnn_c4.py +++ /dev/null @@ -1,90 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.meta_arch import GeneralizedRCNN -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator -from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.poolers import ROIPooler -from detectron2.modeling.proposal_generator import RPN, StandardRPNHead -from detectron2.modeling.roi_heads import ( - FastRCNNOutputLayers, - MaskRCNNConvUpsampleHead, - Res5ROIHeads, -) - -from ..data.constants import constants - -model = L(GeneralizedRCNN)( - backbone=L(ResNet)( - stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), - stages=L(ResNet.make_default_stages)( - depth=50, - stride_in_1x1=True, - norm="FrozenBN", - ), - out_features=["res4"], - ), - proposal_generator=L(RPN)( - in_features=["res4"], - head=L(StandardRPNHead)(in_channels=1024, num_anchors=15), - anchor_generator=L(DefaultAnchorGenerator)( - sizes=[[32, 64, 128, 256, 512]], - aspect_ratios=[0.5, 1.0, 2.0], - strides=[16], - offset=0.0, - ), - anchor_matcher=L(Matcher)( - thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True - ), - box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), - batch_size_per_image=256, - positive_fraction=0.5, - pre_nms_topk=(12000, 6000), - post_nms_topk=(2000, 1000), - 
nms_thresh=0.7, - ), - roi_heads=L(Res5ROIHeads)( - num_classes=80, - batch_size_per_image=512, - positive_fraction=0.25, - proposal_matcher=L(Matcher)( - thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False - ), - in_features=["res4"], - pooler=L(ROIPooler)( - output_size=14, - scales=(1.0 / 16,), - sampling_ratio=0, - pooler_type="ROIAlignV2", - ), - res5=L(ResNet.make_stage)( - block_class=BottleneckBlock, - num_blocks=3, - stride_per_block=[2, 1, 1], - in_channels=1024, - bottleneck_channels=512, - out_channels=2048, - norm="FrozenBN", - stride_in_1x1=True, - ), - box_predictor=L(FastRCNNOutputLayers)( - input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1), - test_score_thresh=0.05, - box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), - num_classes="${..num_classes}", - ), - mask_head=L(MaskRCNNConvUpsampleHead)( - input_shape=L(ShapeSpec)( - channels="${...res5.out_channels}", - width="${...pooler.output_size}", - height="${...pooler.output_size}", - ), - num_classes="${..num_classes}", - conv_dims=[256], - ), - ), - pixel_mean=constants.imagenet_bgr256_mean, - pixel_std=constants.imagenet_bgr256_std, - input_format="BGR", -) diff --git a/detectron2/configs/common/models/mask_rcnn_fpn.py b/detectron2/configs/common/models/mask_rcnn_fpn.py deleted file mode 100644 index 5e5c501cd1da6cece55210efefc4ec712075ca8a..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/mask_rcnn_fpn.py +++ /dev/null @@ -1,95 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.meta_arch import GeneralizedRCNN -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator -from detectron2.modeling.backbone.fpn import LastLevelMaxPool -from detectron2.modeling.backbone import BasicStem, FPN, ResNet -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.poolers import ROIPooler -from detectron2.modeling.proposal_generator import RPN, StandardRPNHead -from detectron2.modeling.roi_heads import ( - StandardROIHeads, - FastRCNNOutputLayers, - MaskRCNNConvUpsampleHead, - FastRCNNConvFCHead, -) - -from ..data.constants import constants - -model = L(GeneralizedRCNN)( - backbone=L(FPN)( - bottom_up=L(ResNet)( - stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), - stages=L(ResNet.make_default_stages)( - depth=50, - stride_in_1x1=True, - norm="FrozenBN", - ), - out_features=["res2", "res3", "res4", "res5"], - ), - in_features="${.bottom_up.out_features}", - out_channels=256, - top_block=L(LastLevelMaxPool)(), - ), - proposal_generator=L(RPN)( - in_features=["p2", "p3", "p4", "p5", "p6"], - head=L(StandardRPNHead)(in_channels=256, num_anchors=3), - anchor_generator=L(DefaultAnchorGenerator)( - sizes=[[32], [64], [128], [256], [512]], - aspect_ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64], - offset=0.0, - ), - anchor_matcher=L(Matcher)( - thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True - ), - box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), - batch_size_per_image=256, - positive_fraction=0.5, - pre_nms_topk=(2000, 1000), - post_nms_topk=(1000, 1000), - nms_thresh=0.7, - ), - roi_heads=L(StandardROIHeads)( - num_classes=80, - batch_size_per_image=512, - positive_fraction=0.25, - proposal_matcher=L(Matcher)( - thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False - ), - box_in_features=["p2", "p3", "p4", "p5"], 
- box_pooler=L(ROIPooler)( - output_size=7, - scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), - sampling_ratio=0, - pooler_type="ROIAlignV2", - ), - box_head=L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[], - fc_dims=[1024, 1024], - ), - box_predictor=L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - test_score_thresh=0.05, - box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)), - num_classes="${..num_classes}", - ), - mask_in_features=["p2", "p3", "p4", "p5"], - mask_pooler=L(ROIPooler)( - output_size=14, - scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), - sampling_ratio=0, - pooler_type="ROIAlignV2", - ), - mask_head=L(MaskRCNNConvUpsampleHead)( - input_shape=ShapeSpec(channels=256, width=14, height=14), - num_classes="${..num_classes}", - conv_dims=[256, 256, 256, 256, 256], - ), - ), - pixel_mean=constants.imagenet_bgr256_mean, - pixel_std=constants.imagenet_bgr256_std, - input_format="BGR", -) diff --git a/detectron2/configs/common/models/mask_rcnn_vitdet.py b/detectron2/configs/common/models/mask_rcnn_vitdet.py deleted file mode 100644 index d6f5244402734a3f9f675c5c4e42439ea708d24d..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/mask_rcnn_vitdet.py +++ /dev/null @@ -1,59 +0,0 @@ -from functools import partial -import torch.nn as nn -from detectron2.config import LazyCall as L -from detectron2.modeling import ViT, SimpleFeaturePyramid -from detectron2.modeling.backbone.fpn import LastLevelMaxPool - -from .mask_rcnn_fpn import model -from ..data.constants import constants - -model.pixel_mean = constants.imagenet_rgb256_mean -model.pixel_std = constants.imagenet_rgb256_std -model.input_format = "RGB" - -# Base -embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1 -# Creates Simple Feature Pyramid from ViT backbone -model.backbone = L(SimpleFeaturePyramid)( - net=L(ViT)( # Single-scale ViT backbone - img_size=1024, - patch_size=16, - embed_dim=embed_dim, - depth=depth, - num_heads=num_heads, - drop_path_rate=dp, - window_size=14, - mlp_ratio=4, - qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - window_block_indexes=[ - # 2, 5, 8 11 for global attention - 0, - 1, - 3, - 4, - 6, - 7, - 9, - 10, - ], - residual_block_indexes=[], - use_rel_pos=True, - out_feature="last_feat", - ), - in_feature="${.net.out_feature}", - out_channels=256, - scale_factors=(4.0, 2.0, 1.0, 0.5), - top_block=L(LastLevelMaxPool)(), - norm="LN", - square_pad=1024, -) - -model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN" - -# 2conv in RPN: -model.proposal_generator.head.conv_dims = [-1, -1] - -# 4conv1fc box head -model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] -model.roi_heads.box_head.fc_dims = [1024] diff --git a/detectron2/configs/common/models/panoptic_fpn.py b/detectron2/configs/common/models/panoptic_fpn.py deleted file mode 100644 index 88f55d2ce9db62e61445d6a3700067d9d864ecae..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/panoptic_fpn.py +++ /dev/null @@ -1,20 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling import PanopticFPN -from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead - -from .mask_rcnn_fpn import model - -model._target_ = PanopticFPN -model.sem_seg_head = L(SemSegFPNHead)( - input_shape={ - f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}") - for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32]) - }, - 
ignore_value=255, - num_classes=54, # COCO stuff + 1 - conv_dims=128, - common_stride=4, - loss_weight=0.5, - norm="GN", -) diff --git a/detectron2/configs/common/models/retinanet.py b/detectron2/configs/common/models/retinanet.py deleted file mode 100644 index 784e5317f594db966dac02792e9c9db1774623d6..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/models/retinanet.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- - -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.meta_arch import RetinaNet -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator -from detectron2.modeling.backbone.fpn import LastLevelP6P7 -from detectron2.modeling.backbone import BasicStem, FPN, ResNet -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.meta_arch.retinanet import RetinaNetHead - -from ..data.constants import constants - -model = L(RetinaNet)( - backbone=L(FPN)( - bottom_up=L(ResNet)( - stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"), - stages=L(ResNet.make_default_stages)( - depth=50, - stride_in_1x1=True, - norm="FrozenBN", - ), - out_features=["res3", "res4", "res5"], - ), - in_features=["res3", "res4", "res5"], - out_channels=256, - top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"), - ), - head=L(RetinaNetHead)( - # Shape for each input feature map - input_shape=[ShapeSpec(channels=256)] * 5, - num_classes="${..num_classes}", - conv_dims=[256, 256, 256, 256], - prior_prob=0.01, - num_anchors=9, - ), - anchor_generator=L(DefaultAnchorGenerator)( - sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]], - aspect_ratios=[0.5, 1.0, 2.0], - strides=[8, 16, 32, 64, 128], - offset=0.0, - ), - box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]), - anchor_matcher=L(Matcher)( - thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True - ), - num_classes=80, - head_in_features=["p3", "p4", "p5", "p6", "p7"], - focal_loss_alpha=0.25, - focal_loss_gamma=2.0, - pixel_mean=constants.imagenet_bgr256_mean, - pixel_std=constants.imagenet_bgr256_std, - input_format="BGR", -) diff --git a/detectron2/configs/common/optim.py b/detectron2/configs/common/optim.py deleted file mode 100644 index 6cf43e835f55739fbb80102b870efab950a0486d..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/optim.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch - -from detectron2.config import LazyCall as L -from detectron2.solver.build import get_default_optimizer_params - -SGD = L(torch.optim.SGD)( - params=L(get_default_optimizer_params)( - # params.model is meant to be set to the model object, before instantiating - # the optimizer. - weight_decay_norm=0.0 - ), - lr=0.02, - momentum=0.9, - weight_decay=1e-4, -) - - -AdamW = L(torch.optim.AdamW)( - params=L(get_default_optimizer_params)( - # params.model is meant to be set to the model object, before instantiating - # the optimizer. 
- base_lr="${..lr}", - weight_decay_norm=0.0, - ), - lr=1e-4, - betas=(0.9, 0.999), - weight_decay=0.1, -) diff --git a/detectron2/configs/common/train.py b/detectron2/configs/common/train.py deleted file mode 100644 index d2fd9c2f8792115d8b65bd71a26a10c3d94689c5..0000000000000000000000000000000000000000 --- a/detectron2/configs/common/train.py +++ /dev/null @@ -1,18 +0,0 @@ -# Common training-related configs that are designed for "tools/lazyconfig_train_net.py" -# You can use your own instead, together with your own train_net.py -train = dict( - output_dir="./output", - init_checkpoint="", - max_iter=90000, - amp=dict(enabled=False), # options for Automatic Mixed Precision - ddp=dict( # options for DistributedDataParallel - broadcast_buffers=False, - find_unused_parameters=False, - fp16_compression=False, - ), - checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer - eval_period=5000, - log_period=20, - device="cuda", - # ... -) diff --git a/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py deleted file mode 100644 index 3740e9bb08c5f168a9ab3a6d94561678bad1775c..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py +++ /dev/null @@ -1,9 +0,0 @@ -from .mask_rcnn_R_50_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -model.backbone.bottom_up.stages.depth = 101 diff --git a/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py deleted file mode 100644 index 18e5f0720c568db4ef0c97b59688b5e7866df606..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_R_101_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 2 # 100ep -> 200ep - -lr_multiplier.scheduler.milestones = [ - milestone * 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py deleted file mode 100644 index 63c54ee9a5ce2368494b775cc90fada1439feaa5..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_R_101_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 4 # 100ep -> 400ep - -lr_multiplier.scheduler.milestones = [ - milestone * 4 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py deleted file mode 100644 index df7a2aedf480ed8dc4aa3645e37420e9b893fae4..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py +++ /dev/null @@ -1,72 +0,0 @@ -import detectron2.data.transforms as T -from detectron2.config.lazy import LazyCall as L -from detectron2.layers.batch_norm import NaiveSyncBatchNorm -from detectron2.solver import WarmupParamScheduler -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from ..common.data.coco import dataloader -from 
..common.models.mask_rcnn_fpn import model -from ..common.optim import SGD as optimizer -from ..common.train import train - -# train from scratch -train.init_checkpoint = "" -train.amp.enabled = True -train.ddp.fp16_compression = True -model.backbone.bottom_up.freeze_at = 0 - -# SyncBN -# fmt: off -model.backbone.bottom_up.stem.norm = \ - model.backbone.bottom_up.stages.norm = \ - model.backbone.norm = "SyncBN" - -# Using NaiveSyncBatchNorm becase heads may have empty input. That is not supported by -# torch.nn.SyncBatchNorm. We can remove this after -# https://github.com/pytorch/pytorch/issues/36530 is fixed. -model.roi_heads.box_head.conv_norm = \ - model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c, - stats_mode="N") -# fmt: on - -# 2conv in RPN: -# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950 -model.proposal_generator.head.conv_dims = [-1, -1] - -# 4conv1fc box head -model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] -model.roi_heads.box_head.fc_dims = [1024] - -# resize_and_crop_image in: -# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950 -image_size = 1024 -dataloader.train.mapper.augmentations = [ - L(T.ResizeScale)( - min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size - ), - L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), - L(T.RandomFlip)(horizontal=True), -] - -# recompute boxes due to cropping -dataloader.train.mapper.recompute_boxes = True - -# larger batch-size. -dataloader.train.total_batch_size = 64 - -# Equivalent to 100 epochs. -# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep -train.max_iter = 184375 - -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[163889, 177546], - num_updates=train.max_iter, - ), - warmup_length=500 / train.max_iter, - warmup_factor=0.067, -) - -optimizer.lr = 0.1 -optimizer.weight_decay = 4e-5 diff --git a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py deleted file mode 100644 index 2a7c376da5f9269197c44079f3e0f3b09cdc63fa..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_R_50_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 2 # 100ep -> 200ep - -lr_multiplier.scheduler.milestones = [ - milestone * 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py deleted file mode 100644 index 97586b8f5330a9d995a0bffd1f5e7bd5b5656462..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_R_50_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 4 # 100ep -> 400ep - -lr_multiplier.scheduler.milestones = [ - milestone * 4 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git 
a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py deleted file mode 100644 index 2ca1ede262cf5c37a3a54778458c74aff1479411..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_R_50_FPN_50ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_R_50_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter //= 2 # 100ep -> 50ep - -lr_multiplier.scheduler.milestones = [ - milestone // 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py deleted file mode 100644 index ef0b6d16d4403fb5d16a3aeb71a22621a0be5e21..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ.py +++ /dev/null @@ -1,29 +0,0 @@ -from .mask_rcnn_R_50_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) -from detectron2.config import LazyCall as L -from detectron2.modeling.backbone import RegNet -from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock - -# Config source: -# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa -model.backbone.bottom_up = L(RegNet)( - stem_class=SimpleStem, - stem_width=32, - block_class=ResBottleneckBlock, - depth=23, - w_a=38.65, - w_0=96, - w_m=2.43, - group_width=40, - norm="SyncBN", - out_features=["s1", "s2", "s3", "s4"], -) -model.pixel_std = [57.375, 57.120, 58.395] - -# RegNets benefit from enabling cudnn benchmark mode -train.cudnn_benchmark = True diff --git a/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py deleted file mode 100644 index 731320e74ebed4d8ceec58c07cb906542b8b021b..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 2 # 100ep -> 200ep - -lr_multiplier.scheduler.milestones = [ - milestone * 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py deleted file mode 100644 index 8f369a2afedb6c6e69fd52ff9a9a6b1cdf965937..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 4 # 100ep -> 400ep - -lr_multiplier.scheduler.milestones = [ - milestone * 4 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py deleted file mode 100644 index 
ba2c3274a493d5136507364558c8289eb6ee6259..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ.py +++ /dev/null @@ -1,30 +0,0 @@ -from .mask_rcnn_R_50_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) -from detectron2.config import LazyCall as L -from detectron2.modeling.backbone import RegNet -from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock - -# Config source: -# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-InstanceSegmentation/mask_rcnn_regnety_4gf_dds_fpn_1x.py # noqa -model.backbone.bottom_up = L(RegNet)( - stem_class=SimpleStem, - stem_width=32, - block_class=ResBottleneckBlock, - depth=22, - w_a=31.41, - w_0=96, - w_m=2.24, - group_width=64, - se_ratio=0.25, - norm="SyncBN", - out_features=["s1", "s2", "s3", "s4"], -) -model.pixel_std = [57.375, 57.120, 58.395] - -# RegNets benefit from enabling cudnn benchmark mode -train.cudnn_benchmark = True diff --git a/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py deleted file mode 100644 index b867cc865e5ac4d7b70221da141894efd7cbd75c..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 2 # 100ep -> 200ep - -lr_multiplier.scheduler.milestones = [ - milestone * 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py b/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py deleted file mode 100644 index 7b86ea8c6c5c48f5d26c9e0df7cf96e745b17b34..0000000000000000000000000000000000000000 --- a/detectron2/configs/new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ.py +++ /dev/null @@ -1,14 +0,0 @@ -from .mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -train.max_iter *= 4 # 100ep -> 400ep - -lr_multiplier.scheduler.milestones = [ - milestone * 4 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/configs/quick_schedules/README.md b/detectron2/configs/quick_schedules/README.md deleted file mode 100644 index 4e6c82ef3f75a73c7006f33d7c850a0d4781a58f..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/README.md +++ /dev/null @@ -1,8 +0,0 @@ -These are quick configs for performance or accuracy regression tracking purposes. - -* `*instance_test.yaml`: can train on 2 GPUs. They are used to test whether the training can - successfully finish. They are not expected to produce reasonable training results. -* `*inference_acc_test.yaml`: They should be run using `--eval-only`. They run inference using pre-trained models and verify - the results are as expected. -* `*training_acc_test.yaml`: They should be trained on 8 GPUs. They finish in about an hour and verify the training accuracy - is within the normal range. 
diff --git a/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index b76788b6b4a29d0274518dee15cb28e70d48d599..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index e41a0fe7ffe9c3531741df49e546aa45cfe4fdee..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 1be53eb7d78b7af268cd03d58dea042864518fb0..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 52fc0ec03c8b87ab2be1dda97bec1e8c93e6bb5c..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) - TEST: ("coco_2017_val_100",) - PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index df496c1f2743a81f51a75d643f4c46faee5e4c6f..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: 
"detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" -DATASETS: - TEST: ("keypoints_coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 3dd209f693bd0bfdd46a2c9e7e750dede3abc141..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - ROI_HEADS: - NUM_CLASSES: 1 -DATASETS: - TRAIN: ("keypoints_coco_2017_val_100",) - TEST: ("keypoints_coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml deleted file mode 100644 index 4b92392f1c4457033ae4c87a521e339fe9e184ce..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml +++ /dev/null @@ -1,30 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False - LOSS_WEIGHT: 4.0 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss - RPN: - SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss -DATASETS: - TRAIN: ("keypoints_coco_2017_val",) - TEST: ("keypoints_coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - WARMUP_FACTOR: 0.33333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml b/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 9bd962878fea64035887c48981beeb8d41bfdbd0..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,28 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss - RPN: - SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss -DATASETS: - TRAIN: ("keypoints_coco_2017_val",) - TEST: ("keypoints_coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - WARMUP_FACTOR: 0.33333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml 
b/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml deleted file mode 100644 index ab6e69812b94ea7e071f29d9a6937d5c70805b5b..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.001 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: "value" - CLIP_VALUE: 1.0 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml deleted file mode 100644 index 5f18275274242d77531569b2ab6042fd929bcc32..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml deleted file mode 100644 index 6c4f1214efa520944fd941daec082ad45c164a23..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.001 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml deleted file mode 100644 index f68dd8f96c7896b5fc95d694a399f2ce417c1deb..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml +++ /dev/null @@ -1,22 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val",) - TEST: ("coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (600,) - MAX_SIZE_TRAIN: 1000 - MIN_SIZE_TEST: 800 - MAX_SIZE_TEST: 1000 -SOLVER: - IMS_PER_BATCH: 8 # base uses 16 - WARMUP_FACTOR: 0.33333 - WARMUP_ITERS: 100 - STEPS: (11000, 11600) - MAX_ITER: 12000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml deleted file mode 100644 index b72ffc9fe4f90b35f9d9bd6c042163842631470e..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" -MODEL: - WEIGHTS: 
"detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 8cad72eb23bd7b3eda783e4ca27b6f5922987b58..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] - AUG: - ENABLED: True - MIN_SIZES: (700, 800) # to save some time -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 6dbfcde0bf837990634d419a6dda1e2909c3cd7f..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml deleted file mode 100644 index 52f78762bda23331c97afd523cf98a5c118b113e..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" -MODEL: - ROI_BOX_HEAD: - TRAIN_ON_PRED_BOXES: True -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index aadae4ce898761e1e40e5af65a9e5ea01053b936..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val",) - TEST: ("coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (600,) - MAX_SIZE_TRAIN: 1000 - MIN_SIZE_TEST: 800 - MAX_SIZE_TEST: 1000 -SOLVER: - WARMUP_FACTOR: 0.3333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] diff --git a/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml deleted file mode 100644 index 
f5429b6330cd2848b4ab94f52c73274835f16fe8..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" -DATASETS: - TEST: ("coco_2017_val_100_panoptic_separated",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml deleted file mode 100644 index 7cdee7bfcf6dc75dda52602a0d9177ad0a9cc6ed..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_val_100_panoptic_separated",) - TEST: ("coco_2017_val_100_panoptic_separated",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 1 diff --git a/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml deleted file mode 100644 index f3bbf30196cb35434340d4c343cab0c96283cd4f..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_val_panoptic_separated",) - TEST: ("coco_2017_val_panoptic_separated",) -SOLVER: - BASE_LR: 0.01 - WARMUP_FACTOR: 0.001 - WARMUP_ITERS: 500 - STEPS: (5500,) - MAX_ITER: 7000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]] diff --git a/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index cb666c1a6b3e351227046bc9c2af8799408858e8..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] diff --git a/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml deleted file mode 100644 index 8d95c1f614296716374686b22055a587ccd052b9..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 
-DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index aa17e742d7e331dfd7355247ba4d6827dc662f3c..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] -FLOAT32_PRECISION: "highest" diff --git a/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 402d432477507dc36f04c4a9777cb80fe06b2809..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - STEPS: (30,) - MAX_ITER: 40 - BASE_LR: 0.005 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index bca74987d5218736983617883e0fe37f79d219b7..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TEST: ("coco_2017_val_100_panoptic_stuffonly",) -TEST: - EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git a/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml deleted file mode 100644 index 14ab606f219b462fe37fcc7d5fbdbe65cb5c2642..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) - TEST: ("coco_2017_val_100_panoptic_stuffonly",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 1f78d775889b11e9e76743de5ddb8139198edf61..0000000000000000000000000000000000000000 --- a/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: 
"SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_val_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) -SOLVER: - BASE_LR: 0.01 - WARMUP_FACTOR: 0.001 - WARMUP_ITERS: 300 - STEPS: (5500,) - MAX_ITER: 7000 -TEST: - EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/detectron2/datasets/README.md b/detectron2/datasets/README.md deleted file mode 100644 index 0eb44cc3b23beeb1755ab8d12002d26f13434235..0000000000000000000000000000000000000000 --- a/detectron2/datasets/README.md +++ /dev/null @@ -1,140 +0,0 @@ -# Use Builtin Datasets - -A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog) -for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc). -This document explains how to setup the builtin datasets so they can be used by the above APIs. -[Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`, -and how to add new datasets to them. - -Detectron2 has builtin support for a few datasets. -The datasets are assumed to exist in a directory specified by the environment variable -`DETECTRON2_DATASETS`. -Under this directory, detectron2 will look for datasets in the structure described below, if needed. -``` -$DETECTRON2_DATASETS/ - coco/ - lvis/ - cityscapes/ - VOC20{07,12}/ -``` - -You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. -If left unset, the default is `./datasets` relative to your current working directory. - -The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) -contains configs and models that use these builtin datasets. - -## Expected dataset structure for [COCO instance/keypoint detection](https://cocodataset.org/#download): - -``` -coco/ - annotations/ - instances_{train,val}2017.json - person_keypoints_{train,val}2017.json - {train,val}2017/ - # image files that are mentioned in the corresponding json -``` - -You can use the 2014 version of the dataset as well. - -Some of the builtin tests (`dev/run_*_tests.sh`) uses a tiny version of the COCO dataset, -which you can download with `./datasets/prepare_for_tests.sh`. - -## Expected dataset structure for PanopticFPN: - -Extract panoptic annotations from [COCO website](https://cocodataset.org/#download) -into the following structure: -``` -coco/ - annotations/ - panoptic_{train,val}2017.json - panoptic_{train,val}2017/ # png annotations - panoptic_stuff_{train,val}2017/ # generated by the script mentioned below -``` - -Install panopticapi by: -``` -pip install git+https://github.com/cocodataset/panopticapi.git -``` -Then, run `python datasets/prepare_panoptic_fpn.py`, to extract semantic annotations from panoptic annotations. 
- -## Expected dataset structure for [LVIS instance segmentation](https://www.lvisdataset.org/dataset): -``` -coco/ - {train,val,test}2017/ -lvis/ - lvis_v0.5_{train,val}.json - lvis_v0.5_image_info_test.json - lvis_v1_{train,val}.json - lvis_v1_image_info_test{,_challenge}.json -``` - -Install lvis-api by: -``` -pip install git+https://github.com/lvis-dataset/lvis-api.git -``` - -To evaluate models trained on the COCO dataset using LVIS annotations, -run `python datasets/prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations. - -## Expected dataset structure for [cityscapes](https://www.cityscapes-dataset.com/downloads/): -``` -cityscapes/ - gtFine/ - train/ - aachen/ - color.png, instanceIds.png, labelIds.png, polygons.json, - labelTrainIds.png - ... - val/ - test/ - # below are generated Cityscapes panoptic annotation - cityscapes_panoptic_train.json - cityscapes_panoptic_train/ - cityscapes_panoptic_val.json - cityscapes_panoptic_val/ - cityscapes_panoptic_test.json - cityscapes_panoptic_test/ - leftImg8bit/ - train/ - val/ - test/ -``` -Install cityscapes scripts by: -``` -pip install git+https://github.com/mcordts/cityscapesScripts.git -``` - -Note: to create labelTrainIds.png, first prepare the above structure, then run cityscapesescript with: -``` -CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py -``` -These files are not needed for instance segmentation. - -Note: to generate Cityscapes panoptic dataset, run cityscapesescript with: -``` -CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createPanopticImgs.py -``` -These files are not needed for semantic and instance segmentation. - -## Expected dataset structure for [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/index.html): -``` -VOC20{07,12}/ - Annotations/ - ImageSets/ - Main/ - trainval.txt - test.txt - # train.txt or val.txt, if you use these splits - JPEGImages/ -``` - -## Expected dataset structure for [ADE20k Scene Parsing](http://sceneparsing.csail.mit.edu/): -``` -ADEChallengeData2016/ - annotations/ - annotations_detectron2/ - images/ - objectInfo150.txt -``` -The directory `annotations_detectron2` is generated by running `python datasets/prepare_ade20k_sem_seg.py`. diff --git a/detectron2/datasets/prepare_ade20k_sem_seg.py b/detectron2/datasets/prepare_ade20k_sem_seg.py deleted file mode 100644 index 8b4a58d8f2877544498e328b6d269f23aa1eb59f..0000000000000000000000000000000000000000 --- a/detectron2/datasets/prepare_ade20k_sem_seg.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -import os -from pathlib import Path -import tqdm -from PIL import Image - - -def convert(input, output): - img = np.asarray(Image.open(input)) - assert img.dtype == np.uint8 - img = img - 1 # 0 (ignore) becomes 255. 
others are shifted by 1 - Image.fromarray(img).save(output) - - -if __name__ == "__main__": - dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016" - for name in ["training", "validation"]: - annotation_dir = dataset_dir / "annotations" / name - output_dir = dataset_dir / "annotations_detectron2" / name - output_dir.mkdir(parents=True, exist_ok=True) - for file in tqdm.tqdm(list(annotation_dir.iterdir())): - output_file = output_dir / file.name - convert(file, output_file) diff --git a/detectron2/datasets/prepare_cocofied_lvis.py b/detectron2/datasets/prepare_cocofied_lvis.py deleted file mode 100644 index 245c88482a9e2405e5a912b5c560aed78a614a13..0000000000000000000000000000000000000000 --- a/detectron2/datasets/prepare_cocofied_lvis.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import copy -import json -import os -from collections import defaultdict - -# This mapping is extracted from the official LVIS mapping: -# https://github.com/lvis-dataset/lvis-api/blob/master/data/coco_to_synset.json -COCO_SYNSET_CATEGORIES = [ - {"synset": "person.n.01", "coco_cat_id": 1}, - {"synset": "bicycle.n.01", "coco_cat_id": 2}, - {"synset": "car.n.01", "coco_cat_id": 3}, - {"synset": "motorcycle.n.01", "coco_cat_id": 4}, - {"synset": "airplane.n.01", "coco_cat_id": 5}, - {"synset": "bus.n.01", "coco_cat_id": 6}, - {"synset": "train.n.01", "coco_cat_id": 7}, - {"synset": "truck.n.01", "coco_cat_id": 8}, - {"synset": "boat.n.01", "coco_cat_id": 9}, - {"synset": "traffic_light.n.01", "coco_cat_id": 10}, - {"synset": "fireplug.n.01", "coco_cat_id": 11}, - {"synset": "stop_sign.n.01", "coco_cat_id": 13}, - {"synset": "parking_meter.n.01", "coco_cat_id": 14}, - {"synset": "bench.n.01", "coco_cat_id": 15}, - {"synset": "bird.n.01", "coco_cat_id": 16}, - {"synset": "cat.n.01", "coco_cat_id": 17}, - {"synset": "dog.n.01", "coco_cat_id": 18}, - {"synset": "horse.n.01", "coco_cat_id": 19}, - {"synset": "sheep.n.01", "coco_cat_id": 20}, - {"synset": "beef.n.01", "coco_cat_id": 21}, - {"synset": "elephant.n.01", "coco_cat_id": 22}, - {"synset": "bear.n.01", "coco_cat_id": 23}, - {"synset": "zebra.n.01", "coco_cat_id": 24}, - {"synset": "giraffe.n.01", "coco_cat_id": 25}, - {"synset": "backpack.n.01", "coco_cat_id": 27}, - {"synset": "umbrella.n.01", "coco_cat_id": 28}, - {"synset": "bag.n.04", "coco_cat_id": 31}, - {"synset": "necktie.n.01", "coco_cat_id": 32}, - {"synset": "bag.n.06", "coco_cat_id": 33}, - {"synset": "frisbee.n.01", "coco_cat_id": 34}, - {"synset": "ski.n.01", "coco_cat_id": 35}, - {"synset": "snowboard.n.01", "coco_cat_id": 36}, - {"synset": "ball.n.06", "coco_cat_id": 37}, - {"synset": "kite.n.03", "coco_cat_id": 38}, - {"synset": "baseball_bat.n.01", "coco_cat_id": 39}, - {"synset": "baseball_glove.n.01", "coco_cat_id": 40}, - {"synset": "skateboard.n.01", "coco_cat_id": 41}, - {"synset": "surfboard.n.01", "coco_cat_id": 42}, - {"synset": "tennis_racket.n.01", "coco_cat_id": 43}, - {"synset": "bottle.n.01", "coco_cat_id": 44}, - {"synset": "wineglass.n.01", "coco_cat_id": 46}, - {"synset": "cup.n.01", "coco_cat_id": 47}, - {"synset": "fork.n.01", "coco_cat_id": 48}, - {"synset": "knife.n.01", "coco_cat_id": 49}, - {"synset": "spoon.n.01", "coco_cat_id": 50}, - {"synset": "bowl.n.03", "coco_cat_id": 51}, - {"synset": "banana.n.02", "coco_cat_id": 52}, - {"synset": "apple.n.01", "coco_cat_id": 53}, - {"synset": "sandwich.n.01", "coco_cat_id": 54}, - {"synset": 
"orange.n.01", "coco_cat_id": 55}, - {"synset": "broccoli.n.01", "coco_cat_id": 56}, - {"synset": "carrot.n.01", "coco_cat_id": 57}, - {"synset": "frank.n.02", "coco_cat_id": 58}, - {"synset": "pizza.n.01", "coco_cat_id": 59}, - {"synset": "doughnut.n.02", "coco_cat_id": 60}, - {"synset": "cake.n.03", "coco_cat_id": 61}, - {"synset": "chair.n.01", "coco_cat_id": 62}, - {"synset": "sofa.n.01", "coco_cat_id": 63}, - {"synset": "pot.n.04", "coco_cat_id": 64}, - {"synset": "bed.n.01", "coco_cat_id": 65}, - {"synset": "dining_table.n.01", "coco_cat_id": 67}, - {"synset": "toilet.n.02", "coco_cat_id": 70}, - {"synset": "television_receiver.n.01", "coco_cat_id": 72}, - {"synset": "laptop.n.01", "coco_cat_id": 73}, - {"synset": "mouse.n.04", "coco_cat_id": 74}, - {"synset": "remote_control.n.01", "coco_cat_id": 75}, - {"synset": "computer_keyboard.n.01", "coco_cat_id": 76}, - {"synset": "cellular_telephone.n.01", "coco_cat_id": 77}, - {"synset": "microwave.n.02", "coco_cat_id": 78}, - {"synset": "oven.n.01", "coco_cat_id": 79}, - {"synset": "toaster.n.02", "coco_cat_id": 80}, - {"synset": "sink.n.01", "coco_cat_id": 81}, - {"synset": "electric_refrigerator.n.01", "coco_cat_id": 82}, - {"synset": "book.n.01", "coco_cat_id": 84}, - {"synset": "clock.n.01", "coco_cat_id": 85}, - {"synset": "vase.n.01", "coco_cat_id": 86}, - {"synset": "scissors.n.01", "coco_cat_id": 87}, - {"synset": "teddy.n.01", "coco_cat_id": 88}, - {"synset": "hand_blower.n.01", "coco_cat_id": 89}, - {"synset": "toothbrush.n.01", "coco_cat_id": 90}, -] - - -def cocofy_lvis(input_filename, output_filename): - """ - Filter LVIS instance segmentation annotations to remove all categories that are not included in - COCO. The new json files can be used to evaluate COCO AP using `lvis-api`. The category ids in - the output json are the incontiguous COCO dataset ids. - - Args: - input_filename (str): path to the LVIS json file. - output_filename (str): path to the COCOfied json file. 
- """ - - with open(input_filename, "r") as f: - lvis_json = json.load(f) - - lvis_annos = lvis_json.pop("annotations") - cocofied_lvis = copy.deepcopy(lvis_json) - lvis_json["annotations"] = lvis_annos - - # Mapping from lvis cat id to coco cat id via synset - lvis_cat_id_to_synset = {cat["id"]: cat["synset"] for cat in lvis_json["categories"]} - synset_to_coco_cat_id = {x["synset"]: x["coco_cat_id"] for x in COCO_SYNSET_CATEGORIES} - # Synsets that we will keep in the dataset - synsets_to_keep = set(synset_to_coco_cat_id.keys()) - coco_cat_id_with_instances = defaultdict(int) - - new_annos = [] - ann_id = 1 - for ann in lvis_annos: - lvis_cat_id = ann["category_id"] - synset = lvis_cat_id_to_synset[lvis_cat_id] - if synset not in synsets_to_keep: - continue - coco_cat_id = synset_to_coco_cat_id[synset] - new_ann = copy.deepcopy(ann) - new_ann["category_id"] = coco_cat_id - new_ann["id"] = ann_id - ann_id += 1 - new_annos.append(new_ann) - coco_cat_id_with_instances[coco_cat_id] += 1 - cocofied_lvis["annotations"] = new_annos - - for image in cocofied_lvis["images"]: - for key in ["not_exhaustive_category_ids", "neg_category_ids"]: - new_category_list = [] - for lvis_cat_id in image[key]: - synset = lvis_cat_id_to_synset[lvis_cat_id] - if synset not in synsets_to_keep: - continue - coco_cat_id = synset_to_coco_cat_id[synset] - new_category_list.append(coco_cat_id) - coco_cat_id_with_instances[coco_cat_id] += 1 - image[key] = new_category_list - - coco_cat_id_with_instances = set(coco_cat_id_with_instances.keys()) - - new_categories = [] - for cat in lvis_json["categories"]: - synset = cat["synset"] - if synset not in synsets_to_keep: - continue - coco_cat_id = synset_to_coco_cat_id[synset] - if coco_cat_id not in coco_cat_id_with_instances: - continue - new_cat = copy.deepcopy(cat) - new_cat["id"] = coco_cat_id - new_categories.append(new_cat) - cocofied_lvis["categories"] = new_categories - - with open(output_filename, "w") as f: - json.dump(cocofied_lvis, f) - print("{} is COCOfied and stored in {}.".format(input_filename, output_filename)) - - -if __name__ == "__main__": - dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "lvis") - for s in ["lvis_v0.5_train", "lvis_v0.5_val"]: - print("Start COCOfing {}.".format(s)) - cocofy_lvis( - os.path.join(dataset_dir, "{}.json".format(s)), - os.path.join(dataset_dir, "{}_cocofied.json".format(s)), - ) diff --git a/detectron2/datasets/prepare_for_tests.sh b/detectron2/datasets/prepare_for_tests.sh deleted file mode 100644 index 67e875a41da652b2fcae6631b76d94584935ddb9..0000000000000000000000000000000000000000 --- a/detectron2/datasets/prepare_for_tests.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -# Download the mini dataset (coco val2017_100, with only 100 images) -# to be used in unittests & integration tests. - -cd "${0%/*}" - -BASE=https://dl.fbaipublicfiles.com/detectron2 -ROOT=${DETECTRON2_DATASETS:-./} -ROOT=${ROOT/#\~/$HOME} # expand ~ to HOME -mkdir -p $ROOT/coco/annotations - -for anno in instances_val2017_100 \ - person_keypoints_val2017_100 ; do - - dest=$ROOT/coco/annotations/$anno.json - [[ -s $dest ]] && { - echo "$dest exists. Skipping ..." - } || { - wget $BASE/annotations/coco/$anno.json -O $dest - } -done - -dest=$ROOT/coco/val2017_100.tgz -[[ -d $ROOT/coco/val2017 ]] && { - echo "$ROOT/coco/val2017 exists. Skipping ..." 
-} || { - wget $BASE/annotations/coco/val2017_100.tgz -O $dest - tar xzf $dest -C $ROOT/coco/ && rm -f $dest -} diff --git a/detectron2/datasets/prepare_panoptic_fpn.py b/detectron2/datasets/prepare_panoptic_fpn.py deleted file mode 100644 index 597d791afab1bcc0013203a66c7fba225065eebe..0000000000000000000000000000000000000000 --- a/detectron2/datasets/prepare_panoptic_fpn.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import functools -import json -import multiprocessing as mp -import numpy as np -import os -import time -from fvcore.common.download import download -from panopticapi.utils import rgb2id -from PIL import Image - -from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES - - -def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map): - panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32) - panoptic = rgb2id(panoptic) - output = np.zeros_like(panoptic, dtype=np.uint8) + 255 - for seg in segments: - cat_id = seg["category_id"] - new_cat_id = id_map[cat_id] - output[panoptic == seg["id"]] = new_cat_id - Image.fromarray(output).save(output_semantic) - - -def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories): - """ - Create semantic segmentation annotations from panoptic segmentation - annotations, to be used by PanopticFPN. - - It maps all thing categories to class 0, and maps all unlabeled pixels to class 255. - It maps all stuff categories to contiguous ids starting from 1. - - Args: - panoptic_json (str): path to the panoptic json file, in COCO's format. - panoptic_root (str): a directory with panoptic annotation files, in COCO's format. - sem_seg_root (str): a directory to output semantic annotation files - categories (list[dict]): category metadata. Each dict needs to have: - "id": corresponds to the "category_id" in the json annotations - "isthing": 0 or 1 - """ - os.makedirs(sem_seg_root, exist_ok=True) - - stuff_ids = [k["id"] for k in categories if k["isthing"] == 0] - thing_ids = [k["id"] for k in categories if k["isthing"] == 1] - id_map = {} # map from category id to id in the output semantic annotation - assert len(stuff_ids) <= 254 - for i, stuff_id in enumerate(stuff_ids): - id_map[stuff_id] = i + 1 - for thing_id in thing_ids: - id_map[thing_id] = 0 - id_map[0] = 255 - - with open(panoptic_json) as f: - obj = json.load(f) - - pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) - - def iter_annotations(): - for anno in obj["annotations"]: - file_name = anno["file_name"] - segments = anno["segments_info"] - input = os.path.join(panoptic_root, file_name) - output = os.path.join(sem_seg_root, file_name) - yield input, output, segments - - print("Start writing to {} ...".format(sem_seg_root)) - start = time.time() - pool.starmap( - functools.partial(_process_panoptic_to_semantic, id_map=id_map), - iter_annotations(), - chunksize=100, - ) - print("Finished. 
time: {:.2f}s".format(time.time() - start)) - - -if __name__ == "__main__": - dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco") - for s in ["val2017", "train2017"]: - separate_coco_semantic_from_panoptic( - os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)), - os.path.join(dataset_dir, "panoptic_{}".format(s)), - os.path.join(dataset_dir, "panoptic_stuff_{}".format(s)), - COCO_CATEGORIES, - ) - - # Prepare val2017_100 for quick testing: - - dest_dir = os.path.join(dataset_dir, "annotations/") - URL_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - download(URL_PREFIX + "annotations/coco/panoptic_val2017_100.json", dest_dir) - with open(os.path.join(dest_dir, "panoptic_val2017_100.json")) as f: - obj = json.load(f) - - def link_val100(dir_full, dir_100): - print("Creating " + dir_100 + " ...") - os.makedirs(dir_100, exist_ok=True) - for img in obj["images"]: - basename = os.path.splitext(img["file_name"])[0] - src = os.path.join(dir_full, basename + ".png") - dst = os.path.join(dir_100, basename + ".png") - src = os.path.relpath(src, start=dir_100) - os.symlink(src, dst) - - link_val100( - os.path.join(dataset_dir, "panoptic_val2017"), - os.path.join(dataset_dir, "panoptic_val2017_100"), - ) - - link_val100( - os.path.join(dataset_dir, "panoptic_stuff_val2017"), - os.path.join(dataset_dir, "panoptic_stuff_val2017_100"), - ) diff --git a/detectron2/demo/README.md b/detectron2/demo/README.md deleted file mode 100644 index 133d8d38e5e9f5f44aca92c59f73309e166d7132..0000000000000000000000000000000000000000 --- a/detectron2/demo/README.md +++ /dev/null @@ -1,8 +0,0 @@ - -## Detectron2 Demo - -We provide a command line tool to run a simple demo of builtin configs. -The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). - -See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-) -for a high-quality demo generated with this tool. diff --git a/detectron2/demo/demo.py b/detectron2/demo/demo.py deleted file mode 100644 index f590ca2f24b891623589fe42bc0ebbbf7eabbca5..0000000000000000000000000000000000000000 --- a/detectron2/demo/demo.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import argparse -import glob -import multiprocessing as mp -import numpy as np -import os -import tempfile -import time -import warnings -import cv2 -import tqdm - -from detectron2.config import get_cfg -from detectron2.data.detection_utils import read_image -from detectron2.utils.logger import setup_logger - -from vision.fair.detectron2.demo.predictor import VisualizationDemo - -# constants -WINDOW_NAME = "COCO detections" - - -def setup_cfg(args): - # load config from file and command-line arguments - cfg = get_cfg() - # To use demo for Panoptic-DeepLab, please uncomment the following two lines. 
- # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa - # add_panoptic_deeplab_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - # Set score_threshold for builtin models - cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold - cfg.freeze() - return cfg - - -def get_parser(): - parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs") - parser.add_argument( - "--config-file", - default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", - metavar="FILE", - help="path to config file", - ) - parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") - parser.add_argument("--video-input", help="Path to video file.") - parser.add_argument( - "--input", - nargs="+", - help="A list of space separated input images; " - "or a single glob pattern such as 'directory/*.jpg'", - ) - parser.add_argument( - "--output", - help="A file or directory to save output visualizations. " - "If not given, will show output in an OpenCV window.", - ) - - parser.add_argument( - "--confidence-threshold", - type=float, - default=0.5, - help="Minimum score for instance predictions to be shown", - ) - parser.add_argument( - "--opts", - help="Modify config options using the command-line 'KEY VALUE' pairs", - default=[], - nargs=argparse.REMAINDER, - ) - return parser - - -def test_opencv_video_format(codec, file_ext): - with tempfile.TemporaryDirectory(prefix="video_format_test") as dir: - filename = os.path.join(dir, "test_file" + file_ext) - writer = cv2.VideoWriter( - filename=filename, - fourcc=cv2.VideoWriter_fourcc(*codec), - fps=float(30), - frameSize=(10, 10), - isColor=True, - ) - [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)] - writer.release() - if os.path.isfile(filename): - return True - return False - - -def main() -> None: - mp.set_start_method("spawn", force=True) - args = get_parser().parse_args() - setup_logger(name="fvcore") - logger = setup_logger() - logger.info("Arguments: " + str(args)) - - cfg = setup_cfg(args) - - demo = VisualizationDemo(cfg) - - if args.input: - if len(args.input) == 1: - args.input = glob.glob(os.path.expanduser(args.input[0])) - assert args.input, "The input path(s) was not found" - for path in tqdm.tqdm(args.input, disable=not args.output): - # use PIL, to be consistent with evaluation - img = read_image(path, format="BGR") - start_time = time.time() - predictions, visualized_output = demo.run_on_image(img) - logger.info( - "{}: {} in {:.2f}s".format( - path, - ( - "detected {} instances".format(len(predictions["instances"])) - if "instances" in predictions - else "finished" - ), - time.time() - start_time, - ) - ) - - if args.output: - if os.path.isdir(args.output): - assert os.path.isdir(args.output), args.output - out_filename = os.path.join(args.output, os.path.basename(path)) - else: - assert len(args.input) == 1, "Please specify a directory with args.output" - out_filename = args.output - visualized_output.save(out_filename) - else: - cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) - cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) - if cv2.waitKey(0) == 27: - break # esc to quit - elif args.webcam: - assert args.input is None, "Cannot have both --input and --webcam!" 
- assert args.output is None, "output not yet supported with --webcam!" - cam = cv2.VideoCapture(0) - for vis in tqdm.tqdm(demo.run_on_video(cam)): - cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) - cv2.imshow(WINDOW_NAME, vis) - if cv2.waitKey(1) == 27: - break # esc to quit - cam.release() - cv2.destroyAllWindows() - elif args.video_input: - video = cv2.VideoCapture(args.video_input) - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - frames_per_second = video.get(cv2.CAP_PROP_FPS) - num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - basename = os.path.basename(args.video_input) - codec, file_ext = ( - ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4") - ) - if codec == ".mp4v": - warnings.warn("x264 codec not available, switching to mp4v") - if args.output: - if os.path.isdir(args.output): - output_fname = os.path.join(args.output, basename) - output_fname = os.path.splitext(output_fname)[0] + file_ext - else: - output_fname = args.output - assert not os.path.isfile(output_fname), output_fname - output_file = cv2.VideoWriter( - filename=output_fname, - # some installation of opencv may not support x264 (due to its license), - # you can try other format (e.g. MPEG) - fourcc=cv2.VideoWriter_fourcc(*codec), - fps=float(frames_per_second), - frameSize=(width, height), - isColor=True, - ) - assert os.path.isfile(args.video_input) - for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): - if args.output: - output_file.write(vis_frame) - else: - cv2.namedWindow(basename, cv2.WINDOW_NORMAL) - cv2.imshow(basename, vis_frame) - if cv2.waitKey(1) == 27: - break # esc to quit - video.release() - if args.output: - output_file.release() - else: - cv2.destroyAllWindows() - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/demo/predictor.py b/detectron2/demo/predictor.py deleted file mode 100644 index 1c589a18b49a0c36f82396918544f653923def1d..0000000000000000000000000000000000000000 --- a/detectron2/demo/predictor.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import atexit -import bisect -import multiprocessing as mp -from collections import deque -import cv2 -import torch - -from detectron2.data import MetadataCatalog -from detectron2.engine.defaults import DefaultPredictor -from detectron2.utils.video_visualizer import VideoVisualizer -from detectron2.utils.visualizer import ColorMode, Visualizer - - -class VisualizationDemo: - def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): - """ - Args: - cfg (CfgNode): - instance_mode (ColorMode): - parallel (bool): whether to run the model in different processes from visualization. - Useful since the visualization logic can be slow. - """ - self.metadata = MetadataCatalog.get( - cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" - ) - self.cpu_device = torch.device("cpu") - self.instance_mode = instance_mode - - self.parallel = parallel - if parallel: - num_gpu = torch.cuda.device_count() - self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) - else: - self.predictor = DefaultPredictor(cfg) - - def run_on_image(self, image): - """ - Args: - image (np.ndarray): an image of shape (H, W, C) (in BGR order). - This is the format used by OpenCV. - - Returns: - predictions (dict): the output of the model. - vis_output (VisImage): the visualized image output. 
- """ - vis_output = None - predictions = self.predictor(image) - # Convert image from OpenCV BGR format to Matplotlib RGB format. - image = image[:, :, ::-1] - visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) - if "panoptic_seg" in predictions: - panoptic_seg, segments_info = predictions["panoptic_seg"] - vis_output = visualizer.draw_panoptic_seg_predictions( - panoptic_seg.to(self.cpu_device), segments_info - ) - else: - if "sem_seg" in predictions: - vis_output = visualizer.draw_sem_seg( - predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) - ) - if "instances" in predictions: - instances = predictions["instances"].to(self.cpu_device) - vis_output = visualizer.draw_instance_predictions(predictions=instances) - - return predictions, vis_output - - def _frame_from_video(self, video): - while video.isOpened(): - success, frame = video.read() - if success: - yield frame - else: - break - - def run_on_video(self, video): - """ - Visualizes predictions on frames of the input video. - - Args: - video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be - either a webcam or a video file. - - Yields: - ndarray: BGR visualizations of each video frame. - """ - video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) - - def process_predictions(frame, predictions): - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - if "panoptic_seg" in predictions: - panoptic_seg, segments_info = predictions["panoptic_seg"] - vis_frame = video_visualizer.draw_panoptic_seg_predictions( - frame, panoptic_seg.to(self.cpu_device), segments_info - ) - elif "instances" in predictions: - predictions = predictions["instances"].to(self.cpu_device) - vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) - elif "sem_seg" in predictions: - vis_frame = video_visualizer.draw_sem_seg( - frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) - ) - - # Converts Matplotlib RGB format to OpenCV BGR format - vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) - return vis_frame - - frame_gen = self._frame_from_video(video) - if self.parallel: - buffer_size = self.predictor.default_buffer_size - - frame_data = deque() - - for cnt, frame in enumerate(frame_gen): - frame_data.append(frame) - self.predictor.put(frame) - - if cnt >= buffer_size: - frame = frame_data.popleft() - predictions = self.predictor.get() - yield process_predictions(frame, predictions) - - while len(frame_data): - frame = frame_data.popleft() - predictions = self.predictor.get() - yield process_predictions(frame, predictions) - else: - for frame in frame_gen: - yield process_predictions(frame, self.predictor(frame)) - - -class AsyncPredictor: - """ - A predictor that runs the model asynchronously, possibly on >1 GPUs. - Because rendering the visualization takes considerably amount of time, - this helps improve throughput a little bit when rendering videos. 
- """ - - class _StopToken: - pass - - class _PredictWorker(mp.Process): - def __init__(self, cfg, task_queue, result_queue): - self.cfg = cfg - self.task_queue = task_queue - self.result_queue = result_queue - super().__init__() - - def run(self): - predictor = DefaultPredictor(self.cfg) - - while True: - task = self.task_queue.get() - if isinstance(task, AsyncPredictor._StopToken): - break - idx, data = task - result = predictor(data) - self.result_queue.put((idx, result)) - - def __init__(self, cfg, num_gpus: int = 1): - """ - Args: - cfg (CfgNode): - num_gpus (int): if 0, will run on CPU - """ - num_workers = max(num_gpus, 1) - self.task_queue = mp.Queue(maxsize=num_workers * 3) - self.result_queue = mp.Queue(maxsize=num_workers * 3) - self.procs = [] - for gpuid in range(max(num_gpus, 1)): - cfg = cfg.clone() - cfg.defrost() - cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" - self.procs.append( - AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) - ) - - self.put_idx = 0 - self.get_idx = 0 - self.result_rank = [] - self.result_data = [] - - for p in self.procs: - p.start() - atexit.register(self.shutdown) - - def put(self, image): - self.put_idx += 1 - self.task_queue.put((self.put_idx, image)) - - def get(self): - self.get_idx += 1 # the index needed for this request - if len(self.result_rank) and self.result_rank[0] == self.get_idx: - res = self.result_data[0] - del self.result_data[0], self.result_rank[0] - return res - - while True: - # make sure the results are returned in the correct order - idx, res = self.result_queue.get() - if idx == self.get_idx: - return res - insert = bisect.bisect(self.result_rank, idx) - self.result_rank.insert(insert, idx) - self.result_data.insert(insert, res) - - def __len__(self): - return self.put_idx - self.get_idx - - def __call__(self, image): - self.put(image) - return self.get() - - def shutdown(self): - for _ in self.procs: - self.task_queue.put(AsyncPredictor._StopToken()) - - @property - def default_buffer_size(self): - return len(self.procs) * 5 diff --git a/detectron2/detectron2/__init__.py b/detectron2/detectron2/__init__.py deleted file mode 100644 index bdd994b49294485c27610772f97f177741f5518f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -from .utils.env import setup_environment - -setup_environment() - - -# This line will be programatically read/write by setup.py. -# Leave them at the bottom of this file and don't touch them. -__version__ = "0.6" diff --git a/detectron2/detectron2/checkpoint/__init__.py b/detectron2/detectron2/checkpoint/__init__.py deleted file mode 100644 index 99da0469ae7e169d8970e4b642fed3f870076860..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/checkpoint/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. -# File: - - -from . 
import catalog as _UNUSED # register the handler -from .detection_checkpoint import DetectionCheckpointer -from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer - -__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/detectron2/detectron2/checkpoint/c2_model_loading.py b/detectron2/detectron2/checkpoint/c2_model_loading.py deleted file mode 100644 index 551753b15a999774482e6e632b342acaa26f1b52..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/checkpoint/c2_model_loading.py +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import logging -import re -from typing import Dict, List -import torch - - -def convert_basic_c2_names(original_keys): - """ - Apply some basic name conversion to names in C2 weights. - It only deals with typical backbone models. - - Args: - original_keys (list[str]): - Returns: - list[str]: The same number of strings matching those in original_keys. - """ - layer_keys = copy.deepcopy(original_keys) - layer_keys = [ - {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys - ] # some hard-coded mappings - - layer_keys = [k.replace("_", ".") for k in layer_keys] - layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] - layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] - # Uniform both bn and gn names to "norm" - layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] - layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] - - # stem - layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] - # to avoid mis-matching with "conv1" in other components (e.g. 
detection head) - layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] - - # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) - # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] - # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] - # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] - # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] - - # blocks - layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] - layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] - layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] - layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] - - # DensePose substitutions - layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] - layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] - layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] - layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] - layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] - return layer_keys - - -def convert_c2_detectron_names(weights): - """ - Map Caffe2 Detectron weight names to Detectron2 names. - - Args: - weights (dict): name -> tensor - - Returns: - dict: detectron2 names -> tensor - dict: detectron2 names -> C2 names - """ - logger = logging.getLogger(__name__) - logger.info("Renaming Caffe2 weights ......") - original_keys = sorted(weights.keys()) - layer_keys = copy.deepcopy(original_keys) - - layer_keys = convert_basic_c2_names(layer_keys) - - # -------------------------------------------------------------------------- - # RPN hidden representation conv - # -------------------------------------------------------------------------- - # FPN case - # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then - # shared for all other levels, hence the appearance of "fpn2" - layer_keys = [ - k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys - ] - # Non-FPN case - layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] - - # -------------------------------------------------------------------------- - # RPN box transformation conv - # -------------------------------------------------------------------------- - # FPN case (see note above about "fpn2") - layer_keys = [ - k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") - for k in layer_keys - ] - layer_keys = [ - k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") - for k in layer_keys - ] - # Non-FPN case - layer_keys = [ - k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys - ] - layer_keys = [ - k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") - for k in layer_keys - ] - - # -------------------------------------------------------------------------- - # Fast R-CNN box head - # -------------------------------------------------------------------------- - layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] - layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] - layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] - layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] - # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s - layer_keys = 
[re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] - - # -------------------------------------------------------------------------- - # FPN lateral and output convolutions - # -------------------------------------------------------------------------- - def fpn_map(name): - """ - Look for keys with the following patterns: - 1) Starts with "fpn.inner." - Example: "fpn.inner.res2.2.sum.lateral.weight" - Meaning: These are lateral pathway convolutions - 2) Starts with "fpn.res" - Example: "fpn.res2.2.sum.weight" - Meaning: These are FPN output convolutions - """ - splits = name.split(".") - norm = ".norm" if "norm" in splits else "" - if name.startswith("fpn.inner."): - # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] - stage = int(splits[2][len("res") :]) - return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) - elif name.startswith("fpn.res"): - # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] - stage = int(splits[1][len("res") :]) - return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) - return name - - layer_keys = [fpn_map(k) for k in layer_keys] - - # -------------------------------------------------------------------------- - # Mask R-CNN mask head - # -------------------------------------------------------------------------- - # roi_heads.StandardROIHeads case - layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] - layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] - layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] - # roi_heads.Res5ROIHeads case - layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] - - # -------------------------------------------------------------------------- - # Keypoint R-CNN head - # -------------------------------------------------------------------------- - # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" - layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] - layer_keys = [ - k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys - ] - layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] - - # -------------------------------------------------------------------------- - # Done with replacements - # -------------------------------------------------------------------------- - assert len(set(layer_keys)) == len(layer_keys) - assert len(original_keys) == len(layer_keys) - - new_weights = {} - new_keys_to_original_keys = {} - for orig, renamed in zip(original_keys, layer_keys): - new_keys_to_original_keys[renamed] = orig - if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): - # remove the meaningless prediction weight for background class - new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 - new_weights[renamed] = weights[orig][new_start_idx:] - logger.info( - "Remove prediction weight for background class in {}. 
The shape changes from " - "{} to {}.".format( - renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) - ) - ) - elif renamed.startswith("cls_score."): - # move weights of bg class from original index 0 to last index - logger.info( - "Move classification weights for background class in {} from index 0 to " - "index {}.".format(renamed, weights[orig].shape[0] - 1) - ) - new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) - else: - new_weights[renamed] = weights[orig] - - return new_weights, new_keys_to_original_keys - - -# Note the current matching is not symmetric. -# it assumes model_state_dict will have longer names. -def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): - """ - Match names between the two state-dict, and returns a new chkpt_state_dict with names - converted to match model_state_dict with heuristics. The returned dict can be later - loaded with fvcore checkpointer. - If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 - model and will be renamed at first. - - Strategy: suppose that the models that we will create will have prefixes appended - to each of its keys, for example due to an extra level of nesting that the original - pre-trained weights from ImageNet won't contain. For example, model.state_dict() - might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains - res2.conv1.weight. We thus want to match both parameters together. - For that, we look for each model weight, look among all loaded keys if there is one - that is a suffix of the current weight name, and use it if that's the case. - If multiple matches exist, take the one with longest size - of the corresponding name. For example, for the same model as before, the pretrained - weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, - we want to match backbone[0].body.conv1.weight to conv1.weight, and - backbone[0].body.res2.conv1.weight to res2.conv1.weight. - """ - model_keys = sorted(model_state_dict.keys()) - if c2_conversion: - ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) - # original_keys: the name in the original dict (before renaming) - else: - original_keys = {x: x for x in ckpt_state_dict.keys()} - ckpt_keys = sorted(ckpt_state_dict.keys()) - - def match(a, b): - # Matched ckpt_key should be a complete (starts with '.') suffix. - # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, - # but matches whatever_conv1 or mesh_head.whatever_conv1. - return a == b or a.endswith("." 
+ b) - - # get a matrix of string matches, where each (i, j) entry correspond to the size of the - # ckpt_key string, if it matches - match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] - match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) - # use the matched one with longest size in case of multiple matches - max_match_size, idxs = match_matrix.max(1) - # remove indices that correspond to no-match - idxs[max_match_size == 0] = -1 - - logger = logging.getLogger(__name__) - # matched_pairs (matched checkpoint key --> matched model key) - matched_keys = {} - result_state_dict = {} - for idx_model, idx_ckpt in enumerate(idxs.tolist()): - if idx_ckpt == -1: - continue - key_model = model_keys[idx_model] - key_ckpt = ckpt_keys[idx_ckpt] - value_ckpt = ckpt_state_dict[key_ckpt] - shape_in_model = model_state_dict[key_model].shape - - if shape_in_model != value_ckpt.shape: - logger.warning( - "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( - key_ckpt, value_ckpt.shape, key_model, shape_in_model - ) - ) - logger.warning( - "{} will not be loaded. Please double check and see if this is desired.".format( - key_ckpt - ) - ) - continue - - assert key_model not in result_state_dict - result_state_dict[key_model] = value_ckpt - if key_ckpt in matched_keys: # already added to matched_keys - logger.error( - "Ambiguity found for {} in checkpoint!" - "It matches at least two keys in the model ({} and {}).".format( - key_ckpt, key_model, matched_keys[key_ckpt] - ) - ) - raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") - - matched_keys[key_ckpt] = key_model - - # logging: - matched_model_keys = sorted(matched_keys.values()) - if len(matched_model_keys) == 0: - logger.warning("No weights in checkpoint matched with model.") - return ckpt_state_dict - common_prefix = _longest_common_prefix(matched_model_keys) - rev_matched_keys = {v: k for k, v in matched_keys.items()} - original_keys = {k: original_keys[rev_matched_keys[k]] for k in matched_model_keys} - - model_key_groups = _group_keys_by_module(matched_model_keys, original_keys) - table = [] - memo = set() - for key_model in matched_model_keys: - if key_model in memo: - continue - if key_model in model_key_groups: - group = model_key_groups[key_model] - memo |= set(group) - shapes = [tuple(model_state_dict[k].shape) for k in group] - table.append( - ( - _longest_common_prefix([k[len(common_prefix) :] for k in group]) + "*", - _group_str([original_keys[k] for k in group]), - " ".join([str(x).replace(" ", "") for x in shapes]), - ) - ) - else: - key_checkpoint = original_keys[key_model] - shape = str(tuple(model_state_dict[key_model].shape)) - table.append((key_model[len(common_prefix) :], key_checkpoint, shape)) - submodule_str = common_prefix[:-1] if common_prefix else "model" - logger.info( - f"Following weights matched with submodule {submodule_str} - Total num: {len(table)}" - ) - - unmatched_ckpt_keys = [k for k in ckpt_keys if k not in set(matched_keys.keys())] - for k in unmatched_ckpt_keys: - result_state_dict[k] = ckpt_state_dict[k] - return result_state_dict - - -def _group_keys_by_module(keys: List[str], original_names: Dict[str, str]): - """ - Params in the same submodule are grouped together. 
- - Args: - keys: names of all parameters - original_names: mapping from parameter name to their name in the checkpoint - - Returns: - dict[name -> all other names in the same group] - """ - - def _submodule_name(key): - pos = key.rfind(".") - if pos < 0: - return None - prefix = key[: pos + 1] - return prefix - - all_submodules = [_submodule_name(k) for k in keys] - all_submodules = [x for x in all_submodules if x] - all_submodules = sorted(all_submodules, key=len) - - ret = {} - for prefix in all_submodules: - group = [k for k in keys if k.startswith(prefix)] - if len(group) <= 1: - continue - original_name_lcp = _longest_common_prefix_str([original_names[k] for k in group]) - if len(original_name_lcp) == 0: - # don't group weights if original names don't share prefix - continue - - for k in group: - if k in ret: - continue - ret[k] = group - return ret - - -def _longest_common_prefix(names: List[str]) -> str: - """ - ["abc.zfg", "abc.zef"] -> "abc." - """ - names = [n.split(".") for n in names] - m1, m2 = min(names), max(names) - ret = [a for a, b in zip(m1, m2) if a == b] - ret = ".".join(ret) + "." if len(ret) else "" - return ret - - -def _longest_common_prefix_str(names: List[str]) -> str: - m1, m2 = min(names), max(names) - lcp = [] - for a, b in zip(m1, m2): - if a == b: - lcp.append(a) - else: - break - lcp = "".join(lcp) - return lcp - - -def _group_str(names: List[str]) -> str: - """ - Turn "common1", "common2", "common3" into "common{1,2,3}" - """ - lcp = _longest_common_prefix_str(names) - rest = [x[len(lcp) :] for x in names] - rest = "{" + ",".join(rest) + "}" - ret = lcp + rest - - # add some simplification for BN specifically - ret = ret.replace("bn_{beta,running_mean,running_var,gamma}", "bn_*") - ret = ret.replace("bn_beta,bn_running_mean,bn_running_var,bn_gamma", "bn_*") - return ret diff --git a/detectron2/detectron2/checkpoint/catalog.py b/detectron2/detectron2/checkpoint/catalog.py deleted file mode 100644 index c954fde210ba9b8124239c989f0a97e3ffcffcfe..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/checkpoint/catalog.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging - -from detectron2.utils.file_io import PathHandler, PathManager - - -class ModelCatalog: - """ - Store mappings from names to third-party models. - """ - - S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" - - # MSRA models have STRIDE_IN_1X1=True. False otherwise. - # NOTE: all BN models here have fused BN into an affine layer. - # As a result, you should only load them to a model with "FrozenBN". - # Loading them to a model with regular BN or SyncBN is wrong. - # Even when loaded to FrozenBN, it is still different from affine by an epsilon, - # which should be negligible for training. - # NOTE: all models here uses PIXEL_STD=[1,1,1] - # NOTE: Most of the BN models here are no longer used. We use the - # re-converted pre-trained models under detectron2 model zoo instead. 
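# [Editor's illustration -- not part of the deleted file or of the diff hunks.]
# A minimal sketch of how the catalog defined in this file is meant to be used,
# based only on the ModelCatalog / ModelCatalogHandler code shown here; the
# entry name comes from C2_IMAGENET_MODELS below.
from detectron2.checkpoint.catalog import ModelCatalog

url = ModelCatalog.get("ImageNetPretrained/MSRA/R-50")
# -> "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl"
# The same entry can also be written as "catalog://ImageNetPretrained/MSRA/R-50",
# which ModelCatalogHandler (registered with PathManager at the bottom of this
# file) resolves to the URL above.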
- C2_IMAGENET_MODELS = { - "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", - "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", - "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", - "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", - "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", - "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", - "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", - } - - C2_DETECTRON_PATH_FORMAT = ( - "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 - ) - - C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" - C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" - - # format: {model_name} -> part of the url - C2_DETECTRON_MODELS = { - "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 - "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 - "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 - "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 - "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 - "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 - "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 - "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 - "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 - "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 - "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 - "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 - "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 - } - - @staticmethod - def get(name): - if name.startswith("Caffe2Detectron/COCO"): - return ModelCatalog._get_c2_detectron_baseline(name) - if name.startswith("ImageNetPretrained/"): - return ModelCatalog._get_c2_imagenet_pretrained(name) - raise RuntimeError("model not present in the catalog: {}".format(name)) - - @staticmethod - def _get_c2_imagenet_pretrained(name): - prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX - name = name[len("ImageNetPretrained/") :] - name = ModelCatalog.C2_IMAGENET_MODELS[name] - url = "/".join([prefix, name]) - return url - - @staticmethod - def _get_c2_detectron_baseline(name): - name = name[len("Caffe2Detectron/COCO/") :] - url = ModelCatalog.C2_DETECTRON_MODELS[name] - if "keypoint_rcnn" in name: - dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS - else: - dataset = ModelCatalog.C2_DATASET_COCO - - if "35998355/rpn_R-50-C4_1x" in name: - # this one model is somehow different from others .. 
- type = "rpn" - else: - type = "generalized_rcnn" - - # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. - url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( - prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset - ) - return url - - -class ModelCatalogHandler(PathHandler): - """ - Resolve URL like catalog://. - """ - - PREFIX = "catalog://" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path, **kwargs): - logger = logging.getLogger(__name__) - catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) - logger.info("Catalog entry {} points to {}".format(path, catalog_path)) - return PathManager.get_local_path(catalog_path, **kwargs) - - def _open(self, path, mode="r", **kwargs): - return PathManager.open(self._get_local_path(path), mode, **kwargs) - - -PathManager.register_handler(ModelCatalogHandler()) diff --git a/detectron2/detectron2/checkpoint/detection_checkpoint.py b/detectron2/detectron2/checkpoint/detection_checkpoint.py deleted file mode 100644 index cecb1fc2cfe46283b47096bcbcb2be3181431bf2..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/checkpoint/detection_checkpoint.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import os -import pickle -from urllib.parse import parse_qs, urlparse -import torch -from fvcore.common.checkpoint import Checkpointer -from torch.nn.parallel import DistributedDataParallel - -import detectron2.utils.comm as comm -from detectron2.utils.file_io import PathManager - -from .c2_model_loading import align_and_update_state_dicts - - -class DetectionCheckpointer(Checkpointer): - """ - Same as :class:`Checkpointer`, but is able to: - 1. handle models in detectron & detectron2 model zoo, and apply conversions for legacy models. - 2. correctly load checkpoints that are only available on the master worker - """ - - def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): - is_main_process = comm.is_main_process() - super().__init__( - model, - save_dir, - save_to_disk=is_main_process if save_to_disk is None else save_to_disk, - **checkpointables, - ) - self.path_manager = PathManager - self._parsed_url_during_load = None - - def load(self, path, *args, **kwargs): - assert self._parsed_url_during_load is None - need_sync = False - logger = logging.getLogger(__name__) - logger.info("[DetectionCheckpointer] Loading from {} ...".format(path)) - - if path and isinstance(self.model, DistributedDataParallel): - path = self.path_manager.get_local_path(path) - has_file = os.path.isfile(path) - all_has_file = comm.all_gather(has_file) - if not all_has_file[0]: - raise OSError(f"File {path} not found on main worker.") - if not all(all_has_file): - logger.warning( - f"Not all workers can read checkpoint {path}. " - "Training may fail to fully resume." - ) - # TODO: broadcast the checkpoint file contents from main - # worker, and load from it instead. 
- need_sync = True - if not has_file: - path = None # don't load if not readable - - if path: - parsed_url = urlparse(path) - self._parsed_url_during_load = parsed_url - path = parsed_url._replace(query="").geturl() # remove query from filename - path = self.path_manager.get_local_path(path) - ret = super().load(path, *args, **kwargs) - - if need_sync: - logger.info("Broadcasting model states from main worker ...") - self.model._sync_params_and_buffers() - self._parsed_url_during_load = None # reset to None - return ret - - def _load_file(self, filename): - if filename.endswith(".pkl"): - with PathManager.open(filename, "rb") as f: - data = pickle.load(f, encoding="latin1") - if "model" in data and "__author__" in data: - # file is in Detectron2 model zoo format - self.logger.info("Reading a file from '{}'".format(data["__author__"])) - return data - else: - # assume file is from Caffe2 / Detectron1 model zoo - if "blobs" in data: - # Detection models have "blobs", but ImageNet models don't - data = data["blobs"] - data = {k: v for k, v in data.items() if not k.endswith("_momentum")} - return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} - elif filename.endswith(".pyth"): - # assume file is from pycls; no one else seems to use the ".pyth" extension - with PathManager.open(filename, "rb") as f: - data = torch.load(f) - assert ( - "model_state" in data - ), f"Cannot load .pyth file {filename}; pycls checkpoints must contain 'model_state'." - model_state = { - k: v - for k, v in data["model_state"].items() - if not k.endswith("num_batches_tracked") - } - return {"model": model_state, "__author__": "pycls", "matching_heuristics": True} - - loaded = self._torch_load(filename) - if "model" not in loaded: - loaded = {"model": loaded} - assert self._parsed_url_during_load is not None, "`_load_file` must be called inside `load`" - parsed_url = self._parsed_url_during_load - queries = parse_qs(parsed_url.query) - if queries.pop("matching_heuristics", "False") == ["True"]: - loaded["matching_heuristics"] = True - if len(queries) > 0: - raise ValueError( - f"Unsupported query remaining: f{queries}, orginal filename: {parsed_url.geturl()}" - ) - return loaded - - def _torch_load(self, f): - return super()._load_file(f) - - def _load_model(self, checkpoint): - if checkpoint.get("matching_heuristics", False): - self._convert_ndarray_to_tensor(checkpoint["model"]) - # convert weights by name-matching heuristics - checkpoint["model"] = align_and_update_state_dicts( - self.model.state_dict(), - checkpoint["model"], - c2_conversion=checkpoint.get("__author__", None) == "Caffe2", - ) - # for non-caffe2 models, use standard ways to load it - incompatible = super()._load_model(checkpoint) - - model_buffers = dict(self.model.named_buffers(recurse=False)) - for k in ["pixel_mean", "pixel_std"]: - # Ignore missing key message about pixel_mean/std. - # Though they may be missing in old checkpoints, they will be correctly - # initialized from config anyway. - if k in model_buffers: - try: - incompatible.missing_keys.remove(k) - except ValueError: - pass - for k in incompatible.unexpected_keys[:]: - # Ignore unexpected keys about cell anchors. They exist in old checkpoints - # but now they are non-persistent buffers and will not be in new checkpoints. 
- if "anchor_generator.cell_anchors" in k: - incompatible.unexpected_keys.remove(k) - return incompatible diff --git a/detectron2/detectron2/config/__init__.py b/detectron2/detectron2/config/__init__.py deleted file mode 100644 index 4e648e632d55c70f160d49630378d202fbde4e45..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/config/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .compat import downgrade_config, upgrade_config -from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable -from .instantiate import instantiate -from .lazy import LazyCall, LazyConfig - -__all__ = [ - "CfgNode", - "get_cfg", - "global_cfg", - "set_global_cfg", - "downgrade_config", - "upgrade_config", - "configurable", - "instantiate", - "LazyCall", - "LazyConfig", -] - - -from detectron2.utils.env import fixup_module_metadata - -fixup_module_metadata(__name__, globals(), __all__) -del fixup_module_metadata diff --git a/detectron2/detectron2/config/compat.py b/detectron2/detectron2/config/compat.py deleted file mode 100644 index 11a08c439bf14defd880e37a938fab8a08e68eeb..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/config/compat.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Backward compatibility of configs. - -Instructions to bump version: -+ It's not needed to bump version if new keys are added. - It's only needed when backward-incompatible changes happen - (i.e., some existing keys disappear, or the meaning of a key changes) -+ To bump version, do the following: - 1. Increment _C.VERSION in defaults.py - 2. Add a converter in this file. - - Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, - and a function "downgrade" which in-place downgrades config from X to X-1 - - In each function, VERSION is left unchanged. - - Each converter assumes that its input has the relevant keys - (i.e., the input is not a partial config). - 3. Run the tests (test_config.py) to make sure the upgrade & downgrade - functions are consistent. -""" - -import logging -from typing import List, Optional, Tuple - -from .config import CfgNode as CN -from .defaults import _C - -__all__ = ["upgrade_config", "downgrade_config"] - - -def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: - """ - Upgrade a config from its current version to a newer version. - - Args: - cfg (CfgNode): - to_version (int): defaults to the latest version. - """ - cfg = cfg.clone() - if to_version is None: - to_version = _C.VERSION - - assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( - cfg.VERSION, to_version - ) - for k in range(cfg.VERSION, to_version): - converter = globals()["ConverterV" + str(k + 1)] - converter.upgrade(cfg) - cfg.VERSION = k + 1 - return cfg - - -def downgrade_config(cfg: CN, to_version: int) -> CN: - """ - Downgrade a config from its current version to an older version. - - Args: - cfg (CfgNode): - to_version (int): - - Note: - A general downgrade of arbitrary configs is not always possible due to the - different functionalities in different versions. - The purpose of downgrade is only to recover the defaults in old versions, - allowing it to load an old partial yaml config. - Therefore, the implementation only needs to fill in the default values - in the old version when a general downgrade is not possible. 
- """ - cfg = cfg.clone() - assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( - cfg.VERSION, to_version - ) - for k in range(cfg.VERSION, to_version, -1): - converter = globals()["ConverterV" + str(k)] - converter.downgrade(cfg) - cfg.VERSION = k - 1 - return cfg - - -def guess_version(cfg: CN, filename: str) -> int: - """ - Guess the version of a partial config where the VERSION field is not specified. - Returns the version, or the latest if cannot make a guess. - - This makes it easier for users to migrate. - """ - logger = logging.getLogger(__name__) - - def _has(name: str) -> bool: - cur = cfg - for n in name.split("."): - if n not in cur: - return False - cur = cur[n] - return True - - # Most users' partial configs have "MODEL.WEIGHT", so guess on it - ret = None - if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): - ret = 1 - - if ret is not None: - logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) - else: - ret = _C.VERSION - logger.warning( - "Config '{}' has no VERSION. Assuming it to be compatible with latest v{}.".format( - filename, ret - ) - ) - return ret - - -def _rename(cfg: CN, old: str, new: str) -> None: - old_keys = old.split(".") - new_keys = new.split(".") - - def _set(key_seq: List[str], val: str) -> None: - cur = cfg - for k in key_seq[:-1]: - if k not in cur: - cur[k] = CN() - cur = cur[k] - cur[key_seq[-1]] = val - - def _get(key_seq: List[str]) -> CN: - cur = cfg - for k in key_seq: - cur = cur[k] - return cur - - def _del(key_seq: List[str]) -> None: - cur = cfg - for k in key_seq[:-1]: - cur = cur[k] - del cur[key_seq[-1]] - if len(cur) == 0 and len(key_seq) > 1: - _del(key_seq[:-1]) - - _set(new_keys, _get(old_keys)) - _del(old_keys) - - -class _RenameConverter: - """ - A converter that handles simple rename. - """ - - RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) - - @classmethod - def upgrade(cls, cfg: CN) -> None: - for old, new in cls.RENAME: - _rename(cfg, old, new) - - @classmethod - def downgrade(cls, cfg: CN) -> None: - for old, new in cls.RENAME[::-1]: - _rename(cfg, new, old) - - -class ConverterV1(_RenameConverter): - RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] - - -class ConverterV2(_RenameConverter): - """ - A large bulk of rename, before public release. 
- """ - - RENAME = [ - ("MODEL.WEIGHT", "MODEL.WEIGHTS"), - ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), - ( - "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", - "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", - ), - ( - "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", - "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", - ), - ( - "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", - "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", - ), - ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), - ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), - ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), - ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), - ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), - ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), - ("TEST.AUG_ON", "TEST.AUG.ENABLED"), - ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), - ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), - ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), - ] - - @classmethod - def upgrade(cls, cfg: CN) -> None: - super().upgrade(cfg) - - if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": - _rename( - cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" - ) - _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") - del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] - del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] - else: - _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") - _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") - del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] - del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] - del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] - - @classmethod - def downgrade(cls, cfg: CN) -> None: - super().downgrade(cfg) - - _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") - _rename(cfg, "MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") - cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS - cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES - cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/detectron2/detectron2/config/config.py b/detectron2/detectron2/config/config.py deleted file mode 100644 index 49a55b1bc87509e2bb24b902ae12c21d5aaeda81..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/config/config.py +++ /dev/null @@ -1,265 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import functools -import inspect -import logging -from fvcore.common.config import CfgNode as _CfgNode - -from detectron2.utils.file_io import PathManager - - -class CfgNode(_CfgNode): - """ - The same as `fvcore.common.config.CfgNode`, but different in: - - 1. Use unsafe yaml loading by default. - Note that this may lead to arbitrary code execution: you must not - load a config file from untrusted sources before manually inspecting - the content of the file. - 2. Support config versioning. - When attempting to merge an old config, it will convert the old config automatically. - - .. automethod:: clone - .. automethod:: freeze - .. 
automethod:: defrost - .. automethod:: is_frozen - .. automethod:: load_yaml_with_base - .. automethod:: merge_from_list - .. automethod:: merge_from_other_cfg - """ - - @classmethod - def _open_cfg(cls, filename): - return PathManager.open(filename, "r") - - # Note that the default value of allow_unsafe is changed to True - def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: - """ - Load content from the given config file and merge it into self. - - Args: - cfg_filename: config filename - allow_unsafe: allow unsafe yaml syntax - """ - assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" - loaded_cfg = self.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) - loaded_cfg = type(self)(loaded_cfg) - - # defaults.py needs to import CfgNode - from .defaults import _C - - latest_ver = _C.VERSION - assert ( - latest_ver == self.VERSION - ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" - - logger = logging.getLogger(__name__) - - loaded_ver = loaded_cfg.get("VERSION", None) - if loaded_ver is None: - from .compat import guess_version - - loaded_ver = guess_version(loaded_cfg, cfg_filename) - assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( - loaded_ver, self.VERSION - ) - - if loaded_ver == self.VERSION: - self.merge_from_other_cfg(loaded_cfg) - else: - # compat.py needs to import CfgNode - from .compat import upgrade_config, downgrade_config - - logger.warning( - "Loading an old v{} config file '{}' by automatically upgrading to v{}. " - "See docs/CHANGELOG.md for instructions to update your files.".format( - loaded_ver, cfg_filename, self.VERSION - ) - ) - # To convert, first obtain a full config at an old version - old_self = downgrade_config(self, to_version=loaded_ver) - old_self.merge_from_other_cfg(loaded_cfg) - new_config = upgrade_config(old_self) - self.clear() - self.update(new_config) - - def dump(self, *args, **kwargs): - """ - Returns: - str: a yaml string representation of the config - """ - # to make it show up in docs - return super().dump(*args, **kwargs) - - -global_cfg = CfgNode() - - -def get_cfg() -> CfgNode: - """ - Get a copy of the default config. - - Returns: - a detectron2 CfgNode instance. - """ - from .defaults import _C - - return _C.clone() - - -def set_global_cfg(cfg: CfgNode) -> None: - """ - Let the global config point to the given cfg. - - Assume that the given "cfg" has the key "KEY", after calling - `set_global_cfg(cfg)`, the key can be accessed by: - :: - from detectron2.config import global_cfg - print(global_cfg.KEY) - - By using a hacky global config, you can access these configs anywhere, - without having to pass the config object or the values deep into the code. - This is a hacky feature introduced for quick prototyping / research exploration. - """ - global global_cfg - global_cfg.clear() - global_cfg.update(cfg) - - -def configurable(init_func=None, *, from_config=None): - """ - Decorate a function or a class's __init__ method so that it can be called - with a :class:`CfgNode` object using a :func:`from_config` function that translates - :class:`CfgNode` to arguments. 
- - Examples: - :: - # Usage 1: Decorator on __init__: - class A: - @configurable - def __init__(self, a, b=2, c=3): - pass - - @classmethod - def from_config(cls, cfg): # 'cfg' must be the first argument - # Returns kwargs to be passed to __init__ - return {"a": cfg.A, "b": cfg.B} - - a1 = A(a=1, b=2) # regular construction - a2 = A(cfg) # construct with a cfg - a3 = A(cfg, b=3, c=4) # construct with extra overwrite - - # Usage 2: Decorator on any function. Needs an extra from_config argument: - @configurable(from_config=lambda cfg: {"a: cfg.A, "b": cfg.B}) - def a_func(a, b=2, c=3): - pass - - a1 = a_func(a=1, b=2) # regular call - a2 = a_func(cfg) # call with a cfg - a3 = a_func(cfg, b=3, c=4) # call with extra overwrite - - Args: - init_func (callable): a class's ``__init__`` method in usage 1. The - class must have a ``from_config`` classmethod which takes `cfg` as - the first argument. - from_config (callable): the from_config function in usage 2. It must take `cfg` - as its first argument. - """ - - if init_func is not None: - assert ( - inspect.isfunction(init_func) - and from_config is None - and init_func.__name__ == "__init__" - ), "Incorrect use of @configurable. Check API documentation for examples." - - @functools.wraps(init_func) - def wrapped(self, *args, **kwargs): - try: - from_config_func = type(self).from_config - except AttributeError as e: - raise AttributeError( - "Class with @configurable must have a 'from_config' classmethod." - ) from e - if not inspect.ismethod(from_config_func): - raise TypeError("Class with @configurable must have a 'from_config' classmethod.") - - if _called_with_cfg(*args, **kwargs): - explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) - init_func(self, **explicit_args) - else: - init_func(self, *args, **kwargs) - - return wrapped - - else: - if from_config is None: - return configurable # @configurable() is made equivalent to @configurable - assert inspect.isfunction( - from_config - ), "from_config argument of configurable must be a function!" - - def wrapper(orig_func): - @functools.wraps(orig_func) - def wrapped(*args, **kwargs): - if _called_with_cfg(*args, **kwargs): - explicit_args = _get_args_from_config(from_config, *args, **kwargs) - return orig_func(**explicit_args) - else: - return orig_func(*args, **kwargs) - - wrapped.from_config = from_config - return wrapped - - return wrapper - - -def _get_args_from_config(from_config_func, *args, **kwargs): - """ - Use `from_config` to obtain explicit arguments. 
- - Returns: - dict: arguments to be used for cls.__init__ - """ - signature = inspect.signature(from_config_func) - if list(signature.parameters.keys())[0] != "cfg": - if inspect.isfunction(from_config_func): - name = from_config_func.__name__ - else: - name = f"{from_config_func.__self__}.from_config" - raise TypeError(f"{name} must take 'cfg' as the first argument!") - support_var_arg = any( - param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] - for param in signature.parameters.values() - ) - if support_var_arg: # forward all arguments to from_config, if from_config accepts them - ret = from_config_func(*args, **kwargs) - else: - # forward supported arguments to from_config - supported_arg_names = set(signature.parameters.keys()) - extra_kwargs = {} - for name in list(kwargs.keys()): - if name not in supported_arg_names: - extra_kwargs[name] = kwargs.pop(name) - ret = from_config_func(*args, **kwargs) - # forward the other arguments to __init__ - ret.update(extra_kwargs) - return ret - - -def _called_with_cfg(*args, **kwargs): - """ - Returns: - bool: whether the arguments contain CfgNode and should be considered - forwarded to from_config. - """ - from omegaconf import DictConfig - - if len(args) and isinstance(args[0], (_CfgNode, DictConfig)): - return True - if isinstance(kwargs.pop("cfg", None), (_CfgNode, DictConfig)): - return True - # `from_config`'s first argument is forced to be "cfg". - # So the above check covers all cases. - return False diff --git a/detectron2/detectron2/config/defaults.py b/detectron2/detectron2/config/defaults.py deleted file mode 100644 index 506651730ef2ec4c7832aee5d2eb629dc3554805..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/config/defaults.py +++ /dev/null @@ -1,656 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .config import CfgNode as CN - -# NOTE: given the new config system -# (https://detectron2.readthedocs.io/en/latest/tutorials/lazyconfigs.html), -# we will stop adding new functionalities to default CfgNode. - -# ----------------------------------------------------------------------------- -# Convention about Training / Test specific parameters -# ----------------------------------------------------------------------------- -# Whenever an argument can be either used for training or for testing, the -# corresponding name will be post-fixed by a _TRAIN for a training parameter, -# or _TEST for a test-specific parameter. -# For example, the number of images during training will be -# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be -# IMAGES_PER_BATCH_TEST - -# ----------------------------------------------------------------------------- -# Config definition -# ----------------------------------------------------------------------------- - -_C = CN() - -# The version number, to upgrade from old configs to new ones if any -# changes happen. It's recommended to keep a VERSION in your config file. -_C.VERSION = 2 - -_C.MODEL = CN() -_C.MODEL.LOAD_PROPOSALS = False -_C.MODEL.MASK_ON = False -_C.MODEL.KEYPOINT_ON = False -_C.MODEL.DEVICE = "cuda" -_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" - -# Path (a file path, or URL like detectron2://.., https://..) to a checkpoint file -# to be loaded to the model. You can find available models in the model zoo. -_C.MODEL.WEIGHTS = "" - -# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). -# To train on images of different number of channels, just set different mean & std. 
-# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] -_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] -# When using pre-trained models in Detectron1 or any MSRA models, -# std has been absorbed into its conv1 weights, so the std needs to be set 1. -# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) -_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] - - -# ----------------------------------------------------------------------------- -# INPUT -# ----------------------------------------------------------------------------- -_C.INPUT = CN() -# By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge. -# Please refer to ResizeShortestEdge for detailed definition. -# Size of the smallest side of the image during training -_C.INPUT.MIN_SIZE_TRAIN = (800,) -# Sample size of smallest side by choice or random selection from range give by -# INPUT.MIN_SIZE_TRAIN -_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" -# Maximum size of the side of the image during training -_C.INPUT.MAX_SIZE_TRAIN = 1333 -# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. -_C.INPUT.MIN_SIZE_TEST = 800 -# Maximum size of the side of the image during testing -_C.INPUT.MAX_SIZE_TEST = 1333 -# Mode for flipping images used in data augmentation during training -# choose one of ["horizontal, "vertical", "none"] -_C.INPUT.RANDOM_FLIP = "horizontal" - -# `True` if cropping is used for data augmentation during training -_C.INPUT.CROP = CN({"ENABLED": False}) -# Cropping type. See documentation of `detectron2.data.transforms.RandomCrop` for explanation. -_C.INPUT.CROP.TYPE = "relative_range" -# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of -# pixels if CROP.TYPE is "absolute" -_C.INPUT.CROP.SIZE = [0.9, 0.9] - - -# Whether the model needs RGB, YUV, HSV etc. -# Should be one of the modes defined here, as we use PIL to read the image: -# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes -# with BGR being the one exception. One can set image format to BGR, we will -# internally use RGB for conversion and flip the channels over -_C.INPUT.FORMAT = "BGR" -# The ground truth mask format that the model will use. -# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. -_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" - - -# ----------------------------------------------------------------------------- -# Dataset -# ----------------------------------------------------------------------------- -_C.DATASETS = CN() -# List of the dataset names for training. Must be registered in DatasetCatalog -# Samples from these datasets will be merged and used as one dataset. -_C.DATASETS.TRAIN = () -# List of the pre-computed proposal files for training, which must be consistent -# with datasets listed in DATASETS.TRAIN. -_C.DATASETS.PROPOSAL_FILES_TRAIN = () -# Number of top scoring precomputed proposals to keep for training -_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 -# List of the dataset names for testing. Must be registered in DatasetCatalog -_C.DATASETS.TEST = () -# List of the pre-computed proposal files for test, which must be consistent -# with datasets listed in DATASETS.TEST. 
-_C.DATASETS.PROPOSAL_FILES_TEST = () -# Number of top scoring precomputed proposals to keep for test -_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 - -# ----------------------------------------------------------------------------- -# DataLoader -# ----------------------------------------------------------------------------- -_C.DATALOADER = CN() -# Number of data loading threads -_C.DATALOADER.NUM_WORKERS = 4 -# If True, each batch should contain only images for which the aspect ratio -# is compatible. This groups portrait images together, and landscape images -# are not batched with portrait images. -_C.DATALOADER.ASPECT_RATIO_GROUPING = True -# Options: TrainingSampler, RepeatFactorTrainingSampler -_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" -# Repeat threshold for RepeatFactorTrainingSampler -_C.DATALOADER.REPEAT_THRESHOLD = 0.0 -# if True, take square root when computing repeating factor -_C.DATALOADER.REPEAT_SQRT = True -# Tf True, when working on datasets that have instance annotations, the -# training dataloader will filter out images without associated annotations -_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True - -# ---------------------------------------------------------------------------- # -# Backbone options -# ---------------------------------------------------------------------------- # -_C.MODEL.BACKBONE = CN() - -_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" -# Freeze the first several stages so they are not trained. -# There are 5 stages in ResNet. The first is a convolution, and the following -# stages are each group of residual blocks. -_C.MODEL.BACKBONE.FREEZE_AT = 2 - - -# ---------------------------------------------------------------------------- # -# FPN options -# ---------------------------------------------------------------------------- # -_C.MODEL.FPN = CN() -# Names of the input feature maps to be used by FPN -# They must have contiguous power of 2 strides -# e.g., ["res2", "res3", "res4", "res5"] -_C.MODEL.FPN.IN_FEATURES = [] -_C.MODEL.FPN.OUT_CHANNELS = 256 - -# Options: "" (no norm), "GN" -_C.MODEL.FPN.NORM = "" - -# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" -_C.MODEL.FPN.FUSE_TYPE = "sum" - - -# ---------------------------------------------------------------------------- # -# Proposal generator options -# ---------------------------------------------------------------------------- # -_C.MODEL.PROPOSAL_GENERATOR = CN() -# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" -_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" -# Proposal height and width both need to be greater than MIN_SIZE -# (a the scale used during training or inference) -_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 - - -# ---------------------------------------------------------------------------- # -# Anchor generator options -# ---------------------------------------------------------------------------- # -_C.MODEL.ANCHOR_GENERATOR = CN() -# The generator can be any name in the ANCHOR_GENERATOR registry -_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" -# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. -# Format: list[list[float]]. SIZES[i] specifies the list of sizes to use for -# IN_FEATURES[i]; len(SIZES) must be equal to len(IN_FEATURES) or 1. -# When len(SIZES) == 1, SIZES[0] is used for all IN_FEATURES. -_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] -# Anchor aspect ratios. 
For each area given in `SIZES`, anchors with different aspect -# ratios are generated by an anchor generator. -# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) -# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, -# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used -# for all IN_FEATURES. -_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] -# Anchor angles. -# list[list[float]], the angle in degrees, for each input feature map. -# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. -_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] -# Relative offset between the center of the first anchor and the top-left corner of the image -# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. -# The value is not expected to affect model accuracy. -_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 - -# ---------------------------------------------------------------------------- # -# RPN options -# ---------------------------------------------------------------------------- # -_C.MODEL.RPN = CN() -_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY - -# Names of the input feature maps to be used by RPN -# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN -_C.MODEL.RPN.IN_FEATURES = ["res4"] -# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels -# Set to -1 or a large value, e.g. 100000, to disable pruning anchors -_C.MODEL.RPN.BOUNDARY_THRESH = -1 -# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] -# Minimum overlap required between an anchor and ground-truth box for the -# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD -# ==> positive RPN example: 1) -# Maximum overlap allowed between an anchor and ground-truth box for the -# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD -# ==> negative RPN example: 0) -# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) -# are ignored (-1) -_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] -_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] -# Number of regions per image used to train RPN -_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 -# Target fraction of foreground (positive) examples per RPN minibatch -_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 -# Options are: "smooth_l1", "giou", "diou", "ciou" -_C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1" -_C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0 -# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets -_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) -# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. -_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 -_C.MODEL.RPN.LOSS_WEIGHT = 1.0 -# Number of top scoring RPN proposals to keep before applying NMS -# When FPN is used, this is *per FPN level* (not total) -_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 -_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 -# Number of top scoring RPN proposals to keep after applying NMS -# When FPN is used, this limit is applied per level and then again to the union -# of proposals from all levels -# NOTE: When FPN is used, the meaning of this config is different from Detectron1. -# It means per-batch topk in Detectron1, but per-image topk here. -# See the "find_top_rpn_proposals" function for details. 
-_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 -_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 -# NMS threshold used on RPN proposals -_C.MODEL.RPN.NMS_THRESH = 0.7 -# Set this to -1 to use the same number of output channels as input channels. -_C.MODEL.RPN.CONV_DIMS = [-1] - -# ---------------------------------------------------------------------------- # -# ROI HEADS options -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_HEADS = CN() -_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" -# Number of foreground classes -_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 -# Names of the input feature maps to be used by ROI heads -# Currently all heads (box, mask, ...) use the same input feature map list -# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN -_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] -# IOU overlap ratios [IOU_THRESHOLD] -# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) -# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) -_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] -_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] -# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training -# Total number of RoIs per training minibatch = -# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH -# E.g., a common configuration is: 512 * 16 = 8192 -_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 -# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0) -_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 - -# Only used on test mode - -# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to -# balance obtaining high recall with not having too many low precision -# detections that will slow down inference post processing steps (like NMS) -# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down -# inference. -_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 -# Overlap threshold used for non-maximum suppression (suppress boxes with -# IoU >= this threshold) -_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 -# If True, augment proposals with ground-truth boxes before sampling proposals to -# train ROI heads. -_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True - -# ---------------------------------------------------------------------------- # -# Box Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_BOX_HEAD = CN() -# C4 don't use head name option -# Options for non-C4 models: FastRCNNConvFCHead, -_C.MODEL.ROI_BOX_HEAD.NAME = "" -# Options are: "smooth_l1", "giou", "diou", "ciou" -_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1" -# The final scaling coefficient on the box regression loss, used to balance the magnitude of its -# gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`. -_C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0 -# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets -# These are empirically chosen to approximately lead to unit variance targets -_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) -# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. 
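For reference, a small sketch of a smooth-L1 (Huber-style) loss with the transition point BETA described above; with beta set to 0 it reduces to a plain L1 loss. This uses the common textbook formula and is not a copy of detectron2's implementation.

def smooth_l1(x, beta):
    # x: regression error (prediction - target); beta: L1/L2 transition point
    ax = abs(x)
    if beta < 1e-5:                  # beta == 0 -> pure L1 loss
        return ax
    return 0.5 * ax * ax / beta if ax < beta else ax - 0.5 * beta

print(smooth_l1(0.05, 0.0), smooth_l1(0.05, 0.1), smooth_l1(0.5, 0.1))
# -> 0.05, 0.0125, 0.45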
-_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 -_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - -_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 -# Hidden layer dimension for FC layers in the RoI box head -_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 -_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 -# Channel dimension for Conv layers in the RoI box head -_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 -# Normalization method for the convolution layers. -# Options: "" (no norm), "GN", "SyncBN". -_C.MODEL.ROI_BOX_HEAD.NORM = "" -# Whether to use class agnostic for bbox regression -_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False -# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. -_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False - -# Federated loss can be used to improve the training of LVIS -_C.MODEL.ROI_BOX_HEAD.USE_FED_LOSS = False -# Sigmoid cross entrophy is used with federated loss -_C.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE = False -# The power value applied to image_count when calcualting frequency weight -_C.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER = 0.5 -# Number of classes to keep in total -_C.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES = 50 - -# ---------------------------------------------------------------------------- # -# Cascaded Box Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() -# The number of cascade stages is implicitly defined by the length of the following two configs. -_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( - (10.0, 10.0, 5.0, 5.0), - (20.0, 20.0, 10.0, 10.0), - (30.0, 30.0, 15.0, 15.0), -) -_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) - - -# ---------------------------------------------------------------------------- # -# Mask Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_MASK_HEAD = CN() -_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" -_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 -_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head -_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 -# Normalization method for the convolution layers. -# Options: "" (no norm), "GN", "SyncBN". -_C.MODEL.ROI_MASK_HEAD.NORM = "" -# Whether to use class agnostic for mask prediction -_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" - - -# ---------------------------------------------------------------------------- # -# Keypoint Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_KEYPOINT_HEAD = CN() -_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 -_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) -_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. - -# Images with too few (or no) keypoints are excluded from training. -_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 -# Normalize by the total number of visible keypoints in the minibatch if True. -# Otherwise, normalize by the total number of keypoints that could ever exist -# in the minibatch. 
-# The keypoint softmax loss is only calculated on visible keypoints. -# Since the number of visible keypoints can vary significantly between -# minibatches, this has the effect of up-weighting the importance of -# minibatches with few visible keypoints. (Imagine the extreme case of -# only one visible keypoint versus N: in the case of N, each one -# contributes 1/N to the gradient compared to the single keypoint -# determining the gradient direction). Instead, we can normalize the -# loss by the total number of keypoints, if it were the case that all -# keypoints were visible in a full minibatch. (Returning to the example, -# this means that the one visible keypoint contributes as much as each -# of the N keypoints.) -_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True -# Multi-task loss weight to use for keypoints -# Recommended values: -# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True -# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False -_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" - -# ---------------------------------------------------------------------------- # -# Semantic Segmentation Head -# ---------------------------------------------------------------------------- # -_C.MODEL.SEM_SEG_HEAD = CN() -_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" -_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] -# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for -# the correposnding pixel. -_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 -# Number of classes in the semantic segmentation head -_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 -# Number of channels in the 3x3 convs inside semantic-FPN heads. -_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 -# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. -_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 -# Normalization method for the convolution layers. Options: "" (no norm), "GN". -_C.MODEL.SEM_SEG_HEAD.NORM = "GN" -_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 - -_C.MODEL.PANOPTIC_FPN = CN() -# Scaling of all losses from instance detection / segmentation head. -_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 - -# options when combining instance & semantic segmentation outputs -_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) # "COMBINE.ENABLED" is deprecated & not used -_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 -_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 -_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 - - -# ---------------------------------------------------------------------------- # -# RetinaNet Head -# ---------------------------------------------------------------------------- # -_C.MODEL.RETINANET = CN() - -# This is the number of foreground classes. -_C.MODEL.RETINANET.NUM_CLASSES = 80 - -_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] - -# Convolutions to use in the cls and bbox tower -# NOTE: this doesn't include the last conv for logits -_C.MODEL.RETINANET.NUM_CONVS = 4 - -# IoU overlap ratio [bg, fg] for labeling anchors. -# Anchors with < bg are labeled negative (0) -# Anchors with >= bg and < fg are ignored (-1) -# Anchors with >= fg are labeled positive (1) -_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] -_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] - -# Prior prob for rare case (i.e. foreground) at the beginning of training. 
-# This is used to set the bias for the logits layer of the classifier subnet. -# This improves training stability in the case of heavy class imbalance. -_C.MODEL.RETINANET.PRIOR_PROB = 0.01 - -# Inference cls score threshold, only anchors with score > INFERENCE_TH are -# considered for inference (to improve speed) -_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 -# Select topk candidates before NMS -_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 -_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 - -# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets -_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) - -# Loss parameters -_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 -_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 -_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 -# Options are: "smooth_l1", "giou", "diou", "ciou" -_C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1" - -# One of BN, SyncBN, FrozenBN, GN -# Only supports GN until unshared norm is implemented -_C.MODEL.RETINANET.NORM = "" - - -# ---------------------------------------------------------------------------- # -# ResNe[X]t options (ResNets = {ResNet, ResNeXt} -# Note that parts of a resnet may be used for both the backbone and the head -# These options apply to both -# ---------------------------------------------------------------------------- # -_C.MODEL.RESNETS = CN() - -_C.MODEL.RESNETS.DEPTH = 50 -_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone - -# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt -_C.MODEL.RESNETS.NUM_GROUPS = 1 - -# Options: FrozenBN, GN, "SyncBN", "BN" -_C.MODEL.RESNETS.NORM = "FrozenBN" - -# Baseline width of each group. -# Scaling this parameters will scale the width of all bottleneck layers. -_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 - -# Place the stride 2 conv on the 1x1 filter -# Use True only for the original MSRA ResNet; use False for C2 and Torch models -_C.MODEL.RESNETS.STRIDE_IN_1X1 = True - -# Apply dilation in stage "res5" -_C.MODEL.RESNETS.RES5_DILATION = 1 - -# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet -# For R18 and R34, this needs to be set to 64 -_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 -_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 - -# Apply Deformable Convolution in stages -# Specify if apply deform_conv on Res2, Res3, Res4, Res5 -_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] -# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); -# Use False for DeformableV1. -_C.MODEL.RESNETS.DEFORM_MODULATED = False -# Number of groups in deformable conv. -_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 - - -# ---------------------------------------------------------------------------- # -# Solver -# ---------------------------------------------------------------------------- # -_C.SOLVER = CN() - -# Options: WarmupMultiStepLR, WarmupCosineLR. -# See detectron2/solver/build.py for definition. -_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" - -_C.SOLVER.MAX_ITER = 40000 - -_C.SOLVER.BASE_LR = 0.001 -# The end lr, only used by WarmupCosineLR -_C.SOLVER.BASE_LR_END = 0.0 - -_C.SOLVER.MOMENTUM = 0.9 - -_C.SOLVER.NESTEROV = False - -_C.SOLVER.WEIGHT_DECAY = 0.0001 -# The weight decay that's applied to parameters of normalization layers -# (typically the affine transformation) -_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 - -_C.SOLVER.GAMMA = 0.1 -# The iteration number to decrease learning rate by GAMMA. 
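As an illustration of what MODEL.RETINANET.PRIOR_PROB (commented above) controls: the usual RetinaNet-style initialization derives the initial classification-logit bias from the foreground prior probability. The helper name below is made up for illustration.

import math

def prior_prob_to_bias(prior_prob=0.01):
    # solve sigmoid(bias) == prior_prob for the initial logit bias
    return -math.log((1.0 - prior_prob) / prior_prob)

print(round(prior_prob_to_bias(0.01), 3))  # ~ -4.595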
-_C.SOLVER.STEPS = (30000,) -# Number of decays in WarmupStepWithFixedGammaLR schedule -_C.SOLVER.NUM_DECAYS = 3 - -_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 -_C.SOLVER.WARMUP_ITERS = 1000 -_C.SOLVER.WARMUP_METHOD = "linear" -# Whether to rescale the interval for the learning schedule after warmup -_C.SOLVER.RESCALE_INTERVAL = False - -# Save a checkpoint after every this number of iterations -_C.SOLVER.CHECKPOINT_PERIOD = 5000 - -# Number of images per batch across all machines. This is also the number -# of training images per step (i.e. per iteration). If we use 16 GPUs -# and IMS_PER_BATCH = 32, each GPU will see 2 images per batch. -# May be adjusted automatically if REFERENCE_WORLD_SIZE is set. -_C.SOLVER.IMS_PER_BATCH = 16 - -# The reference number of workers (GPUs) this config is meant to train with. -# It takes no effect when set to 0. -# With a non-zero value, it will be used by DefaultTrainer to compute a desired -# per-worker batch size, and then scale the other related configs (total batch size, -# learning rate, etc) to match the per-worker batch size. -# See documentation of `DefaultTrainer.auto_scale_workers` for details: -_C.SOLVER.REFERENCE_WORLD_SIZE = 0 - -# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for -# biases. This is not useful (at least for recent models). You should avoid -# changing these and they exist only to reproduce Detectron v1 training if -# desired. -_C.SOLVER.BIAS_LR_FACTOR = 1.0 -_C.SOLVER.WEIGHT_DECAY_BIAS = None # None means following WEIGHT_DECAY - -# Gradient clipping -_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) -# Type of gradient clipping, currently 2 values are supported: -# - "value": the absolute values of elements of each gradients are clipped -# - "norm": the norm of the gradient for each parameter is clipped thus -# affecting all elements in the parameter -_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" -# Maximum absolute value used for clipping gradients -_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 -# Floating point number p for L-p norm to be used with the "norm" -# gradient clipping type; for L-inf, please specify .inf -_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 - -# Enable automatic mixed precision for training -# Note that this does not change model's inference behavior. -# To use AMP in inference, run inference under autocast() -_C.SOLVER.AMP = CN({"ENABLED": False}) - -# ---------------------------------------------------------------------------- # -# Specific test options -# ---------------------------------------------------------------------------- # -_C.TEST = CN() -# For end-to-end tests to verify the expected accuracy. -# Each item is [task, metric, value, tolerance] -# e.g.: [['bbox', 'AP', 38.5, 0.2]] -_C.TEST.EXPECTED_RESULTS = [] -# The period (in terms of steps) to evaluate the model during training. -# Set to 0 to disable. -_C.TEST.EVAL_PERIOD = 0 -# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval -# When empty, it will use the defaults in COCO. -# Otherwise it should be a list[float] with the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. -_C.TEST.KEYPOINT_OKS_SIGMAS = [] -# Maximum number of detections to return per image during inference (100 is -# based on the limit established for the COCO dataset). 
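A rough sketch of the learning-rate curve the BASE_LR / GAMMA / STEPS / WARMUP_* solver options above describe: linear warmup from BASE_LR * WARMUP_FACTOR over WARMUP_ITERS iterations, then BASE_LR scaled by GAMMA at every milestone in STEPS. Simplified for illustration; the real schedulers live in detectron2/solver.

def lr_at(it, base_lr=0.001, warmup_factor=1.0 / 1000, warmup_iters=1000,
          steps=(30000,), gamma=0.1):
    if it < warmup_iters:
        alpha = it / warmup_iters
        return base_lr * (warmup_factor * (1.0 - alpha) + alpha)   # linear warmup
    return base_lr * gamma ** sum(1 for s in steps if it >= s)      # multi-step decay

print(lr_at(0), lr_at(500), lr_at(5000), lr_at(35000))
# -> roughly 1e-06, 0.0005, 0.001, 0.0001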
-_C.TEST.DETECTIONS_PER_IMAGE = 100 - -_C.TEST.AUG = CN({"ENABLED": False}) -_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) -_C.TEST.AUG.MAX_SIZE = 4000 -_C.TEST.AUG.FLIP = True - -_C.TEST.PRECISE_BN = CN({"ENABLED": False}) -_C.TEST.PRECISE_BN.NUM_ITER = 200 - -# ---------------------------------------------------------------------------- # -# Misc options -# ---------------------------------------------------------------------------- # -# Directory where output files are written -_C.OUTPUT_DIR = "./output" -# Set seed to negative to fully randomize everything. -# Set seed to positive to use a fixed seed. Note that a fixed seed increases -# reproducibility but does not guarantee fully deterministic behavior. -# Disabling all parallelism further increases reproducibility. -_C.SEED = -1 -# Benchmark different cudnn algorithms. -# If input images have very different sizes, this option will have large overhead -# for about 10k iterations. It usually hurts total time, but can benefit for certain models. -# If input images have the same or similar sizes, benchmark is often helpful. -_C.CUDNN_BENCHMARK = False -# Option to set PyTorch matmul and CuDNN's float32 precision. When set to non-empty string, -# the corresponding precision ("highest", "high" or "medium") will be used. The highest -# precision will effectively disable tf32. -_C.FLOAT32_PRECISION = "" -# The period (in terms of steps) for minibatch visualization at train time. -# Set to 0 to disable. -_C.VIS_PERIOD = 0 - -# global config is for quick hack purposes. -# You can set them in command line or config files, -# and access it with: -# -# from detectron2.config import global_cfg -# print(global_cfg.HACK) -# -# Do not commit any configs into it. -_C.GLOBAL = CN() -_C.GLOBAL.HACK = 1.0 diff --git a/detectron2/detectron2/config/instantiate.py b/detectron2/detectron2/config/instantiate.py deleted file mode 100644 index 05ee2c7d21c9bf3e56a0a8e98447d2587b4b8fed..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/config/instantiate.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import collections.abc as abc -import dataclasses -import logging -from typing import Any - -from detectron2.utils.registry import _convert_target_to_string, locate - -__all__ = ["dump_dataclass", "instantiate"] - - -def dump_dataclass(obj: Any): - """ - Dump a dataclass recursively into a dict that can be later instantiated. - - Args: - obj: a dataclass object - - Returns: - dict - """ - assert dataclasses.is_dataclass(obj) and not isinstance( - obj, type - ), "dump_dataclass() requires an instance of a dataclass." - ret = {"_target_": _convert_target_to_string(type(obj))} - for f in dataclasses.fields(obj): - v = getattr(obj, f.name) - if dataclasses.is_dataclass(v): - v = dump_dataclass(v) - if isinstance(v, (list, tuple)): - v = [dump_dataclass(x) if dataclasses.is_dataclass(x) else x for x in v] - ret[f.name] = v - return ret - - -def instantiate(cfg): - """ - Recursively instantiate objects defined in dictionaries by - "_target_" and arguments. 
- - Args: - cfg: a dict-like object with "_target_" that defines the caller, and - other keys that define the arguments - - Returns: - object instantiated by cfg - """ - from omegaconf import ListConfig, DictConfig, OmegaConf - - if isinstance(cfg, ListConfig): - lst = [instantiate(x) for x in cfg] - return ListConfig(lst, flags={"allow_objects": True}) - if isinstance(cfg, list): - # Specialize for list, because many classes take - # list[objects] as arguments, such as ResNet, DatasetMapper - return [instantiate(x) for x in cfg] - - # If input is a DictConfig backed by dataclasses (i.e. omegaconf's structured config), - # instantiate it to the actual dataclass. - if isinstance(cfg, DictConfig) and dataclasses.is_dataclass(cfg._metadata.object_type): - return OmegaConf.to_object(cfg) - - if isinstance(cfg, abc.Mapping) and "_target_" in cfg: - # conceptually equivalent to hydra.utils.instantiate(cfg) with _convert_=all, - # but faster: https://github.com/facebookresearch/hydra/issues/1200 - cfg = {k: instantiate(v) for k, v in cfg.items()} - cls = cfg.pop("_target_") - cls = instantiate(cls) - - if isinstance(cls, str): - cls_name = cls - cls = locate(cls_name) - assert cls is not None, cls_name - else: - try: - cls_name = cls.__module__ + "." + cls.__qualname__ - except Exception: - # target could be anything, so the above could fail - cls_name = str(cls) - assert callable(cls), f"_target_ {cls} does not define a callable object" - try: - return cls(**cfg) - except TypeError: - logger = logging.getLogger(__name__) - logger.error(f"Error when instantiating {cls_name}!") - raise - return cfg # return as-is if don't know what to do diff --git a/detectron2/detectron2/config/lazy.py b/detectron2/detectron2/config/lazy.py deleted file mode 100644 index a0d295a27cf2e76a75a2628b5d1e1deaf4dd803f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/config/lazy.py +++ /dev/null @@ -1,436 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import ast -import builtins -import collections.abc as abc -import importlib -import inspect -import logging -import os -import uuid -from contextlib import contextmanager -from copy import deepcopy -from dataclasses import is_dataclass -from typing import List, Tuple, Union -import cloudpickle -import yaml -from omegaconf import DictConfig, ListConfig, OmegaConf, SCMode - -from detectron2.utils.file_io import PathManager -from detectron2.utils.registry import _convert_target_to_string - -__all__ = ["LazyCall", "LazyConfig"] - - -class LazyCall: - """ - Wrap a callable so that when it's called, the call will not be executed, - but returns a dict that describes the call. - - LazyCall object has to be called with only keyword arguments. Positional - arguments are not yet supported. - - Examples: - :: - from detectron2.config import instantiate, LazyCall - - layer_cfg = LazyCall(nn.Conv2d)(in_channels=32, out_channels=32) - layer_cfg.out_channels = 64 # can edit it afterwards - layer = instantiate(layer_cfg) - """ - - def __init__(self, target): - if not (callable(target) or isinstance(target, (str, abc.Mapping))): - raise TypeError( - f"target of LazyCall must be a callable or defines a callable! 
Got {target}" - ) - self._target = target - - def __call__(self, **kwargs): - if is_dataclass(self._target): - # omegaconf object cannot hold dataclass type - # https://github.com/omry/omegaconf/issues/784 - target = _convert_target_to_string(self._target) - else: - target = self._target - kwargs["_target_"] = target - - return DictConfig(content=kwargs, flags={"allow_objects": True}) - - -def _visit_dict_config(cfg, func): - """ - Apply func recursively to all DictConfig in cfg. - """ - if isinstance(cfg, DictConfig): - func(cfg) - for v in cfg.values(): - _visit_dict_config(v, func) - elif isinstance(cfg, ListConfig): - for v in cfg: - _visit_dict_config(v, func) - - -def _validate_py_syntax(filename): - # see also https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py - with PathManager.open(filename, "r") as f: - content = f.read() - try: - ast.parse(content) - except SyntaxError as e: - raise SyntaxError(f"Config file {filename} has syntax error!") from e - - -def _cast_to_config(obj): - # if given a dict, return DictConfig instead - if isinstance(obj, dict): - return DictConfig(obj, flags={"allow_objects": True}) - return obj - - -_CFG_PACKAGE_NAME = "detectron2._cfg_loader" -""" -A namespace to put all imported config into. -""" - - -def _random_package_name(filename): - # generate a random package name when loading config files - return _CFG_PACKAGE_NAME + str(uuid.uuid4())[:4] + "." + os.path.basename(filename) - - -@contextmanager -def _patch_import(): - """ - Enhance relative import statements in config files, so that they: - 1. locate files purely based on relative location, regardless of packages. - e.g. you can import file without having __init__ - 2. do not cache modules globally; modifications of module states has no side effect - 3. support other storage system through PathManager, so config files can be in the cloud - 4. imported dict are turned into omegaconf.DictConfig automatically - """ - old_import = builtins.__import__ - - def find_relative_file(original_file, relative_import_path, level): - # NOTE: "from . import x" is not handled. Because then it's unclear - # if such import should produce `x` as a python module or DictConfig. - # This can be discussed further if needed. - relative_import_err = """ -Relative import of directories is not allowed within config files. -Within a config file, relative import can only import other config files. -""".replace( - "\n", " " - ) - if not len(relative_import_path): - raise ImportError(relative_import_err) - - cur_file = os.path.dirname(original_file) - for _ in range(level - 1): - cur_file = os.path.dirname(cur_file) - cur_name = relative_import_path.lstrip(".") - for part in cur_name.split("."): - cur_file = os.path.join(cur_file, part) - if not cur_file.endswith(".py"): - cur_file += ".py" - if not PathManager.isfile(cur_file): - cur_file_no_suffix = cur_file[: -len(".py")] - if PathManager.isdir(cur_file_no_suffix): - raise ImportError(f"Cannot import from {cur_file_no_suffix}." + relative_import_err) - else: - raise ImportError( - f"Cannot import name {relative_import_path} from " - f"{original_file}: {cur_file} does not exist." 
- ) - return cur_file - - def new_import(name, globals=None, locals=None, fromlist=(), level=0): - if ( - # Only deal with relative imports inside config files - level != 0 - and globals is not None - and (globals.get("__package__", "") or "").startswith(_CFG_PACKAGE_NAME) - ): - cur_file = find_relative_file(globals["__file__"], name, level) - _validate_py_syntax(cur_file) - spec = importlib.machinery.ModuleSpec( - _random_package_name(cur_file), None, origin=cur_file - ) - module = importlib.util.module_from_spec(spec) - module.__file__ = cur_file - with PathManager.open(cur_file) as f: - content = f.read() - exec(compile(content, cur_file, "exec"), module.__dict__) - for name in fromlist: # turn imported dict into DictConfig automatically - val = _cast_to_config(module.__dict__[name]) - module.__dict__[name] = val - return module - return old_import(name, globals, locals, fromlist=fromlist, level=level) - - builtins.__import__ = new_import - yield new_import - builtins.__import__ = old_import - - -class LazyConfig: - """ - Provide methods to save, load, and overrides an omegaconf config object - which may contain definition of lazily-constructed objects. - """ - - @staticmethod - def load_rel(filename: str, keys: Union[None, str, Tuple[str, ...]] = None): - """ - Similar to :meth:`load()`, but load path relative to the caller's - source file. - - This has the same functionality as a relative import, except that this method - accepts filename as a string, so more characters are allowed in the filename. - """ - caller_frame = inspect.stack()[1] - caller_fname = caller_frame[0].f_code.co_filename - assert caller_fname != "", "load_rel Unable to find caller" - caller_dir = os.path.dirname(caller_fname) - filename = os.path.join(caller_dir, filename) - return LazyConfig.load(filename, keys) - - @staticmethod - def load(filename: str, keys: Union[None, str, Tuple[str, ...]] = None): - """ - Load a config file. - - Args: - filename: absolute path or relative path w.r.t. the current working directory - keys: keys to load and return. If not given, return all keys - (whose values are config objects) in a dict. - """ - has_keys = keys is not None - filename = filename.replace("/./", "/") # redundant - if os.path.splitext(filename)[1] not in [".py", ".yaml", ".yml"]: - raise ValueError(f"Config file {filename} has to be a python or yaml file.") - if filename.endswith(".py"): - _validate_py_syntax(filename) - - with _patch_import(): - # Record the filename - module_namespace = { - "__file__": filename, - "__package__": _random_package_name(filename), - } - with PathManager.open(filename) as f: - content = f.read() - # Compile first with filename to: - # 1. make filename appears in stacktrace - # 2. 
make load_rel able to find its parent's (possibly remote) location - exec(compile(content, filename, "exec"), module_namespace) - - ret = module_namespace - else: - with PathManager.open(filename) as f: - obj = yaml.unsafe_load(f) - ret = OmegaConf.create(obj, flags={"allow_objects": True}) - - if has_keys: - if isinstance(keys, str): - return _cast_to_config(ret[keys]) - else: - return tuple(_cast_to_config(ret[a]) for a in keys) - else: - if filename.endswith(".py"): - # when not specified, only load those that are config objects - ret = DictConfig( - { - name: _cast_to_config(value) - for name, value in ret.items() - if isinstance(value, (DictConfig, ListConfig, dict)) - and not name.startswith("_") - }, - flags={"allow_objects": True}, - ) - return ret - - @staticmethod - def save(cfg, filename: str): - """ - Save a config object to a yaml file. - Note that when the config dictionary contains complex objects (e.g. lambda), - it can't be saved to yaml. In that case we will print an error and - attempt to save to a pkl file instead. - - Args: - cfg: an omegaconf config object - filename: yaml file name to save the config file - """ - logger = logging.getLogger(__name__) - try: - cfg = deepcopy(cfg) - except Exception: - pass - else: - # if it's deep-copyable, then... - def _replace_type_by_name(x): - if "_target_" in x and callable(x._target_): - try: - x._target_ = _convert_target_to_string(x._target_) - except AttributeError: - pass - - # not necessary, but makes yaml looks nicer - _visit_dict_config(cfg, _replace_type_by_name) - - save_pkl = False - try: - dict = OmegaConf.to_container( - cfg, - # Do not resolve interpolation when saving, i.e. do not turn ${a} into - # actual values when saving. - resolve=False, - # Save structures (dataclasses) in a format that can be instantiated later. - # Without this option, the type information of the dataclass will be erased. - structured_config_mode=SCMode.INSTANTIATE, - ) - dumped = yaml.dump(dict, default_flow_style=None, allow_unicode=True, width=9999) - with PathManager.open(filename, "w") as f: - f.write(dumped) - - try: - _ = yaml.unsafe_load(dumped) # test that it is loadable - except Exception: - logger.warning( - "The config contains objects that cannot serialize to a valid yaml. " - f"{filename} is human-readable but cannot be loaded." - ) - save_pkl = True - except Exception: - logger.exception("Unable to serialize the config to yaml. Error:") - save_pkl = True - - if save_pkl: - new_filename = filename + ".pkl" - try: - # retry by pickle - with PathManager.open(new_filename, "wb") as f: - cloudpickle.dump(cfg, f) - logger.warning(f"Config is saved using cloudpickle at {new_filename}.") - except Exception: - pass - - @staticmethod - def apply_overrides(cfg, overrides: List[str]): - """ - In-place override contents of cfg. - - Args: - cfg: an omegaconf config object - overrides: list of strings in the format of "a=b" to override configs. - See https://hydra.cc/docs/next/advanced/override_grammar/basic/ - for syntax. - - Returns: - the cfg object - """ - - def safe_update(cfg, key, value): - parts = key.split(".") - for idx in range(1, len(parts)): - prefix = ".".join(parts[:idx]) - v = OmegaConf.select(cfg, prefix, default=None) - if v is None: - break - if not OmegaConf.is_config(v): - raise KeyError( - f"Trying to update key {key}, but {prefix} " - f"is not a config, but has type {type(v)}." 
- ) - OmegaConf.update(cfg, key, value, merge=True) - - try: - from hydra.core.override_parser.overrides_parser import OverridesParser - - has_hydra = True - except ImportError: - has_hydra = False - - if has_hydra: - parser = OverridesParser.create() - overrides = parser.parse_overrides(overrides) - for o in overrides: - key = o.key_or_group - value = o.value() - if o.is_delete(): - # TODO support this - raise NotImplementedError("deletion is not yet a supported override") - safe_update(cfg, key, value) - else: - # Fallback. Does not support all the features and error checking like hydra. - for o in overrides: - key, value = o.split("=") - try: - value = ast.literal_eval(value) - except NameError: - pass - safe_update(cfg, key, value) - return cfg - - @staticmethod - def to_py(cfg, prefix: str = "cfg."): - """ - Try to convert a config object into Python-like psuedo code. - - Note that perfect conversion is not always possible. So the returned - results are mainly meant to be human-readable, and not meant to be executed. - - Args: - cfg: an omegaconf config object - prefix: root name for the resulting code (default: "cfg.") - - - Returns: - str of formatted Python code - """ - import black - - cfg = OmegaConf.to_container(cfg, resolve=True) - - def _to_str(obj, prefix=None, inside_call=False): - if prefix is None: - prefix = [] - if isinstance(obj, abc.Mapping) and "_target_" in obj: - # Dict representing a function call - target = _convert_target_to_string(obj.pop("_target_")) - args = [] - for k, v in sorted(obj.items()): - args.append(f"{k}={_to_str(v, inside_call=True)}") - args = ", ".join(args) - call = f"{target}({args})" - return "".join(prefix) + call - elif isinstance(obj, abc.Mapping) and not inside_call: - # Dict that is not inside a call is a list of top-level config objects that we - # render as one object per line with dot separated prefixes - key_list = [] - for k, v in sorted(obj.items()): - if isinstance(v, abc.Mapping) and "_target_" not in v: - key_list.append(_to_str(v, prefix=prefix + [k + "."])) - else: - key = "".join(prefix) + k - key_list.append(f"{key}={_to_str(v)}") - return "\n".join(key_list) - elif isinstance(obj, abc.Mapping): - # Dict that is inside a call is rendered as a regular dict - return ( - "{" - + ",".join( - f"{repr(k)}: {_to_str(v, inside_call=inside_call)}" - for k, v in sorted(obj.items()) - ) - + "}" - ) - elif isinstance(obj, list): - return "[" + ",".join(_to_str(x, inside_call=inside_call) for x in obj) + "]" - else: - return repr(obj) - - py_str = _to_str(cfg, prefix=[prefix]) - try: - return black.format_str(py_str, mode=black.Mode()) - except black.InvalidInput: - return py_str diff --git a/detectron2/detectron2/data/__init__.py b/detectron2/detectron2/data/__init__.py deleted file mode 100644 index 259f669b78bd05815cb8d3351fd6c5fc9a1b85a1..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from . import transforms # isort:skip - -from .build import ( - build_batch_data_loader, - build_detection_test_loader, - build_detection_train_loader, - get_detection_dataset_dicts, - load_proposals_into_dataset, - print_instances_class_histogram, -) -from .catalog import DatasetCatalog, MetadataCatalog, Metadata -from .common import DatasetFromList, MapDataset, ToIterableDataset -from .dataset_mapper import DatasetMapper - -# ensure the builtin datasets are registered -from . 
import datasets, samplers # isort:skip - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/data/benchmark.py b/detectron2/detectron2/data/benchmark.py deleted file mode 100644 index ac2f372a4b111ad40b8e720adea208608271bab6..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/benchmark.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import numpy as np -from itertools import count -from typing import List, Tuple -import torch -import tqdm -from fvcore.common.timer import Timer - -from detectron2.utils import comm - -from .build import build_batch_data_loader -from .common import DatasetFromList, MapDataset -from .samplers import TrainingSampler - -logger = logging.getLogger(__name__) - - -class _EmptyMapDataset(torch.utils.data.Dataset): - """ - Map anything to emptiness. - """ - - def __init__(self, dataset): - self.ds = dataset - - def __len__(self): - return len(self.ds) - - def __getitem__(self, idx): - _ = self.ds[idx] - return [0] - - -def iter_benchmark( - iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60 -) -> Tuple[float, List[float]]: - """ - Benchmark an iterator/iterable for `num_iter` iterations with an extra - `warmup` iterations of warmup. - End early if `max_time_seconds` time is spent on iterations. - - Returns: - float: average time (seconds) per iteration - list[float]: time spent on each iteration. Sometimes useful for further analysis. - """ - num_iter, warmup = int(num_iter), int(warmup) - - iterator = iter(iterator) - for _ in range(warmup): - next(iterator) - timer = Timer() - all_times = [] - for curr_iter in tqdm.trange(num_iter): - start = timer.seconds() - if start > max_time_seconds: - num_iter = curr_iter - break - next(iterator) - all_times.append(timer.seconds() - start) - avg = timer.seconds() / num_iter - return avg, all_times - - -class DataLoaderBenchmark: - """ - Some common benchmarks that help understand perf bottleneck of a standard dataloader - made of dataset, mapper and sampler. - """ - - def __init__( - self, - dataset, - *, - mapper, - sampler=None, - total_batch_size, - num_workers=0, - max_time_seconds: int = 90, - ): - """ - Args: - max_time_seconds (int): maximum time to spent for each benchmark - other args: same as in `build.py:build_detection_train_loader` - """ - if isinstance(dataset, list): - dataset = DatasetFromList(dataset, copy=False, serialize=True) - if sampler is None: - sampler = TrainingSampler(len(dataset)) - - self.dataset = dataset - self.mapper = mapper - self.sampler = sampler - self.total_batch_size = total_batch_size - self.num_workers = num_workers - self.per_gpu_batch_size = self.total_batch_size // comm.get_world_size() - - self.max_time_seconds = max_time_seconds - - def _benchmark(self, iterator, num_iter, warmup, msg=None): - avg, all_times = iter_benchmark(iterator, num_iter, warmup, self.max_time_seconds) - if msg is not None: - self._log_time(msg, avg, all_times) - return avg, all_times - - def _log_time(self, msg, avg, all_times, distributed=False): - percentiles = [np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]] - if not distributed: - logger.info( - f"{msg}: avg={1.0/avg:.1f} it/s, " - f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, " - f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s." 
- ) - return - avg_per_gpu = comm.all_gather(avg) - percentiles_per_gpu = comm.all_gather(percentiles) - if comm.get_rank() > 0: - return - for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu): - logger.info( - f"GPU{idx} {msg}: avg={1.0/avg:.1f} it/s, " - f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, " - f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s." - ) - - def benchmark_dataset(self, num_iter, warmup=5): - """ - Benchmark the speed of taking raw samples from the dataset. - """ - - def loader(): - while True: - for k in self.sampler: - yield self.dataset[k] - - self._benchmark(loader(), num_iter, warmup, "Dataset Alone") - - def benchmark_mapper(self, num_iter, warmup=5): - """ - Benchmark the speed of taking raw samples from the dataset and map - them in a single process. - """ - - def loader(): - while True: - for k in self.sampler: - yield self.mapper(self.dataset[k]) - - self._benchmark(loader(), num_iter, warmup, "Single Process Mapper (sec/sample)") - - def benchmark_workers(self, num_iter, warmup=10): - """ - Benchmark the dataloader by tuning num_workers to [0, 1, self.num_workers]. - """ - candidates = [0, 1] - if self.num_workers not in candidates: - candidates.append(self.num_workers) - - dataset = MapDataset(self.dataset, self.mapper) - for n in candidates: - loader = build_batch_data_loader( - dataset, - self.sampler, - self.total_batch_size, - num_workers=n, - ) - self._benchmark( - iter(loader), - num_iter * max(n, 1), - warmup * max(n, 1), - f"DataLoader ({n} workers, bs={self.per_gpu_batch_size})", - ) - del loader - - def benchmark_IPC(self, num_iter, warmup=10): - """ - Benchmark the dataloader where each worker outputs nothing. This - eliminates the IPC overhead compared to the regular dataloader. - - PyTorch multiprocessing's IPC only optimizes for torch tensors. - Large numpy arrays or other data structure may incur large IPC overhead. - """ - n = self.num_workers - dataset = _EmptyMapDataset(MapDataset(self.dataset, self.mapper)) - loader = build_batch_data_loader( - dataset, self.sampler, self.total_batch_size, num_workers=n - ) - self._benchmark( - iter(loader), - num_iter * max(n, 1), - warmup * max(n, 1), - f"DataLoader ({n} workers, bs={self.per_gpu_batch_size}) w/o comm", - ) - - def benchmark_distributed(self, num_iter, warmup=10): - """ - Benchmark the dataloader in each distributed worker, and log results of - all workers. This helps understand the final performance as well as - the variances among workers. - - It also prints startup time (first iter) of the dataloader. - """ - gpu = comm.get_world_size() - dataset = MapDataset(self.dataset, self.mapper) - n = self.num_workers - loader = build_batch_data_loader( - dataset, self.sampler, self.total_batch_size, num_workers=n - ) - - timer = Timer() - loader = iter(loader) - next(loader) - startup_time = timer.seconds() - logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time)) - - comm.synchronize() - - avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1)) - del loader - self._log_time( - f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})", - avg, - all_times, - True, - ) diff --git a/detectron2/detectron2/data/build.py b/detectron2/detectron2/data/build.py deleted file mode 100644 index 1cc8f0f4be84bf6317978a78da23d6a3a195c48a..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/build.py +++ /dev/null @@ -1,694 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import itertools -import logging -import numpy as np -import operator -import pickle -from collections import OrderedDict, defaultdict -from typing import Any, Callable, Dict, List, Optional, Union -import torch -import torch.utils.data as torchdata -from tabulate import tabulate -from termcolor import colored - -from detectron2.config import configurable -from detectron2.structures import BoxMode -from detectron2.utils.comm import get_world_size -from detectron2.utils.env import seed_all_rng -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import _log_api_usage, log_first_n - -from .catalog import DatasetCatalog, MetadataCatalog -from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset, ToIterableDataset -from .dataset_mapper import DatasetMapper -from .detection_utils import check_metadata_consistency -from .samplers import ( - InferenceSampler, - RandomSubsetTrainingSampler, - RepeatFactorTrainingSampler, - TrainingSampler, -) - -""" -This file contains the default logic to build a dataloader for training or testing. -""" - -__all__ = [ - "build_batch_data_loader", - "build_detection_train_loader", - "build_detection_test_loader", - "get_detection_dataset_dicts", - "load_proposals_into_dataset", - "print_instances_class_histogram", -] - - -def filter_images_with_only_crowd_annotations(dataset_dicts): - """ - Filter out images with none annotations or only crowd annotations - (i.e., images without non-crowd annotations). - A common training-time preprocessing on COCO dataset. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - - Returns: - list[dict]: the same format, but filtered. - """ - num_before = len(dataset_dicts) - - def valid(anns): - for ann in anns: - if ann.get("iscrowd", 0) == 0: - return True - return False - - dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] - num_after = len(dataset_dicts) - logger = logging.getLogger(__name__) - logger.info( - "Removed {} images with no usable annotations. {} images left.".format( - num_before - num_after, num_after - ) - ) - return dataset_dicts - - -def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): - """ - Filter out images with too few number of keypoints. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - - Returns: - list[dict]: the same format as dataset_dicts, but filtered. - """ - num_before = len(dataset_dicts) - - def visible_keypoints_in_image(dic): - # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility - annotations = dic["annotations"] - return sum( - (np.array(ann["keypoints"][2::3]) > 0).sum() - for ann in annotations - if "keypoints" in ann - ) - - dataset_dicts = [ - x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image - ] - num_after = len(dataset_dicts) - logger = logging.getLogger(__name__) - logger.info( - "Removed {} images with fewer than {} keypoints.".format( - num_before - num_after, min_keypoints_per_image - ) - ) - return dataset_dicts - - -def load_proposals_into_dataset(dataset_dicts, proposal_file): - """ - Load precomputed object proposals into the dataset. - - The proposal file should be a pickled dict with the following keys: - - - "ids": list[int] or list[str], the image ids - - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id - - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores - corresponding to the boxes. 
- - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - proposal_file (str): file path of pre-computed proposals, in pkl format. - - Returns: - list[dict]: the same format as dataset_dicts, but added proposal field. - """ - logger = logging.getLogger(__name__) - logger.info("Loading proposals from: {}".format(proposal_file)) - - with PathManager.open(proposal_file, "rb") as f: - proposals = pickle.load(f, encoding="latin1") - - # Rename the key names in D1 proposal files - rename_keys = {"indexes": "ids", "scores": "objectness_logits"} - for key in rename_keys: - if key in proposals: - proposals[rename_keys[key]] = proposals.pop(key) - - # Fetch the indexes of all proposals that are in the dataset - # Convert image_id to str since they could be int. - img_ids = set({str(record["image_id"]) for record in dataset_dicts}) - id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} - - # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' - bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS - - for record in dataset_dicts: - # Get the index of the proposal - i = id_to_index[str(record["image_id"])] - - boxes = proposals["boxes"][i] - objectness_logits = proposals["objectness_logits"][i] - # Sort the proposals in descending order of the scores - inds = objectness_logits.argsort()[::-1] - record["proposal_boxes"] = boxes[inds] - record["proposal_objectness_logits"] = objectness_logits[inds] - record["proposal_bbox_mode"] = bbox_mode - - return dataset_dicts - - -def print_instances_class_histogram(dataset_dicts, class_names): - """ - Args: - dataset_dicts (list[dict]): list of dataset dicts. - class_names (list[str]): list of class names (zero-indexed). - """ - num_classes = len(class_names) - hist_bins = np.arange(num_classes + 1) - histogram = np.zeros((num_classes,), dtype=int) - for entry in dataset_dicts: - annos = entry["annotations"] - classes = np.asarray( - [x["category_id"] for x in annos if not x.get("iscrowd", 0)], dtype=int - ) - if len(classes): - assert classes.min() >= 0, f"Got an invalid category_id={classes.min()}" - assert ( - classes.max() < num_classes - ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes" - histogram += np.histogram(classes, bins=hist_bins)[0] - - N_COLS = min(6, len(class_names) * 2) - - def short_name(x): - # make long class names shorter. useful for lvis - if len(x) > 13: - return x[:11] + ".." - return x - - data = list( - itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) - ) - total_num_instances = sum(data[1::2]) - data.extend([None] * (N_COLS - (len(data) % N_COLS))) - if num_classes > 1: - data.extend(["total", total_num_instances]) - data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) - table = tabulate( - data, - headers=["category", "#instances"] * (N_COLS // 2), - tablefmt="pipe", - numalign="left", - stralign="center", - ) - log_first_n( - logging.INFO, - "Distribution of instances among all {} categories:\n".format(num_classes) - + colored(table, "cyan"), - key="message", - ) - - -def get_detection_dataset_dicts( - names, - filter_empty=True, - min_keypoints=0, - proposal_files=None, - check_consistency=True, -): - """ - Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. 
- - Args: - names (str or list[str]): a dataset name or a list of dataset names - filter_empty (bool): whether to filter out images without instance annotations - min_keypoints (int): filter out images with fewer keypoints than - `min_keypoints`. Set to 0 to do nothing. - proposal_files (list[str]): if given, a list of object proposal files - that match each dataset in `names`. - check_consistency (bool): whether to check if datasets have consistent metadata. - - Returns: - list[dict]: a list of dicts following the standard dataset dict format. - """ - if isinstance(names, str): - names = [names] - assert len(names), names - - available_datasets = DatasetCatalog.keys() - names_set = set(names) - if not names_set.issubset(available_datasets): - logger = logging.getLogger(__name__) - logger.warning( - "The following dataset names are not registered in the DatasetCatalog: " - f"{names_set - available_datasets}. " - f"Available datasets are {available_datasets}" - ) - - dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in names] - - if isinstance(dataset_dicts[0], torchdata.Dataset): - if len(dataset_dicts) > 1: - # ConcatDataset does not work for iterable style dataset. - # We could support concat for iterable as well, but it's often - # not a good idea to concat iterables anyway. - return torchdata.ConcatDataset(dataset_dicts) - return dataset_dicts[0] - - for dataset_name, dicts in zip(names, dataset_dicts): - assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) - - if proposal_files is not None: - assert len(names) == len(proposal_files) - # load precomputed proposals from proposal files - dataset_dicts = [ - load_proposals_into_dataset(dataset_i_dicts, proposal_file) - for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) - ] - - dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) - - has_instances = "annotations" in dataset_dicts[0] - if filter_empty and has_instances: - dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) - if min_keypoints > 0 and has_instances: - dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) - - if check_consistency and has_instances: - try: - class_names = MetadataCatalog.get(names[0]).thing_classes - check_metadata_consistency("thing_classes", names) - print_instances_class_histogram(dataset_dicts, class_names) - except AttributeError: # class names are not available for this dataset - pass - - assert len(dataset_dicts), "No valid data found in {}.".format(",".join(names)) - return dataset_dicts - - -def build_batch_data_loader( - dataset, - sampler, - total_batch_size, - *, - aspect_ratio_grouping=False, - num_workers=0, - collate_fn=None, - drop_last: bool = True, - single_gpu_batch_size=None, - prefetch_factor=2, - persistent_workers=False, - pin_memory=False, - seed=None, - **kwargs, -): - """ - Build a batched dataloader. The main differences from `torch.utils.data.DataLoader` are: - 1. support aspect ratio grouping options - 2. use no "batch collation", because this is common for detection training - - Args: - dataset (torch.utils.data.Dataset): a pytorch map-style or iterable dataset. - sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces indices. - Must be provided iff. ``dataset`` is a map-style dataset. - total_batch_size, aspect_ratio_grouping, num_workers, collate_fn: see - :func:`build_detection_train_loader`. - single_gpu_batch_size: You can specify either `single_gpu_batch_size` or `total_batch_size`. 
- `single_gpu_batch_size` specifies the batch size that will be used for each gpu/process. - `total_batch_size` allows you to specify the total aggregate batch size across gpus. - It is an error to supply a value for both. - drop_last (bool): if ``True``, the dataloader will drop incomplete batches. - - Returns: - iterable[list]. Length of each list is the batch size of the current - GPU. Each element in the list comes from the dataset. - """ - if single_gpu_batch_size: - if total_batch_size: - raise ValueError( - """total_batch_size and single_gpu_batch_size are mutually incompatible. - Please specify only one. """ - ) - batch_size = single_gpu_batch_size - else: - world_size = get_world_size() - assert ( - total_batch_size > 0 and total_batch_size % world_size == 0 - ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( - total_batch_size, world_size - ) - batch_size = total_batch_size // world_size - logger = logging.getLogger(__name__) - logger.info("Making batched data loader with batch_size=%d", batch_size) - - if isinstance(dataset, torchdata.IterableDataset): - assert sampler is None, "sampler must be None if dataset is IterableDataset" - else: - dataset = ToIterableDataset(dataset, sampler, shard_chunk_size=batch_size) - - generator = None - if seed is not None: - generator = torch.Generator() - generator.manual_seed(seed) - - if aspect_ratio_grouping: - assert drop_last, "Aspect ratio grouping will drop incomplete batches." - data_loader = torchdata.DataLoader( - dataset, - num_workers=num_workers, - collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements - worker_init_fn=worker_init_reset_seed, - prefetch_factor=prefetch_factor if num_workers > 0 else None, - persistent_workers=persistent_workers, - pin_memory=pin_memory, - generator=generator, - **kwargs, - ) # yield individual mapped dict - data_loader = AspectRatioGroupedDataset(data_loader, batch_size) - if collate_fn is None: - return data_loader - return MapDataset(data_loader, collate_fn) - else: - return torchdata.DataLoader( - dataset, - batch_size=batch_size, - drop_last=drop_last, - num_workers=num_workers, - collate_fn=trivial_batch_collator if collate_fn is None else collate_fn, - worker_init_fn=worker_init_reset_seed, - prefetch_factor=prefetch_factor if num_workers > 0 else None, - persistent_workers=persistent_workers, - pin_memory=pin_memory, - generator=generator, - **kwargs, - ) - - -def _get_train_datasets_repeat_factors(cfg) -> Dict[str, float]: - repeat_factors = cfg.DATASETS.TRAIN_REPEAT_FACTOR - assert all(len(tup) == 2 for tup in repeat_factors) - name_to_weight = defaultdict(lambda: 1, dict(repeat_factors)) - # The sampling weights map should only contain datasets in train config - unrecognized = set(name_to_weight.keys()) - set(cfg.DATASETS.TRAIN) - assert not unrecognized, f"unrecognized datasets: {unrecognized}" - logger = logging.getLogger(__name__) - logger.info(f"Found repeat factors: {list(name_to_weight.items())}") - - # pyre-fixme[7]: Expected `Dict[str, float]` but got `DefaultDict[typing.Any, int]`. 
- return name_to_weight - - -def _build_weighted_sampler(cfg, enable_category_balance=False): - dataset_repeat_factors = _get_train_datasets_repeat_factors(cfg) - # OrderedDict to guarantee order of values() consistent with repeat factors - dataset_name_to_dicts = OrderedDict( - { - name: get_detection_dataset_dicts( - [name], - filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, - min_keypoints=( - cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE - if cfg.MODEL.KEYPOINT_ON - else 0 - ), - proposal_files=( - cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None - ), - ) - for name in cfg.DATASETS.TRAIN - } - ) - # Repeat factor for every sample in the dataset - repeat_factors = [ - [dataset_repeat_factors[dsname]] * len(dataset_name_to_dicts[dsname]) - for dsname in cfg.DATASETS.TRAIN - ] - - repeat_factors = list(itertools.chain.from_iterable(repeat_factors)) - - repeat_factors = torch.tensor(repeat_factors) - logger = logging.getLogger(__name__) - if enable_category_balance: - """ - 1. Calculate repeat factors using category frequency for each dataset and then merge them. - 2. Element wise dot producting the dataset frequency repeat factors with - the category frequency repeat factors gives the final repeat factors. - """ - category_repeat_factors = [ - RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( - dataset_dict, cfg.DATALOADER.REPEAT_THRESHOLD, sqrt=cfg.DATALOADER.REPEAT_SQRT - ) - for dataset_dict in dataset_name_to_dicts.values() - ] - # flatten the category repeat factors from all datasets - category_repeat_factors = list(itertools.chain.from_iterable(category_repeat_factors)) - category_repeat_factors = torch.tensor(category_repeat_factors) - repeat_factors = torch.mul(category_repeat_factors, repeat_factors) - repeat_factors = repeat_factors / torch.min(repeat_factors) - logger.info( - "Using WeightedCategoryTrainingSampler with repeat_factors={}".format( - cfg.DATASETS.TRAIN_REPEAT_FACTOR - ) - ) - else: - logger.info( - "Using WeightedTrainingSampler with repeat_factors={}".format( - cfg.DATASETS.TRAIN_REPEAT_FACTOR - ) - ) - - sampler = RepeatFactorTrainingSampler(repeat_factors) - return sampler - - -def _train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None): - if dataset is None: - dataset = get_detection_dataset_dicts( - cfg.DATASETS.TRAIN, - filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, - min_keypoints=( - cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE if cfg.MODEL.KEYPOINT_ON else 0 - ), - proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, - ) - _log_api_usage("dataset." 
+ cfg.DATASETS.TRAIN[0]) - - if mapper is None: - mapper = DatasetMapper(cfg, True) - - if sampler is None: - sampler_name = cfg.DATALOADER.SAMPLER_TRAIN - logger = logging.getLogger(__name__) - if isinstance(dataset, torchdata.IterableDataset): - logger.info("Not using any sampler since the dataset is IterableDataset.") - sampler = None - else: - logger.info("Using training sampler {}".format(sampler_name)) - if sampler_name == "TrainingSampler": - sampler = TrainingSampler(len(dataset), seed=cfg.SEED) - elif sampler_name == "RepeatFactorTrainingSampler": - repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( - dataset, cfg.DATALOADER.REPEAT_THRESHOLD, sqrt=cfg.DATALOADER.REPEAT_SQRT - ) - sampler = RepeatFactorTrainingSampler(repeat_factors, seed=cfg.SEED) - elif sampler_name == "RandomSubsetTrainingSampler": - sampler = RandomSubsetTrainingSampler( - len(dataset), cfg.DATALOADER.RANDOM_SUBSET_RATIO - ) - elif sampler_name == "WeightedTrainingSampler": - sampler = _build_weighted_sampler(cfg) - elif sampler_name == "WeightedCategoryTrainingSampler": - sampler = _build_weighted_sampler(cfg, enable_category_balance=True) - else: - raise ValueError("Unknown training sampler: {}".format(sampler_name)) - - return { - "dataset": dataset, - "sampler": sampler, - "mapper": mapper, - "total_batch_size": cfg.SOLVER.IMS_PER_BATCH, - "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING, - "num_workers": cfg.DATALOADER.NUM_WORKERS, - } - - -@configurable(from_config=_train_loader_from_config) -def build_detection_train_loader( - dataset, - *, - mapper, - sampler=None, - total_batch_size, - aspect_ratio_grouping=True, - num_workers=0, - collate_fn=None, - **kwargs, -): - """ - Build a dataloader for object detection with some default features. - - Args: - dataset (list or torch.utils.data.Dataset): a list of dataset dicts, - or a pytorch dataset (either map-style or iterable). It can be obtained - by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. - mapper (callable): a callable which takes a sample (dict) from dataset and - returns the format to be consumed by the model. - When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``. - sampler (torch.utils.data.sampler.Sampler or None): a sampler that produces - indices to be applied on ``dataset``. - If ``dataset`` is map-style, the default sampler is a :class:`TrainingSampler`, - which coordinates an infinite random shuffle sequence across all workers. - Sampler must be None if ``dataset`` is iterable. - total_batch_size (int): total batch size across all workers. - aspect_ratio_grouping (bool): whether to group images with similar - aspect ratio for efficiency. When enabled, it requires each - element in dataset be a dict with keys "width" and "height". - num_workers (int): number of parallel data loading workers - collate_fn: a function that determines how to do batching, same as the argument of - `torch.utils.data.DataLoader`. Defaults to do no collation and return a list of - data. No collation is OK for small batch size and simple data structures. - If your batch size is large and each sample contains too many small tensors, - it's more efficient to collate them in data loader. - - Returns: - torch.utils.data.DataLoader: - a dataloader. Each output from it is a ``list[mapped_element]`` of length - ``total_batch_size / num_workers``, where ``mapped_element`` is produced - by the ``mapper``. 
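To make the category-balanced branch of `_build_weighted_sampler` above more concrete, here is a rough sketch of how the per-dataset repeat factors and the per-image category-frequency repeat factors combine: an element-wise product, then normalization by the minimum. The tensors are illustrative placeholders, not real data.

import torch

# Hypothetical per-image repeat factors from the dataset-level weights
# (dataset A: weight 1.0, 3 images; dataset B: weight 2.0, 2 images).
dataset_rf = torch.tensor([1.0, 1.0, 1.0, 2.0, 2.0])

# Hypothetical per-image repeat factors from category frequency, as produced by
# RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(...).
category_rf = torch.tensor([1.0, 1.5, 1.0, 1.2, 1.0])

combined = dataset_rf * category_rf        # element-wise product
combined = combined / combined.min()       # normalize so the smallest factor is 1.0
# combined -> tensor([1.0, 1.5, 1.0, 2.4, 2.0]); this feeds RepeatFactorTrainingSampler(combined)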
- """ - if isinstance(dataset, list): - dataset = DatasetFromList(dataset, copy=False) - if mapper is not None: - dataset = MapDataset(dataset, mapper) - - if isinstance(dataset, torchdata.IterableDataset): - assert sampler is None, "sampler must be None if dataset is IterableDataset" - else: - if sampler is None: - sampler = TrainingSampler(len(dataset)) - assert isinstance(sampler, torchdata.Sampler), f"Expect a Sampler but got {type(sampler)}" - return build_batch_data_loader( - dataset, - sampler, - total_batch_size, - aspect_ratio_grouping=aspect_ratio_grouping, - num_workers=num_workers, - collate_fn=collate_fn, - **kwargs, - ) - - -def _test_loader_from_config(cfg, dataset_name, mapper=None): - """ - Uses the given `dataset_name` argument (instead of the names in cfg), because the - standard practice is to evaluate each test set individually (not combining them). - """ - if isinstance(dataset_name, str): - dataset_name = [dataset_name] - - dataset = get_detection_dataset_dicts( - dataset_name, - filter_empty=False, - proposal_files=( - [ - cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(x)] - for x in dataset_name - ] - if cfg.MODEL.LOAD_PROPOSALS - else None - ), - ) - if mapper is None: - mapper = DatasetMapper(cfg, False) - return { - "dataset": dataset, - "mapper": mapper, - "num_workers": cfg.DATALOADER.NUM_WORKERS, - "sampler": ( - InferenceSampler(len(dataset)) - if not isinstance(dataset, torchdata.IterableDataset) - else None - ), - } - - -@configurable(from_config=_test_loader_from_config) -def build_detection_test_loader( - dataset: Union[List[Any], torchdata.Dataset], - *, - mapper: Callable[[Dict[str, Any]], Any], - sampler: Optional[torchdata.Sampler] = None, - batch_size: int = 1, - num_workers: int = 0, - collate_fn: Optional[Callable[[List[Any]], Any]] = None, -) -> torchdata.DataLoader: - """ - Similar to `build_detection_train_loader`, with default batch size = 1, - and sampler = :class:`InferenceSampler`. This sampler coordinates all workers - to produce the exact set of all samples. - - Args: - dataset: a list of dataset dicts, - or a pytorch dataset (either map-style or iterable). They can be obtained - by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`. - mapper: a callable which takes a sample (dict) from dataset - and returns the format to be consumed by the model. - When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``. - sampler: a sampler that produces - indices to be applied on ``dataset``. Default to :class:`InferenceSampler`, - which splits the dataset across all workers. Sampler must be None - if `dataset` is iterable. - batch_size: the batch size of the data loader to be created. - Default to 1 image per worker since this is the standard when reporting - inference time in papers. - num_workers: number of parallel data loading workers - collate_fn: same as the argument of `torch.utils.data.DataLoader`. - Defaults to do no collation and return a list of data. - - Returns: - DataLoader: a torch DataLoader, that loads the given detection - dataset, with test-time transformation and batching. 
- - Examples: - :: - data_loader = build_detection_test_loader( - DatasetRegistry.get("my_test"), - mapper=DatasetMapper(...)) - - # or, instantiate with a CfgNode: - data_loader = build_detection_test_loader(cfg, "my_test") - """ - if isinstance(dataset, list): - dataset = DatasetFromList(dataset, copy=False) - if mapper is not None: - dataset = MapDataset(dataset, mapper) - if isinstance(dataset, torchdata.IterableDataset): - assert sampler is None, "sampler must be None if dataset is IterableDataset" - else: - if sampler is None: - sampler = InferenceSampler(len(dataset)) - return torchdata.DataLoader( - dataset, - batch_size=batch_size, - sampler=sampler, - drop_last=False, - num_workers=num_workers, - collate_fn=trivial_batch_collator if collate_fn is None else collate_fn, - ) - - -def trivial_batch_collator(batch): - """ - A batch collator that does nothing. - """ - return batch - - -def worker_init_reset_seed(worker_id): - initial_seed = torch.initial_seed() % 2**31 - seed_all_rng(initial_seed + worker_id) diff --git a/detectron2/detectron2/data/catalog.py b/detectron2/detectron2/data/catalog.py deleted file mode 100644 index 45c110c19508f23921b9033cdaf0aa8056f0c125..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/catalog.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import logging -import types -from collections import UserDict -from typing import List - -from detectron2.utils.logger import log_first_n - -__all__ = ["DatasetCatalog", "MetadataCatalog", "Metadata"] - - -class _DatasetCatalog(UserDict): - """ - A global dictionary that stores information about the datasets and how to obtain them. - - It contains a mapping from strings - (which are names that identify a dataset, e.g. "coco_2014_train") - to a function which parses the dataset and returns the samples in the - format of `list[dict]`. - - The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) - if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. - - The purpose of having this catalog is to make it easy to choose - different datasets, by just using the strings in the config. - """ - - def register(self, name, func): - """ - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - func (callable): a callable which takes no arguments and returns a list of dicts. - It must return the same results if called multiple times. - """ - assert callable(func), "You must register a function with `DatasetCatalog.register`!" - assert name not in self, "Dataset '{}' is already registered!".format(name) - self[name] = func - - def get(self, name): - """ - Call the registered function and return its results. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - - Returns: - list[dict]: dataset annotations. - """ - try: - f = self[name] - except KeyError as e: - raise KeyError( - "Dataset '{}' is not registered! Available datasets are: {}".format( - name, ", ".join(list(self.keys())) - ) - ) from e - return f() - - def list(self) -> List[str]: - """ - List all registered datasets. - - Returns: - list[str] - """ - return list(self.keys()) - - def remove(self, name): - """ - Alias of ``pop``. 
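A small example of the `DatasetCatalog` API documented above; the dataset name and the returned dicts are made up for illustration.

from detectron2.data import DatasetCatalog

def my_dataset_function():
    # Must return the same list[dict] (Detectron2 Dataset format) every time it is called.
    return [{"file_name": "img0.jpg", "height": 480, "width": 640, "image_id": 0, "annotations": []}]

DatasetCatalog.register("my_dataset_train", my_dataset_function)  # the name must not be registered yet
dicts = DatasetCatalog.get("my_dataset_train")                    # calls the registered function
print(DatasetCatalog.list()[:5])                                  # names of registered datasets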
- """ - self.pop(name) - - def __str__(self): - return "DatasetCatalog(registered datasets: {})".format(", ".join(self.keys())) - - __repr__ = __str__ - - -DatasetCatalog = _DatasetCatalog() -DatasetCatalog.__doc__ = ( - _DatasetCatalog.__doc__ - + """ - .. automethod:: detectron2.data.catalog.DatasetCatalog.register - .. automethod:: detectron2.data.catalog.DatasetCatalog.get -""" -) - - -class Metadata(types.SimpleNamespace): - """ - A class that supports simple attribute setter/getter. - It is intended for storing metadata of a dataset and make it accessible globally. - - Examples: - :: - # somewhere when you load the data: - MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] - - # somewhere when you print statistics or visualize: - classes = MetadataCatalog.get("mydataset").thing_classes - """ - - # the name of the dataset - # set default to N/A so that `self.name` in the errors will not trigger getattr again - name: str = "N/A" - - _RENAMED = { - "class_names": "thing_classes", - "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", - "stuff_class_names": "stuff_classes", - } - - def __getattr__(self, key): - if key in self._RENAMED: - log_first_n( - logging.WARNING, - "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), - n=10, - ) - return getattr(self, self._RENAMED[key]) - - # "name" exists in every metadata - if len(self.__dict__) > 1: - raise AttributeError( - "Attribute '{}' does not exist in the metadata of dataset '{}'. Available " - "keys are {}.".format(key, self.name, str(self.__dict__.keys())) - ) - else: - raise AttributeError( - f"Attribute '{key}' does not exist in the metadata of dataset '{self.name}': " - "metadata is empty." - ) - - def __setattr__(self, key, val): - if key in self._RENAMED: - log_first_n( - logging.WARNING, - "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), - n=10, - ) - setattr(self, self._RENAMED[key], val) - - # Ensure that metadata of the same name stays consistent - try: - oldval = getattr(self, key) - assert oldval == val, ( - "Attribute '{}' in the metadata of '{}' cannot be set " - "to a different value!\n{} != {}".format(key, self.name, oldval, val) - ) - except AttributeError: - super().__setattr__(key, val) - - def as_dict(self): - """ - Returns all the metadata as a dict. - Note that modifications to the returned dict will not reflect on the Metadata object. - """ - return copy.copy(self.__dict__) - - def set(self, **kwargs): - """ - Set multiple metadata with kwargs. - """ - for k, v in kwargs.items(): - setattr(self, k, v) - return self - - def get(self, key, default=None): - """ - Access an attribute and return its value if exists. - Otherwise return default. - """ - try: - return getattr(self, key) - except AttributeError: - return default - - -class _MetadataCatalog(UserDict): - """ - MetadataCatalog is a global dictionary that provides access to - :class:`Metadata` of a given dataset. - - The metadata associated with a certain name is a singleton: once created, the - metadata will stay alive and will be returned by future calls to ``get(name)``. - - It's like global variables, so don't abuse it. - It's meant for storing knowledge that's constant and shared across the execution - of the program, e.g.: the class names in COCO. - """ - - def get(self, name): - """ - Args: - name (str): name of a dataset (e.g. coco_2014_train). - - Returns: - Metadata: The :class:`Metadata` instance associated with this name, - or create an empty one if none is available. 
- """ - assert len(name) - r = super().get(name, None) - if r is None: - r = self[name] = Metadata(name=name) - return r - - def list(self): - """ - List all registered metadata. - - Returns: - list[str]: keys (names of datasets) of all registered metadata - """ - return list(self.keys()) - - def remove(self, name): - """ - Alias of ``pop``. - """ - self.pop(name) - - def __str__(self): - return "MetadataCatalog(registered metadata: {})".format(", ".join(self.keys())) - - __repr__ = __str__ - - -MetadataCatalog = _MetadataCatalog() -MetadataCatalog.__doc__ = ( - _MetadataCatalog.__doc__ - + """ - .. automethod:: detectron2.data.catalog.MetadataCatalog.get -""" -) diff --git a/detectron2/detectron2/data/common.py b/detectron2/detectron2/data/common.py deleted file mode 100644 index 4e1723b339fe964211d77e05cc0d4d6bd99afe0d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/common.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import contextlib -import copy -import itertools -import logging -import numpy as np -import pickle -import random -from typing import Callable, Union -import torch -import torch.utils.data as data -from torch.utils.data.sampler import Sampler - -from detectron2.utils.serialize import PicklableWrapper - -__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset", "ToIterableDataset"] - -logger = logging.getLogger(__name__) - - -# copied from: https://docs.python.org/3/library/itertools.html#recipes -def _roundrobin(*iterables): - "roundrobin('ABC', 'D', 'EF') --> A D E B F C" - # Recipe credited to George Sakkis - num_active = len(iterables) - nexts = itertools.cycle(iter(it).__next__ for it in iterables) - while num_active: - try: - for next in nexts: - yield next() - except StopIteration: - # Remove the iterator we just exhausted from the cycle. - num_active -= 1 - nexts = itertools.cycle(itertools.islice(nexts, num_active)) - - -def _shard_iterator_dataloader_worker(iterable, chunk_size=1): - # Shard the iterable if we're currently inside pytorch dataloader worker. - worker_info = data.get_worker_info() - if worker_info is None or worker_info.num_workers == 1: - # do nothing - yield from iterable - else: - # worker0: 0, 1, ..., chunk_size-1, num_workers*chunk_size, num_workers*chunk_size+1, ... - # worker1: chunk_size, chunk_size+1, ... - # worker2: 2*chunk_size, 2*chunk_size+1, ... - # ... - yield from _roundrobin( - *[ - itertools.islice( - iterable, - worker_info.id * chunk_size + chunk_i, - None, - worker_info.num_workers * chunk_size, - ) - for chunk_i in range(chunk_size) - ] - ) - - -class _MapIterableDataset(data.IterableDataset): - """ - Map a function over elements in an IterableDataset. - - Similar to pytorch's MapIterDataPipe, but support filtering when map_func - returns None. - - This class is not public-facing. Will be called by `MapDataset`. - """ - - def __init__(self, dataset, map_func): - self._dataset = dataset - self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work - - def __len__(self): - return len(self._dataset) - - def __iter__(self): - for x in map(self._map_func, self._dataset): - if x is not None: - yield x - - -class MapDataset(data.Dataset): - """ - Map a function over the elements in a dataset. - """ - - def __init__(self, dataset, map_func): - """ - Args: - dataset: a dataset where map function is applied. Can be either - map-style or iterable dataset. 
When given an iterable dataset, - the returned object will also be an iterable dataset. - map_func: a callable which maps the element in dataset. map_func can - return None to skip the data (e.g. in case of errors). - How None is handled depends on the style of `dataset`. - If `dataset` is map-style, it randomly tries other elements. - If `dataset` is iterable, it skips the data and tries the next. - """ - self._dataset = dataset - self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work - - self._rng = random.Random(42) - self._fallback_candidates = set(range(len(dataset))) - - def __new__(cls, dataset, map_func): - is_iterable = isinstance(dataset, data.IterableDataset) - if is_iterable: - return _MapIterableDataset(dataset, map_func) - else: - return super().__new__(cls) - - def __getnewargs__(self): - return self._dataset, self._map_func - - def __len__(self): - return len(self._dataset) - - def __getitem__(self, idx): - retry_count = 0 - cur_idx = int(idx) - - while True: - data = self._map_func(self._dataset[cur_idx]) - if data is not None: - self._fallback_candidates.add(cur_idx) - return data - - # _map_func fails for this idx, use a random new index from the pool - retry_count += 1 - self._fallback_candidates.discard(cur_idx) - cur_idx = self._rng.sample(list(self._fallback_candidates), k=1)[0] - - if retry_count >= 3: - logger = logging.getLogger(__name__) - logger.warning( - "Failed to apply `_map_func` for idx: {}, retry count: {}".format( - idx, retry_count - ) - ) - - -class _TorchSerializedList: - """ - A list-like object whose items are serialized and stored in a torch tensor. When - launching a process that uses TorchSerializedList with "fork" start method, - the subprocess can read the same buffer without triggering copy-on-access. When - launching a process that uses TorchSerializedList with "spawn/forkserver" start - method, the list will be pickled by a special ForkingPickler registered by PyTorch - that moves data to shared memory. In both cases, this allows parent and child - processes to share RAM for the list data, hence avoids the issue in - https://github.com/pytorch/pytorch/issues/13246. - - See also https://ppwwyyxx.com/blog/2022/Demystify-RAM-Usage-in-Multiprocess-DataLoader/ - on how it works. 
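A minimal sketch of the `MapDataset` behaviour described above: when `map_func` returns None for an element, the map-style version retries a different index, while the iterable version simply skips it. The toy dataset below is illustrative only.

from detectron2.data.common import MapDataset

raw = [{"value": 1}, {"value": -1}, {"value": 3}]        # pretend the negative sample is corrupt

def safe_map(d):
    # Returning None signals "skip this sample" (e.g. a decoding error).
    return d["value"] * 10 if d["value"] > 0 else None

ds = MapDataset(raw, safe_map)
print(ds[0])   # 10
print(ds[1])   # map_func returned None for idx 1 -> a random fallback index is used instead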
- """ - - def __init__(self, lst: list): - self._lst = lst - - def _serialize(data): - buffer = pickle.dumps(data, protocol=-1) - return np.frombuffer(buffer, dtype=np.uint8) - - logger.info( - "Serializing {} elements to byte tensors and concatenating them all ...".format( - len(self._lst) - ) - ) - self._lst = [_serialize(x) for x in self._lst] - self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) - self._addr = torch.from_numpy(np.cumsum(self._addr)) - self._lst = torch.from_numpy(np.concatenate(self._lst)) - logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024**2)) - - def __len__(self): - return len(self._addr) - - def __getitem__(self, idx): - start_addr = 0 if idx == 0 else self._addr[idx - 1].item() - end_addr = self._addr[idx].item() - bytes = memoryview(self._lst[start_addr:end_addr].numpy()) - - # @lint-ignore PYTHONPICKLEISBAD - return pickle.loads(bytes) - - -_DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = _TorchSerializedList - - -@contextlib.contextmanager -def set_default_dataset_from_list_serialize_method(new): - """ - Context manager for using custom serialize function when creating DatasetFromList - """ - - global _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD - orig = _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD - _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = new - yield - _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD = orig - - -class DatasetFromList(data.Dataset): - """ - Wrap a list to a torch Dataset. It produces elements of the list as data. - """ - - def __init__( - self, - lst: list, - copy: bool = True, - serialize: Union[bool, Callable] = True, - ): - """ - Args: - lst (list): a list which contains elements to produce. - copy (bool): whether to deepcopy the element when producing it, - so that the result can be modified in place without affecting the - source in the list. - serialize (bool or callable): whether to serialize the stroage to other - backend. If `True`, the default serialize method will be used, if given - a callable, the callable will be used as serialize method. - """ - self._lst = lst - self._copy = copy - if not isinstance(serialize, (bool, Callable)): - raise TypeError(f"Unsupported type for argument `serailzie`: {serialize}") - self._serialize = serialize is not False - - if self._serialize: - serialize_method = ( - serialize - if isinstance(serialize, Callable) - else _DEFAULT_DATASET_FROM_LIST_SERIALIZE_METHOD - ) - logger.info(f"Serializing the dataset using: {serialize_method}") - self._lst = serialize_method(self._lst) - - def __len__(self): - return len(self._lst) - - def __getitem__(self, idx): - if self._copy and not self._serialize: - return copy.deepcopy(self._lst[idx]) - else: - return self._lst[idx] - - -class ToIterableDataset(data.IterableDataset): - """ - Convert an old indices-based (also called map-style) dataset - to an iterable-style dataset. - """ - - def __init__( - self, - dataset: data.Dataset, - sampler: Sampler, - shard_sampler: bool = True, - shard_chunk_size: int = 1, - ): - """ - Args: - dataset: an old-style dataset with ``__getitem__`` - sampler: a cheap iterable that produces indices to be applied on ``dataset``. - shard_sampler: whether to shard the sampler based on the current pytorch data loader - worker id. When an IterableDataset is forked by pytorch's DataLoader into multiple - workers, it is responsible for sharding its data based on worker id so that workers - don't produce identical data. 
- - Most samplers (like our TrainingSampler) do not shard based on dataloader worker id - and this argument should be set to True. But certain samplers may be already - sharded, in that case this argument should be set to False. - shard_chunk_size: when sharding the sampler, each worker will - """ - assert not isinstance(dataset, data.IterableDataset), dataset - assert isinstance(sampler, Sampler), sampler - self.dataset = dataset - self.sampler = sampler - self.shard_sampler = shard_sampler - self.shard_chunk_size = shard_chunk_size - - def __iter__(self): - if not self.shard_sampler: - sampler = self.sampler - else: - # With map-style dataset, `DataLoader(dataset, sampler)` runs the - # sampler in main process only. But `DataLoader(ToIterableDataset(dataset, sampler))` - # will run sampler in every of the N worker. So we should only keep 1/N of the ids on - # each worker. The assumption is that sampler is cheap to iterate so it's fine to - # discard ids in workers. - sampler = _shard_iterator_dataloader_worker(self.sampler, self.shard_chunk_size) - for idx in sampler: - yield self.dataset[idx] - - def __len__(self): - return len(self.sampler) - - -class AspectRatioGroupedDataset(data.IterableDataset): - """ - Batch data that have similar aspect ratio together. - In this implementation, images whose aspect ratio < (or >) 1 will - be batched together. - This improves training speed because the images then need less padding - to form a batch. - - It assumes the underlying dataset produces dicts with "width" and "height" keys. - It will then produce a list of original dicts with length = batch_size, - all with similar aspect ratios. - """ - - def __init__(self, dataset, batch_size): - """ - Args: - dataset: an iterable. Each element must be a dict with keys - "width" and "height", which will be used to batch data. - batch_size (int): - """ - self.dataset = dataset - self.batch_size = batch_size - self._buckets = [[] for _ in range(2)] - # Hard-coded two aspect ratio groups: w > h and w < h. - # Can add support for more aspect ratio groups, but doesn't seem useful - - def __iter__(self): - for d in self.dataset: - w, h = d["width"], d["height"] - bucket_id = 0 if w > h else 1 - bucket = self._buckets[bucket_id] - bucket.append(d) - if len(bucket) == self.batch_size: - data = bucket[:] - # Clear bucket first, because code after yield is not - # guaranteed to execute - del bucket[:] - yield data diff --git a/detectron2/detectron2/data/dataset_mapper.py b/detectron2/detectron2/data/dataset_mapper.py deleted file mode 100644 index a8714f7990f11e146a01e03d108518e0356b50c4..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/dataset_mapper.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import logging -import numpy as np -from typing import List, Optional, Union -import torch - -from detectron2.config import configurable - -from . import detection_utils as utils -from . import transforms as T - -""" -This file contains the default mapping that's applied to "dataset dicts". -""" - -__all__ = ["DatasetMapper"] - - -class DatasetMapper: - """ - A callable which takes a dataset dict in Detectron2 Dataset format, - and map it into a format used by the model. - - This is the default callable to be used to map your dataset dict into training data. - You may need to follow it to implement your own one for customized logic, - such as a different way to read or transform images. - See :doc:`/tutorials/data_loading` for details. 
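To make the two-bucket grouping above concrete, a rough sketch that feeds a small iterable of width/height dicts through `AspectRatioGroupedDataset`; the image sizes are made up.

from detectron2.data.common import AspectRatioGroupedDataset

# Three landscape (w > h) and three portrait (w < h) images, interleaved.
stream = [
    {"width": 640, "height": 480}, {"width": 480, "height": 640},
    {"width": 800, "height": 600}, {"width": 600, "height": 800},
    {"width": 1024, "height": 768}, {"width": 768, "height": 1024},
]

grouped = AspectRatioGroupedDataset(stream, batch_size=3)
for batch in grouped:
    # Each yielded batch holds 3 dicts with the same orientation (all w > h, or all w < h).
    print([(d["width"], d["height"]) for d in batch])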
- - The callable currently does the following: - - 1. Read the image from "file_name" - 2. Applies cropping/geometric transforms to the image and annotations - 3. Prepare data and annotations to Tensor and :class:`Instances` - """ - - @configurable - def __init__( - self, - is_train: bool, - *, - augmentations: List[Union[T.Augmentation, T.Transform]], - image_format: str, - use_instance_mask: bool = False, - use_keypoint: bool = False, - instance_mask_format: str = "polygon", - keypoint_hflip_indices: Optional[np.ndarray] = None, - precomputed_proposal_topk: Optional[int] = None, - recompute_boxes: bool = False, - ): - """ - NOTE: this interface is experimental. - - Args: - is_train: whether it's used in training or inference - augmentations: a list of augmentations or deterministic transforms to apply - image_format: an image format supported by :func:`detection_utils.read_image`. - use_instance_mask: whether to process instance segmentation annotations, if available - use_keypoint: whether to process keypoint annotations if available - instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation - masks into this format. - keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` - precomputed_proposal_topk: if given, will load pre-computed - proposals from dataset_dict and keep the top k proposals for each image. - recompute_boxes: whether to overwrite bounding box annotations - by computing tight bounding boxes from instance mask annotations. - """ - if recompute_boxes: - assert use_instance_mask, "recompute_boxes requires instance masks" - # fmt: off - self.is_train = is_train - self.augmentations = T.AugmentationList(augmentations) - self.image_format = image_format - self.use_instance_mask = use_instance_mask - self.instance_mask_format = instance_mask_format - self.use_keypoint = use_keypoint - self.keypoint_hflip_indices = keypoint_hflip_indices - self.proposal_topk = precomputed_proposal_topk - self.recompute_boxes = recompute_boxes - # fmt: on - logger = logging.getLogger(__name__) - mode = "training" if is_train else "inference" - logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") - - @classmethod - def from_config(cls, cfg, is_train: bool = True): - augs = utils.build_augmentation(cfg, is_train) - if cfg.INPUT.CROP.ENABLED and is_train: - augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) - recompute_boxes = cfg.MODEL.MASK_ON - else: - recompute_boxes = False - - ret = { - "is_train": is_train, - "augmentations": augs, - "image_format": cfg.INPUT.FORMAT, - "use_instance_mask": cfg.MODEL.MASK_ON, - "instance_mask_format": cfg.INPUT.MASK_FORMAT, - "use_keypoint": cfg.MODEL.KEYPOINT_ON, - "recompute_boxes": recompute_boxes, - } - - if cfg.MODEL.KEYPOINT_ON: - ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) - - if cfg.MODEL.LOAD_PROPOSALS: - ret["precomputed_proposal_topk"] = ( - cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN - if is_train - else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST - ) - return ret - - def _transform_annotations(self, dataset_dict, transforms, image_shape): - # USER: Modify this if you want to keep them for some reason. 
- for anno in dataset_dict["annotations"]: - if not self.use_instance_mask: - anno.pop("segmentation", None) - if not self.use_keypoint: - anno.pop("keypoints", None) - - # USER: Implement additional transformations if you have other types of data - annos = [ - utils.transform_instance_annotations( - obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances( - annos, image_shape, mask_format=self.instance_mask_format - ) - - # After transforms such as cropping are applied, the bounding box may no longer - # tightly bound the object. As an example, imagine a triangle object - # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight - # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to - # the intersection of original bounding box and the cropping box. - if self.recompute_boxes: - instances.gt_boxes = instances.gt_masks.get_bounding_boxes() - dataset_dict["instances"] = utils.filter_empty_instances(instances) - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - # USER: Write your own image loading if it's not from a file - image = utils.read_image(dataset_dict["file_name"], format=self.image_format) - utils.check_image_size(dataset_dict, image) - - # USER: Remove if you don't do semantic/panoptic segmentation. - if "sem_seg_file_name" in dataset_dict: - sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) - else: - sem_seg_gt = None - - aug_input = T.AugInput(image, sem_seg=sem_seg_gt) - transforms = self.augmentations(aug_input) - image, sem_seg_gt = aug_input.image, aug_input.sem_seg - - image_shape = image.shape[:2] # h, w - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - if sem_seg_gt is not None: - dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) - - # USER: Remove if you don't use pre-computed proposals. - # Most users would not need this feature. - if self.proposal_topk is not None: - utils.transform_proposals( - dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk - ) - - if not self.is_train: - # USER: Modify this if you want to keep them for some reason. - dataset_dict.pop("annotations", None) - dataset_dict.pop("sem_seg_file_name", None) - return dataset_dict - - if "annotations" in dataset_dict: - self._transform_annotations(dataset_dict, transforms, image_shape) - - return dataset_dict diff --git a/detectron2/detectron2/data/datasets/README.md b/detectron2/detectron2/data/datasets/README.md deleted file mode 100644 index 9fb3e4f7afec17137c95c78be6ef06d520ec8032..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/README.md +++ /dev/null @@ -1,9 +0,0 @@ - - -### Common Datasets - -The dataset implemented here do not need to load the data into the final format. -It should provide the minimal data structure needed to use the dataset, so it can be very efficient. 
- -For example, for an image dataset, just provide the file names and labels, but don't read the images. -Let the downstream decide how to read. diff --git a/detectron2/detectron2/data/datasets/__init__.py b/detectron2/detectron2/data/datasets/__init__.py deleted file mode 100644 index a44bedc15e5f0e762fc4d77efd6f1b07c6ff77d0..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .coco import load_coco_json, load_sem_seg, register_coco_instances, convert_to_coco_json -from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated -from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta -from .pascal_voc import load_voc_instances, register_pascal_voc -from . import builtin as _builtin # ensure the builtin datasets are registered - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/data/datasets/builtin.py b/detectron2/detectron2/data/datasets/builtin.py deleted file mode 100644 index c3a68aa833f12f0fa324a269c36190f21b8a75bd..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/builtin.py +++ /dev/null @@ -1,259 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - - -""" -This file registers pre-defined datasets at hard-coded paths, and their metadata. - -We hard-code metadata for common datasets. This will enable: -1. Consistency check when loading the datasets -2. Use models on these standard datasets directly and run demos, - without having to download the dataset annotations - -We hard-code some paths to the dataset that's assumed to -exist in "./datasets/". - -Users SHOULD NOT use this file to create new dataset / metadata for new dataset. -To add new dataset, refer to the tutorial "docs/DATASETS.md". 
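Following the guidance above (provide file names and annotations, let the downstream decide how to read images), here is a hedged sketch of a minimal loader function for a custom dataset; the paths, sizes, and boxes are placeholders.

from detectron2.structures import BoxMode

def load_my_dataset(image_dir, anno):
    # `anno` is assumed to map file name -> list of XYXY boxes; adapt to your own format.
    # Return lightweight dicts in Detectron2 Dataset format; do NOT read the images here.
    dataset_dicts = []
    for i, (fname, boxes) in enumerate(anno.items()):
        dataset_dicts.append({
            "file_name": f"{image_dir}/{fname}",
            "image_id": i,
            "height": 480,                    # known ahead of time, or stored in an index file
            "width": 640,
            "annotations": [
                {"bbox": b, "bbox_mode": BoxMode.XYXY_ABS, "category_id": 0} for b in boxes
            ],
        })
    return dataset_dicts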
-""" - -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog - -from .builtin_meta import ADE20K_SEM_SEG_CATEGORIES, _get_builtin_metadata -from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic -from .cityscapes_panoptic import register_all_cityscapes_panoptic -from .coco import load_sem_seg, register_coco_instances -from .coco_panoptic import register_coco_panoptic, register_coco_panoptic_separated -from .lvis import get_lvis_instances_meta, register_lvis_instances -from .pascal_voc import register_pascal_voc - -# ==== Predefined datasets and splits for COCO ========== - -_PREDEFINED_SPLITS_COCO = {} -_PREDEFINED_SPLITS_COCO["coco"] = { - "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), - "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), - "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), - "coco_2014_valminusminival": ( - "coco/val2014", - "coco/annotations/instances_valminusminival2014.json", - ), - "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), - "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), - "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), - "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), - "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), -} - -_PREDEFINED_SPLITS_COCO["coco_person"] = { - "keypoints_coco_2014_train": ( - "coco/train2014", - "coco/annotations/person_keypoints_train2014.json", - ), - "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), - "keypoints_coco_2014_minival": ( - "coco/val2014", - "coco/annotations/person_keypoints_minival2014.json", - ), - "keypoints_coco_2014_valminusminival": ( - "coco/val2014", - "coco/annotations/person_keypoints_valminusminival2014.json", - ), - "keypoints_coco_2017_train": ( - "coco/train2017", - "coco/annotations/person_keypoints_train2017.json", - ), - "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), - "keypoints_coco_2017_val_100": ( - "coco/val2017", - "coco/annotations/person_keypoints_val2017_100.json", - ), -} - - -_PREDEFINED_SPLITS_COCO_PANOPTIC = { - "coco_2017_train_panoptic": ( - # This is the original panoptic annotation directory - "coco/panoptic_train2017", - "coco/annotations/panoptic_train2017.json", - # This directory contains semantic annotations that are - # converted from panoptic annotations. - # It is used by PanopticFPN. - # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py - # to create these directories. - "coco/panoptic_stuff_train2017", - ), - "coco_2017_val_panoptic": ( - "coco/panoptic_val2017", - "coco/annotations/panoptic_val2017.json", - "coco/panoptic_stuff_val2017", - ), - "coco_2017_val_100_panoptic": ( - "coco/panoptic_val2017_100", - "coco/annotations/panoptic_val2017_100.json", - "coco/panoptic_stuff_val2017_100", - ), -} - - -def register_all_coco(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - # Assume pre-defined datasets live in `./datasets`. 
- register_coco_instances( - key, - _get_builtin_metadata(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - for ( - prefix, - (panoptic_root, panoptic_json, semantic_root), - ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): - prefix_instances = prefix[: -len("_panoptic")] - instances_meta = MetadataCatalog.get(prefix_instances) - image_root, instances_json = instances_meta.image_root, instances_meta.json_file - # The "separated" version of COCO panoptic segmentation dataset, - # e.g. used by Panoptic FPN - register_coco_panoptic_separated( - prefix, - _get_builtin_metadata("coco_panoptic_separated"), - image_root, - os.path.join(root, panoptic_root), - os.path.join(root, panoptic_json), - os.path.join(root, semantic_root), - instances_json, - ) - # The "standard" version of COCO panoptic segmentation dataset, - # e.g. used by Panoptic-DeepLab - register_coco_panoptic( - prefix, - _get_builtin_metadata("coco_panoptic_standard"), - image_root, - os.path.join(root, panoptic_root), - os.path.join(root, panoptic_json), - instances_json, - ) - - -# ==== Predefined datasets and splits for LVIS ========== - - -_PREDEFINED_SPLITS_LVIS = { - "lvis_v1": { - "lvis_v1_train": ("coco/", "lvis/lvis_v1_train.json"), - "lvis_v1_val": ("coco/", "lvis/lvis_v1_val.json"), - "lvis_v1_test_dev": ("coco/", "lvis/lvis_v1_image_info_test_dev.json"), - "lvis_v1_test_challenge": ("coco/", "lvis/lvis_v1_image_info_test_challenge.json"), - }, - "lvis_v0.5": { - "lvis_v0.5_train": ("coco/", "lvis/lvis_v0.5_train.json"), - "lvis_v0.5_val": ("coco/", "lvis/lvis_v0.5_val.json"), - "lvis_v0.5_val_rand_100": ("coco/", "lvis/lvis_v0.5_val_rand_100.json"), - "lvis_v0.5_test": ("coco/", "lvis/lvis_v0.5_image_info_test.json"), - }, - "lvis_v0.5_cocofied": { - "lvis_v0.5_train_cocofied": ("coco/", "lvis/lvis_v0.5_train_cocofied.json"), - "lvis_v0.5_val_cocofied": ("coco/", "lvis/lvis_v0.5_val_cocofied.json"), - }, -} - - -def register_all_lvis(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - register_lvis_instances( - key, - get_lvis_instances_meta(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - -# ==== Predefined splits for raw cityscapes images =========== -_RAW_CITYSCAPES_SPLITS = { - "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train/", "cityscapes/gtFine/train/"), - "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val/", "cityscapes/gtFine/val/"), - "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test/", "cityscapes/gtFine/test/"), -} - - -def register_all_cityscapes(root): - for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): - meta = _get_builtin_metadata("cityscapes") - image_dir = os.path.join(root, image_dir) - gt_dir = os.path.join(root, gt_dir) - - inst_key = key.format(task="instance_seg") - DatasetCatalog.register( - inst_key, - lambda x=image_dir, y=gt_dir: load_cityscapes_instances( - x, y, from_json=True, to_polygons=True - ), - ) - MetadataCatalog.get(inst_key).set( - image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta - ) - - sem_key = key.format(task="sem_seg") - DatasetCatalog.register( - sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) - ) - MetadataCatalog.get(sem_key).set( - image_dir=image_dir, - gt_dir=gt_dir, - evaluator_type="cityscapes_sem_seg", - 
ignore_label=255, - **meta, - ) - - -# ==== Predefined splits for PASCAL VOC =========== -def register_all_pascal_voc(root): - SPLITS = [ - ("voc_2007_trainval", "VOC2007", "trainval"), - ("voc_2007_train", "VOC2007", "train"), - ("voc_2007_val", "VOC2007", "val"), - ("voc_2007_test", "VOC2007", "test"), - ("voc_2012_trainval", "VOC2012", "trainval"), - ("voc_2012_train", "VOC2012", "train"), - ("voc_2012_val", "VOC2012", "val"), - ] - for name, dirname, split in SPLITS: - year = 2007 if "2007" in name else 2012 - register_pascal_voc(name, os.path.join(root, dirname), split, year) - MetadataCatalog.get(name).evaluator_type = "pascal_voc" - - -def register_all_ade20k(root): - root = os.path.join(root, "ADEChallengeData2016") - for name, dirname in [("train", "training"), ("val", "validation")]: - image_dir = os.path.join(root, "images", dirname) - gt_dir = os.path.join(root, "annotations_detectron2", dirname) - name = f"ade20k_sem_seg_{name}" - DatasetCatalog.register( - name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") - ) - MetadataCatalog.get(name).set( - stuff_classes=ADE20K_SEM_SEG_CATEGORIES[:], - image_root=image_dir, - sem_seg_root=gt_dir, - evaluator_type="sem_seg", - ignore_label=255, - ) - - -# True for open source; -# Internally at fb, we register them elsewhere -if __name__.endswith(".builtin"): - # Assume pre-defined datasets live in `./datasets`. - _root = os.path.expanduser(os.getenv("DETECTRON2_DATASETS", "datasets")) - register_all_coco(_root) - register_all_lvis(_root) - register_all_cityscapes(_root) - register_all_cityscapes_panoptic(_root) - register_all_pascal_voc(_root) - register_all_ade20k(_root) diff --git a/detectron2/detectron2/data/datasets/builtin_meta.py b/detectron2/detectron2/data/datasets/builtin_meta.py deleted file mode 100644 index 63c7a1a31b31dd89b82011effee26471faccacf5..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/builtin_meta.py +++ /dev/null @@ -1,350 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -Note: -For your custom dataset, there is no need to hard-code metadata anywhere in the code. -For example, for COCO-format dataset, metadata will be obtained automatically -when calling `load_coco_json`. For other dataset, metadata may also be obtained in other ways -during loading. - -However, we hard-coded metadata for a few common dataset here. -The only goal is to allow users who don't have these dataset to use pre-trained models. -Users don't have to download a COCO json (which contains metadata), in order to visualize a -COCO model (with correct class names and colors). 
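As the registration block above shows, the builtin datasets are resolved under a root directory taken from the `DETECTRON2_DATASETS` environment variable (default `./datasets`). A tiny sketch of pointing it elsewhere; the path is a placeholder, and the variable must be set before the builtin registrations run at import time.

import os

os.environ["DETECTRON2_DATASETS"] = "/data/detectron2_datasets"   # placeholder path; set before importing detectron2.data

from detectron2.data import MetadataCatalog
# The builtin metadata should now point under the root above (attribute names may vary by dataset).
print(MetadataCatalog.get("coco_2017_val").image_root)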
-""" - - -# All coco categories, together with their nice-looking visualization colors -# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json -COCO_CATEGORIES = [ - {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, - {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, - {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, - {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, - {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, - {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, - {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, - {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, - {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, - {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, - {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, - {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, - {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, - {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, - {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, - {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, - {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, - {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, - {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, - {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, - {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, - {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, - {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, - {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, - {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, - {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, - {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, - {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, - {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, - {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, - {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, - {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, - {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, - {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, - {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, - {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, - {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, - {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, - {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, - {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, - {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, - {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, - {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, - {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, - {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, - {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, - {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, - {"color": [166, 196, 
102], "isthing": 1, "id": 53, "name": "apple"}, - {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, - {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, - {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, - {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, - {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, - {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, - {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, - {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, - {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, - {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, - {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, - {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, - {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, - {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, - {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, - {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, - {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, - {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, - {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, - {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, - {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, - {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, - {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, - {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, - {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, - {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, - {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, - {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, - {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, - {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, - {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, - {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, - {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, - {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, - {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, - {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, - {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, - {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, - {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, - {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, - {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, - {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, - {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, - {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, - {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, - {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, - {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, - {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, - {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": 
"platform"}, - {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, - {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, - {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, - {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, - {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, - {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, - {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, - {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, - {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, - {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, - {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, - {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, - {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, - {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, - {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, - {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, - {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, - {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, - {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, - {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, - {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, - {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, - {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, - {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, - {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, - {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, - {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, - {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, - {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, - {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, - {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, - {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, - {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, - {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, - {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, - {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, -] - -# fmt: off -COCO_PERSON_KEYPOINT_NAMES = ( - "nose", - "left_eye", "right_eye", - "left_ear", "right_ear", - "left_shoulder", "right_shoulder", - "left_elbow", "right_elbow", - "left_wrist", "right_wrist", - "left_hip", "right_hip", - "left_knee", "right_knee", - "left_ankle", "right_ankle", -) -# fmt: on - -# Pairs of keypoints that should be exchanged under horizontal flipping -COCO_PERSON_KEYPOINT_FLIP_MAP = ( - ("left_eye", "right_eye"), - ("left_ear", "right_ear"), - ("left_shoulder", "right_shoulder"), - ("left_elbow", "right_elbow"), - ("left_wrist", "right_wrist"), - ("left_hip", "right_hip"), - ("left_knee", "right_knee"), - ("left_ankle", "right_ankle"), -) - -# rules for pairs of keypoints to draw a line between, and the line color to use. 
-KEYPOINT_CONNECTION_RULES = [ - # face - ("left_ear", "left_eye", (102, 204, 255)), - ("right_ear", "right_eye", (51, 153, 255)), - ("left_eye", "nose", (102, 0, 204)), - ("nose", "right_eye", (51, 102, 255)), - # upper-body - ("left_shoulder", "right_shoulder", (255, 128, 0)), - ("left_shoulder", "left_elbow", (153, 255, 204)), - ("right_shoulder", "right_elbow", (128, 229, 255)), - ("left_elbow", "left_wrist", (153, 255, 153)), - ("right_elbow", "right_wrist", (102, 255, 224)), - # lower-body - ("left_hip", "right_hip", (255, 102, 0)), - ("left_hip", "left_knee", (255, 255, 77)), - ("right_hip", "right_knee", (153, 255, 204)), - ("left_knee", "left_ankle", (191, 255, 128)), - ("right_knee", "right_ankle", (255, 195, 77)), -] - -# All Cityscapes categories, together with their nice-looking visualization colors -# It's from https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py # noqa -CITYSCAPES_CATEGORIES = [ - {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"}, - {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"}, - {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"}, - {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"}, - {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"}, - {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"}, - {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"}, - {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"}, - {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"}, - {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"}, - {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"}, - {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"}, - {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"}, - {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"}, - {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"}, - {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"}, - {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"}, - {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"}, - {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"}, -] - -# fmt: off -ADE20K_SEM_SEG_CATEGORIES = [ - "wall", "building", "sky", "floor", "tree", "ceiling", "road, route", "bed", "window ", "grass", "cabinet", "sidewalk, pavement", "person", "earth, ground", "door", "table", "mountain, mount", "plant", "curtain", "chair", "car", "water", "painting, picture", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock, stone", "wardrobe, closet, press", "lamp", "tub", "rail", "cushion", "base, pedestal, stand", "box", "column, pillar", "signboard, sign", "chest of drawers, chest, bureau, dresser", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator, icebox", "grandstand, covered stand", "path", "stairs", "runway", "case, display case, showcase, vitrine", "pool table, billiard table, snooker table", "pillow", "screen door, screen", "stairway, staircase", "river", "bridge, span", "bookcase", "blind, screen", "coffee table", "toilet, can, commode, crapper, pot, 
potty, stool, throne", "flower", "book", "hill", "bench", "countertop", "stove", "palm, palm tree", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel, hut, hutch, shack, shanty", "bus", "towel", "light", "truck", "tower", "chandelier", "awning, sunshade, sunblind", "street lamp", "booth", "tv", "plane", "dirt track", "clothes", "pole", "land, ground, soil", "bannister, banister, balustrade, balusters, handrail", "escalator, moving staircase, moving stairway", "ottoman, pouf, pouffe, puff, hassock", "bottle", "buffet, counter, sideboard", "poster, posting, placard, notice, bill, card", "stage", "van", "ship", "fountain", "conveyer belt, conveyor belt, conveyer, conveyor, transporter", "canopy", "washer, automatic washer, washing machine", "plaything, toy", "pool", "stool", "barrel, cask", "basket, handbasket", "falls", "tent", "bag", "minibike, motorbike", "cradle", "oven", "ball", "food, solid food", "step, stair", "tank, storage tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket, cover", "sculpture", "hood, exhaust hood", "sconce", "vase", "traffic light", "tray", "trash can", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass, drinking glass", "clock", "flag", # noqa -] -# After processed by `prepare_ade20k_sem_seg.py`, id 255 means ignore -# fmt: on - - -def _get_coco_instances_meta(): - thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] - thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] - assert len(thing_ids) == 80, len(thing_ids) - # Mapping from the incontiguous COCO category id to an id in [0, 79] - thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} - thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] - ret = { - "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, - "thing_classes": thing_classes, - "thing_colors": thing_colors, - } - return ret - - -def _get_coco_panoptic_separated_meta(): - """ - Returns metadata for "separated" version of the panoptic segmentation dataset. - """ - stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] - assert len(stuff_ids) == 53, len(stuff_ids) - - # For semantic segmentation, this mapping maps from contiguous stuff id - # (in [0, 53], used in models) to ids in the dataset (used for processing results) - # The id 0 is mapped to an extra category "thing". 
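# A minimal, self-contained sketch (toy stuff ids, not the real COCO_CATEGORIES) of the
# "separated" id scheme built just below: contiguous id 0 is reserved for the extra
# "thing" category, and each stuff id is shifted into [1, #stuff].
toy_stuff_ids = [92, 93, 95]                       # hypothetical dataset ids
toy_mapping = {k: i + 1 for i, k in enumerate(toy_stuff_ids)}
toy_mapping[0] = 0                                 # all "thing" pixels collapse to class 0
assert toy_mapping == {92: 1, 93: 2, 95: 3, 0: 0}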
- stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} - # When converting COCO panoptic annotations to semantic annotations - # We label the "thing" category as 0 - stuff_dataset_id_to_contiguous_id[0] = 0 - - # 54 names for COCO stuff categories (including "things") - stuff_classes = ["things"] + [ - k["name"].replace("-other", "").replace("-merged", "") - for k in COCO_CATEGORIES - if k["isthing"] == 0 - ] - - # NOTE: I randomly picked a color for things - stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] - ret = { - "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, - "stuff_classes": stuff_classes, - "stuff_colors": stuff_colors, - } - ret.update(_get_coco_instances_meta()) - return ret - - -def _get_builtin_metadata(dataset_name): - if dataset_name == "coco": - return _get_coco_instances_meta() - if dataset_name == "coco_panoptic_separated": - return _get_coco_panoptic_separated_meta() - elif dataset_name == "coco_panoptic_standard": - meta = {} - # The following metadata maps contiguous id from [0, #thing categories + - # #stuff categories) to their names and colors. We keep two copies of the - # same name and color under "thing_*" and "stuff_*" because the current - # visualization function in D2 handles thing and stuff classes differently - # due to some heuristic used in Panoptic FPN. We keep the same naming to - # enable reusing existing visualization functions. - thing_classes = [k["name"] for k in COCO_CATEGORIES] - thing_colors = [k["color"] for k in COCO_CATEGORIES] - stuff_classes = [k["name"] for k in COCO_CATEGORIES] - stuff_colors = [k["color"] for k in COCO_CATEGORIES] - - meta["thing_classes"] = thing_classes - meta["thing_colors"] = thing_colors - meta["stuff_classes"] = stuff_classes - meta["stuff_colors"] = stuff_colors - - # Convert category id for training: - # category id: like semantic segmentation, it is the class id for each - # pixel. Since there are some classes not used in evaluation, the category - # id is not always contiguous and thus we have two sets of category ids: - # - original category id: category id in the original dataset, mainly - # used for evaluation. - # - contiguous category id: [0, #classes), in order to train the linear - # softmax classifier. 
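# A comparable toy sketch (made-up categories, not the real COCO_CATEGORIES) of the
# "standard" panoptic mapping built below: things and stuff share one contiguous range,
# each category receiving its position in the category list as its contiguous id.
toy_categories = [
    {"id": 1, "isthing": 1, "name": "person"},
    {"id": 7, "isthing": 0, "name": "road"},
    {"id": 9, "isthing": 1, "name": "car"},
]
toy_thing_map = {c["id"]: i for i, c in enumerate(toy_categories) if c["isthing"]}
toy_stuff_map = {c["id"]: i for i, c in enumerate(toy_categories) if not c["isthing"]}
assert toy_thing_map == {1: 0, 9: 2} and toy_stuff_map == {7: 1}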
- thing_dataset_id_to_contiguous_id = {} - stuff_dataset_id_to_contiguous_id = {} - - for i, cat in enumerate(COCO_CATEGORIES): - if cat["isthing"]: - thing_dataset_id_to_contiguous_id[cat["id"]] = i - else: - stuff_dataset_id_to_contiguous_id[cat["id"]] = i - - meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id - meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id - - return meta - elif dataset_name == "coco_person": - return { - "thing_classes": ["person"], - "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, - "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, - "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, - } - elif dataset_name == "cityscapes": - # fmt: off - CITYSCAPES_THING_CLASSES = [ - "person", "rider", "car", "truck", - "bus", "train", "motorcycle", "bicycle", - ] - CITYSCAPES_STUFF_CLASSES = [ - "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", - "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", - "truck", "bus", "train", "motorcycle", "bicycle", - ] - # fmt: on - return { - "thing_classes": CITYSCAPES_THING_CLASSES, - "stuff_classes": CITYSCAPES_STUFF_CLASSES, - } - raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/detectron2/detectron2/data/datasets/cityscapes.py b/detectron2/detectron2/data/datasets/cityscapes.py deleted file mode 100644 index 73cd0e0061e32c2c39b722b262100083ff39eb99..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/cityscapes.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import functools -import json -import logging -import multiprocessing as mp -import numpy as np -import os -from itertools import chain -import pycocotools.mask as mask_util -from PIL import Image - -from detectron2.structures import BoxMode -from detectron2.utils.comm import get_world_size -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import setup_logger - -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - pass - - -logger = logging.getLogger(__name__) - - -def _get_cityscapes_files(image_dir, gt_dir): - files = [] - # scan through the directory - cities = PathManager.ls(image_dir) - logger.info(f"{len(cities)} cities found in '{image_dir}'.") - for city in cities: - city_img_dir = os.path.join(image_dir, city) - city_gt_dir = os.path.join(gt_dir, city) - for basename in PathManager.ls(city_img_dir): - image_file = os.path.join(city_img_dir, basename) - - suffix = "leftImg8bit.png" - assert basename.endswith(suffix), basename - basename = basename[: -len(suffix)] - - instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") - label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") - json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") - - files.append((image_file, instance_file, label_file, json_file)) - assert len(files), "No images found in {}".format(image_dir) - for f in files[0]: - assert PathManager.isfile(f), f - return files - - -def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". - from_json (bool): whether to read annotations from the raw json file or the png files. 
- to_polygons (bool): whether to represent the segmentation as polygons - (COCO's format) instead of masks (cityscapes's format). - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - """ - if from_json: - assert to_polygons, ( - "Cityscapes's json annotations are in polygon format. " - "Converting to mask format is not supported now." - ) - files = _get_cityscapes_files(image_dir, gt_dir) - - logger.info("Preprocessing cityscapes annotations ...") - # This is still not fast: all workers will execute duplicate works and will - # take up to 10m on a 8GPU server. - pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) - - ret = pool.map( - functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), - files, - ) - logger.info("Loaded {} images from {}".format(len(ret), image_dir)) - - # Map cityscape ids to contiguous ids - from deeplearning.projects.cityscapesApi.cityscapesscripts.helpers.labels import labels - - labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] - dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} - for dict_per_image in ret: - for anno in dict_per_image["annotations"]: - anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] - return ret - - -def load_cityscapes_semantic(image_dir, gt_dir): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". - - Returns: - list[dict]: a list of dict, each has "file_name" and - "sem_seg_file_name". - """ - ret = [] - # gt_dir is small and contain many small files. make sense to fetch to local first - gt_dir = PathManager.get_local_path(gt_dir) - for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir): - label_file = label_file.replace("labelIds", "labelTrainIds") - - with PathManager.open(json_file, "r") as f: - jsonobj = json.load(f) - ret.append( - { - "file_name": image_file, - "sem_seg_file_name": label_file, - "height": jsonobj["imgHeight"], - "width": jsonobj["imgWidth"], - } - ) - assert len(ret), f"No images found in {image_dir}!" - assert PathManager.isfile( - ret[0]["sem_seg_file_name"] - ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa - return ret - - -def _cityscapes_files_to_dict(files, from_json, to_polygons): - """ - Parse cityscapes annotation files to a instance segmentation dataset dict. - - Args: - files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) - from_json (bool): whether to read annotations from the raw json file or the png files. - to_polygons (bool): whether to represent the segmentation as polygons - (COCO's format) instead of masks (cityscapes's format). - - Returns: - A dict in Detectron2 Dataset format. - """ - from deeplearning.projects.cityscapesApi.cityscapesscripts.helpers.labels import ( - id2label, - name2label, - ) - - image_file, instance_id_file, _, json_file = files - - annos = [] - - if from_json: - from shapely.geometry import MultiPolygon, Polygon - - with PathManager.open(json_file, "r") as f: - jsonobj = json.load(f) - ret = { - "file_name": image_file, - "image_id": os.path.basename(image_file), - "height": jsonobj["imgHeight"], - "width": jsonobj["imgWidth"], - } - - # `polygons_union` contains the union of all valid polygons. 
- polygons_union = Polygon() - - # CityscapesScripts draw the polygons in sequential order - # and each polygon *overwrites* existing ones. See - # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa - # We use reverse order, and each polygon *avoids* early ones. - # This will resolve the ploygon overlaps in the same way as CityscapesScripts. - for obj in jsonobj["objects"][::-1]: - if "deleted" in obj: # cityscapes data format specific - continue - label_name = obj["label"] - - try: - label = name2label[label_name] - except KeyError: - if label_name.endswith("group"): # crowd area - label = name2label[label_name[: -len("group")]] - else: - raise - if label.id < 0: # cityscapes data format - continue - - # Cityscapes's raw annotations uses integer coordinates - # Therefore +0.5 here - poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 - # CityscapesScript uses PIL.ImageDraw.polygon to rasterize - # polygons for evaluation. This function operates in integer space - # and draws each pixel whose center falls into the polygon. - # Therefore it draws a polygon which is 0.5 "fatter" in expectation. - # We therefore dilate the input polygon by 0.5 as our input. - poly = Polygon(poly_coord).buffer(0.5, resolution=4) - - if not label.hasInstances or label.ignoreInEval: - # even if we won't store the polygon it still contributes to overlaps resolution - polygons_union = polygons_union.union(poly) - continue - - # Take non-overlapping part of the polygon - poly_wo_overlaps = poly.difference(polygons_union) - if poly_wo_overlaps.is_empty: - continue - polygons_union = polygons_union.union(poly) - - anno = {} - anno["iscrowd"] = label_name.endswith("group") - anno["category_id"] = label.id - - if isinstance(poly_wo_overlaps, Polygon): - poly_list = [poly_wo_overlaps] - elif isinstance(poly_wo_overlaps, MultiPolygon): - poly_list = poly_wo_overlaps.geoms - else: - raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) - - poly_coord = [] - for poly_el in poly_list: - # COCO API can work only with exterior boundaries now, hence we store only them. - # TODO: store both exterior and interior boundaries once other parts of the - # codebase support holes in polygons. 
- poly_coord.append(list(chain(*poly_el.exterior.coords))) - anno["segmentation"] = poly_coord - (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds - - anno["bbox"] = (xmin, ymin, xmax, ymax) - anno["bbox_mode"] = BoxMode.XYXY_ABS - - annos.append(anno) - else: - # See also the official annotation parsing scripts at - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa - with PathManager.open(instance_id_file, "rb") as f: - inst_image = np.asarray(Image.open(f), order="F") - # ids < 24 are stuff labels (filtering them first is about 5% faster) - flattened_ids = np.unique(inst_image[inst_image >= 24]) - - ret = { - "file_name": image_file, - "image_id": os.path.basename(image_file), - "height": inst_image.shape[0], - "width": inst_image.shape[1], - } - - for instance_id in flattened_ids: - # For non-crowd annotations, instance_id // 1000 is the label_id - # Crowd annotations have <1000 instance ids - label_id = instance_id // 1000 if instance_id >= 1000 else instance_id - label = id2label[label_id] - if not label.hasInstances or label.ignoreInEval: - continue - - anno = {} - anno["iscrowd"] = instance_id < 1000 - anno["category_id"] = label.id - - mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") - - inds = np.nonzero(mask) - ymin, ymax = inds[0].min(), inds[0].max() - xmin, xmax = inds[1].min(), inds[1].max() - anno["bbox"] = (xmin, ymin, xmax, ymax) - if xmax <= xmin or ymax <= ymin: - continue - anno["bbox_mode"] = BoxMode.XYXY_ABS - if to_polygons: - # This conversion comes from D4809743 and D5171122, - # when Mask-RCNN was first developed. - contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ - -2 - ] - polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] - # opencv's can produce invalid polygons - if len(polygons) == 0: - continue - anno["segmentation"] = polygons - else: - anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] - annos.append(anno) - ret["annotations"] = annos - return ret - - -def main() -> None: - global logger, labels - """ - Test the cityscapes dataset loader. 
- - Usage: - python -m detectron2.data.datasets.cityscapes \ - cityscapes/leftImg8bit/train cityscapes/gtFine/train - """ - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("image_dir") - parser.add_argument("gt_dir") - parser.add_argument("--type", choices=["instance", "semantic"], default="instance") - args = parser.parse_args() - from deeplearning.projects.cityscapesApi.cityscapesscripts.helpers.labels import labels - from detectron2.data.catalog import Metadata - from detectron2.utils.visualizer import Visualizer - - logger = setup_logger(name=__name__) - - dirname = "cityscapes-data-vis" - os.makedirs(dirname, exist_ok=True) - - if args.type == "instance": - dicts = load_cityscapes_instances( - args.image_dir, args.gt_dir, from_json=True, to_polygons=True - ) - logger.info("Done loading {} samples.".format(len(dicts))) - - thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] - meta = Metadata().set(thing_classes=thing_classes) - - else: - dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) - logger.info("Done loading {} samples.".format(len(dicts))) - - stuff_classes = [k.name for k in labels if k.trainId != 255] - stuff_colors = [k.color for k in labels if k.trainId != 255] - meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors) - - for d in dicts: - img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - # cv2.imshow("a", vis.get_image()[:, :, ::-1]) - # cv2.waitKey() - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/detectron2/data/datasets/cityscapes_panoptic.py b/detectron2/detectron2/data/datasets/cityscapes_panoptic.py deleted file mode 100644 index 48c136f1623261b079591065fec7c7fc38165076..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/cityscapes_panoptic.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import json -import logging -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.data.datasets.builtin_meta import CITYSCAPES_CATEGORIES -from detectron2.utils.file_io import PathManager - -""" -This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. 
-""" - - -logger = logging.getLogger(__name__) - - -def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): - files = [] - # scan through the directory - cities = PathManager.ls(image_dir) - logger.info(f"{len(cities)} cities found in '{image_dir}'.") - image_dict = {} - for city in cities: - city_img_dir = os.path.join(image_dir, city) - for basename in PathManager.ls(city_img_dir): - image_file = os.path.join(city_img_dir, basename) - - suffix = "_leftImg8bit.png" - assert basename.endswith(suffix), basename - basename = os.path.basename(basename)[: -len(suffix)] - - image_dict[basename] = image_file - - for ann in json_info["annotations"]: - image_file = image_dict.get(ann["image_id"], None) - assert image_file is not None, "No image {} found for annotation {}".format( - ann["image_id"], ann["file_name"] - ) - label_file = os.path.join(gt_dir, ann["file_name"]) - segments_info = ann["segments_info"] - - files.append((image_file, label_file, segments_info)) - - assert len(files), "No images found in {}".format(image_dir) - assert PathManager.isfile(files[0][0]), files[0][0] - assert PathManager.isfile(files[0][1]), files[0][1] - return files - - -def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., - "~/cityscapes/gtFine/cityscapes_panoptic_train". - gt_json (str): path to the json file. e.g., - "~/cityscapes/gtFine/cityscapes_panoptic_train.json". - meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" - and "stuff_dataset_id_to_contiguous_id" to map category ids to - contiguous ids for training. - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - """ - - def _convert_category_id(segment_info, meta): - if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: - segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ - segment_info["category_id"] - ] - else: - segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ - segment_info["category_id"] - ] - return segment_info - - assert os.path.exists( - gt_json - ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa - with open(gt_json) as f: - json_info = json.load(f) - files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) - ret = [] - for image_file, label_file, segments_info in files: - sem_label_file = ( - image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" - ) - segments_info = [_convert_category_id(x, meta) for x in segments_info] - ret.append( - { - "file_name": image_file, - "image_id": "_".join( - os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] - ), - "sem_seg_file_name": sem_label_file, - "pan_seg_file_name": label_file, - "segments_info": segments_info, - } - ) - assert len(ret), f"No images found in {image_dir}!" 
- assert PathManager.isfile( - ret[0]["sem_seg_file_name"] - ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa - assert PathManager.isfile( - ret[0]["pan_seg_file_name"] - ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa - return ret - - -_RAW_CITYSCAPES_PANOPTIC_SPLITS = { - "cityscapes_fine_panoptic_train": ( - "cityscapes/leftImg8bit/train", - "cityscapes/gtFine/cityscapes_panoptic_train", - "cityscapes/gtFine/cityscapes_panoptic_train.json", - ), - "cityscapes_fine_panoptic_val": ( - "cityscapes/leftImg8bit/val", - "cityscapes/gtFine/cityscapes_panoptic_val", - "cityscapes/gtFine/cityscapes_panoptic_val.json", - ), - # "cityscapes_fine_panoptic_test": not supported yet -} - - -def register_all_cityscapes_panoptic(root): - meta = {} - # The following metadata maps contiguous id from [0, #thing categories + - # #stuff categories) to their names and colors. We keep two copies of the - # same name and color under "thing_*" and "stuff_*" because the current - # visualization function in D2 handles thing and stuff classes differently - # due to some heuristic used in Panoptic FPN. We keep the same naming to - # enable reusing existing visualization functions. - thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] - thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] - stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] - stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] - - meta["thing_classes"] = thing_classes - meta["thing_colors"] = thing_colors - meta["stuff_classes"] = stuff_classes - meta["stuff_colors"] = stuff_colors - - # There are three types of ids in cityscapes panoptic segmentation: - # (1) category id: like semantic segmentation, it is the class id for each - # pixel. Since there are some classes not used in evaluation, the category - # id is not always contiguous and thus we have two sets of category ids: - # - original category id: category id in the original dataset, mainly - # used for evaluation. - # - contiguous category id: [0, #classes), in order to train the classifier - # (2) instance id: this id is used to differentiate different instances from - # the same category. For "stuff" classes, the instance id is always 0; for - # "thing" classes, the instance id starts from 1 and 0 is reserved for - # ignored instances (e.g. crowd annotation). - # (3) panoptic id: this is the compact id that encodes both category and - # instance id by: category_id * 1000 + instance_id. 
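# A small sketch of the panoptic id encoding described in (3): category and instance id are
# packed into a single integer and can be recovered with divmod (values below are illustrative).
category_id, instance_id = 26, 4        # e.g. one "car" instance; ids chosen for illustration
panoptic_id = category_id * 1000 + instance_id
assert divmod(panoptic_id, 1000) == (26, 4)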
- thing_dataset_id_to_contiguous_id = {} - stuff_dataset_id_to_contiguous_id = {} - - for k in CITYSCAPES_CATEGORIES: - if k["isthing"] == 1: - thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] - else: - stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] - - meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id - meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id - - for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): - image_dir = os.path.join(root, image_dir) - gt_dir = os.path.join(root, gt_dir) - gt_json = os.path.join(root, gt_json) - - DatasetCatalog.register( - key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) - ) - MetadataCatalog.get(key).set( - panoptic_root=gt_dir, - image_root=image_dir, - panoptic_json=gt_json, - gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), - evaluator_type="cityscapes_panoptic_seg", - ignore_label=255, - label_divisor=1000, - **meta, - ) diff --git a/detectron2/detectron2/data/datasets/coco.py b/detectron2/detectron2/data/datasets/coco.py deleted file mode 100644 index 51721d6c7468f9d8320868a4d8fd7672fa410eb8..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/coco.py +++ /dev/null @@ -1,556 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import contextlib -import datetime -import io -import json -import logging -import numpy as np -import os -import shutil -import pycocotools.mask as mask_util -from fvcore.common.timer import Timer -from iopath.common.file_io import file_lock -from PIL import Image - -from detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes -from detectron2.utils.file_io import PathManager - -from .. import DatasetCatalog, MetadataCatalog - -""" -This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". -""" - - -logger = logging.getLogger(__name__) - -__all__ = [ - "load_coco_json", - "load_sem_seg", - "convert_to_coco_json", - "register_coco_instances", -] - - -def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): - """ - Load a json file with COCO's instances annotation format. - Currently supports instance detection, instance segmentation, - and person keypoints annotations. - - Args: - json_file (str): full path to the json file in COCO instances annotation format. - image_root (str or path-like): the directory where the images in this json file exists. - dataset_name (str or None): the name of the dataset (e.g., coco_2017_train). - When provided, this function will also do the following: - - * Put "thing_classes" into the metadata associated with this dataset. - * Map the category ids into a contiguous range (needed by standard dataset format), - and add "thing_dataset_id_to_contiguous_id" to the metadata associated - with this dataset. - - This option should usually be provided, unless users need to load - the original json content and apply more processing manually. - extra_annotation_keys (list[str]): list of per-annotation keys that should also be - loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", - "category_id", "segmentation"). The values for these keys will be returned as-is. - For example, the densepose annotations are loaded in this way. - - Returns: - list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See - `Using Custom Datasets `_ ) when `dataset_name` is not None. 
- If `dataset_name` is None, the returned `category_ids` may be - incontiguous and may not conform to the Detectron2 standard format. - - Notes: - 1. This function does not read the image files. - The results do not have the "image" field. - """ - from pycocotools.coco import COCO - - timer = Timer() - json_file = PathManager.get_local_path(json_file) - with contextlib.redirect_stdout(io.StringIO()): - coco_api = COCO(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - - id_map = None - if dataset_name is not None: - meta = MetadataCatalog.get(dataset_name) - cat_ids = sorted(coco_api.getCatIds()) - cats = coco_api.loadCats(cat_ids) - # The categories in a custom json file may not be sorted. - thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] - meta.thing_classes = thing_classes - - # In COCO, certain category ids are artificially removed, - # and by convention they are always ignored. - # We deal with COCO's id issue and translate - # the category ids to contiguous ids in [0, 80). - - # It works by looking at the "categories" field in the json, therefore - # if users' own json also have incontiguous ids, we'll - # apply this mapping as well but print a warning. - if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): - if "coco" not in dataset_name: - logger.warning( - """ -Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. -""" - ) - id_map = {v: i for i, v in enumerate(cat_ids)} - meta.thing_dataset_id_to_contiguous_id = id_map - - # sort indices for reproducible results - img_ids = sorted(coco_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = coco_api.loadImgs(img_ids) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. Example of anns[0]: - # [{'segmentation': [[192.81, - # 247.09, - # ... - # 219.03, - # 249.06]], - # 'area': 1035.749, - # 'iscrowd': 0, - # 'image_id': 1268, - # 'bbox': [192.81, 224.8, 74.73, 33.43], - # 'category_id': 16, - # 'id': 42986}, - # ...] - anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] - total_num_valid_anns = sum([len(x) for x in anns]) - total_num_anns = len(coco_api.anns) - if total_num_valid_anns < total_num_anns: - logger.warning( - f"{json_file} contains {total_num_anns} annotations, but only " - f"{total_num_valid_anns} of them match to images in the file." - ) - - if "minival" not in json_file: - # The popular valminusminival & minival annotations for COCO2014 contain this bug. - # However the ratio of buggy annotations there is tiny and does not affect accuracy. - # Therefore we explicitly white-list them. 
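# Toy illustration (made-up ids) of the contiguous remapping applied above when a json's
# category ids are not already [1, #categories]: gaps are squeezed out in sorted order.
toy_cat_ids = sorted([11, 1, 7, 3])
toy_id_map = {v: i for i, v in enumerate(toy_cat_ids)}
assert toy_id_map == {1: 0, 3: 1, 7: 2, 11: 3}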
- ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) - - imgs_anns = list(zip(imgs, anns)) - logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) - - dataset_dicts = [] - - ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) - - num_instances_without_valid_segmentation = 0 - - for img_dict, anno_dict_list in imgs_anns: - record = {} - record["file_name"] = os.path.join(image_root, img_dict["file_name"]) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - image_id = record["image_id"] = img_dict["id"] - - objs = [] - for anno in anno_dict_list: - # Check that the image_id in this annotation is the same as - # the image_id we're looking at. - # This fails only when the data parsing logic or the annotation file is buggy. - - # The original COCO valminusminival2014 & minival2014 annotation files - # actually contains bugs that, together with certain ways of using COCO API, - # can trigger this assertion. - assert anno["image_id"] == image_id - - assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' - - obj = {key: anno[key] for key in ann_keys if key in anno} - if "bbox" in obj and len(obj["bbox"]) == 0: - raise ValueError( - f"One annotation of image {image_id} contains empty 'bbox' value! " - "This json does not have valid COCO format." - ) - - segm = anno.get("segmentation", None) - if segm: # either list[list[float]] or dict(RLE) - if isinstance(segm, dict): - if isinstance(segm["counts"], list): - # convert to compressed RLE - segm = mask_util.frPyObjects(segm, *segm["size"]) - else: - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - num_instances_without_valid_segmentation += 1 - continue # ignore this instance - obj["segmentation"] = segm - - keypts = anno.get("keypoints", None) - if keypts: # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 0.5 to convert to floating point coordinates. - keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - obj["bbox_mode"] = BoxMode.XYWH_ABS - if id_map: - annotation_category_id = obj["category_id"] - try: - obj["category_id"] = id_map[annotation_category_id] - except KeyError as e: - raise KeyError( - f"Encountered category_id={annotation_category_id} " - "but this id does not exist in 'categories' of the json file." - ) from e - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - - if num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. ".format( - num_instances_without_valid_segmentation - ) - + "There might be issues in your dataset generation process. Please " - "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully" - ) - return dataset_dicts - - -def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): - """ - Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are - treated as ground truth annotations and all files under "image_root" with "image_ext" extension - as input images. 
Ground truth and input images are matched using file paths relative to - "gt_root" and "image_root" respectively without taking into account file extensions. - This works for COCO as well as some other datasets. - - Args: - gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation - annotations are stored as images with integer values in pixels that represent - corresponding semantic labels. - image_root (str): the directory where the input images are. - gt_ext (str): file extension for ground truth annotations. - image_ext (str): file extension for input images. - - Returns: - list[dict]: - a list of dicts in detectron2 standard format without instance-level - annotation. - - Notes: - 1. This function does not read the image and ground truth files. - The results do not have the "image" and "sem_seg" fields. - """ - - # We match input images with ground truth based on their relative filepaths (without file - # extensions) starting from 'image_root' and 'gt_root' respectively. - def file2id(folder_path, file_path): - # extract relative path starting from `folder_path` - image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) - # remove file extension - image_id = os.path.splitext(image_id)[0] - return image_id - - input_files = sorted( - (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), - key=lambda file_path: file2id(image_root, file_path), - ) - gt_files = sorted( - (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), - key=lambda file_path: file2id(gt_root, file_path), - ) - - assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) - - # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images - if len(input_files) != len(gt_files): - logger.warn( - "Directory {} and {} has {} and {} files, respectively.".format( - image_root, gt_root, len(input_files), len(gt_files) - ) - ) - input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] - gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] - intersect = list(set(input_basenames) & set(gt_basenames)) - # sort, otherwise each worker may obtain a list[dict] in different order - intersect = sorted(intersect) - logger.warn("Will use their intersection of {} files.".format(len(intersect))) - input_files = [os.path.join(image_root, f + image_ext) for f in intersect] - gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] - - logger.info( - "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) - ) - - dataset_dicts = [] - for img_path, gt_path in zip(input_files, gt_files): - record = {} - record["file_name"] = img_path - record["sem_seg_file_name"] = gt_path - dataset_dicts.append(record) - - return dataset_dicts - - -def convert_to_coco_dict(dataset_name): - """ - Convert an instance detection/segmentation or keypoint detection dataset - in detectron2's standard format into COCO json format. - - Generic dataset description can be found here: - https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset - - COCO data format description can be found here: - http://cocodataset.org/#format-data - - Args: - dataset_name (str): - name of the source dataset - Must be registered in DatastCatalog and in detectron2's standard format. 
- Must have corresponding metadata "thing_classes" - Returns: - coco_dict: serializable dict in COCO json format - """ - - dataset_dicts = DatasetCatalog.get(dataset_name) - metadata = MetadataCatalog.get(dataset_name) - - # unmap the category mapping ids for COCO - if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} - reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa - else: - reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa - - categories = [ - {"id": reverse_id_mapper(id), "name": name} - for id, name in enumerate(metadata.thing_classes) - ] - - logger.info("Converting dataset dicts into COCO format") - coco_images = [] - coco_annotations = [] - - for image_id, image_dict in enumerate(dataset_dicts): - coco_image = { - "id": image_dict.get("image_id", image_id), - "width": int(image_dict["width"]), - "height": int(image_dict["height"]), - "file_name": str(image_dict["file_name"]), - } - coco_images.append(coco_image) - - anns_per_image = image_dict.get("annotations", []) - for annotation in anns_per_image: - # create a new dict with only COCO fields - coco_annotation = {} - - # COCO requirement: XYWH box format for axis-align and XYWHA for rotated - bbox = annotation["bbox"] - if isinstance(bbox, np.ndarray): - if bbox.ndim != 1: - raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.") - bbox = bbox.tolist() - if len(bbox) not in [4, 5]: - raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.") - from_bbox_mode = annotation["bbox_mode"] - to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS - bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode) - - # COCO requirement: instance area - if "segmentation" in annotation: - # Computing areas for instances by counting the pixels - segmentation = annotation["segmentation"] - # TODO: check segmentation type: RLE, BinaryMask or Polygon - if isinstance(segmentation, list): - polygons = PolygonMasks([segmentation]) - area = polygons.area()[0].item() - elif isinstance(segmentation, dict): # RLE - area = mask_util.area(segmentation).item() - else: - raise TypeError(f"Unknown segmentation type {type(segmentation)}!") - else: - # Computing areas using bounding boxes - if to_bbox_mode == BoxMode.XYWH_ABS: - bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS) - area = Boxes([bbox_xy]).area()[0].item() - else: - area = RotatedBoxes([bbox]).area()[0].item() - - if "keypoints" in annotation: - keypoints = annotation["keypoints"] # list[int] - for idx, v in enumerate(keypoints): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # For COCO format consistency we substract 0.5 - # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 - keypoints[idx] = v - 0.5 - if "num_keypoints" in annotation: - num_keypoints = annotation["num_keypoints"] - else: - num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) - - # COCO requirement: - # linking annotations to images - # "id" field must start with 1 - coco_annotation["id"] = len(coco_annotations) + 1 - coco_annotation["image_id"] = coco_image["id"] - coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] - coco_annotation["area"] = float(area) - coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0)) - coco_annotation["category_id"] = 
int(reverse_id_mapper(annotation["category_id"])) - - # Add optional fields - if "keypoints" in annotation: - coco_annotation["keypoints"] = keypoints - coco_annotation["num_keypoints"] = num_keypoints - - if "segmentation" in annotation: - seg = coco_annotation["segmentation"] = annotation["segmentation"] - if isinstance(seg, dict): # RLE - counts = seg["counts"] - if not isinstance(counts, str): - # make it json-serializable - seg["counts"] = counts.decode("ascii") - - coco_annotations.append(coco_annotation) - - logger.info( - "Conversion finished, " - f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" - ) - - info = { - "date_created": str(datetime.datetime.now()), - "description": "Automatically generated COCO json file for Detectron2.", - } - coco_dict = { - "info": info, - "images": coco_images, - "categories": categories, - "licenses": None, - } - if len(coco_annotations) > 0: - coco_dict["annotations"] = coco_annotations - return coco_dict - - -def convert_to_coco_json(dataset_name, output_file, allow_cached=True): - """ - Converts dataset into COCO format and saves it to a json file. - dataset_name must be registered in DatasetCatalog and in detectron2's standard format. - - Args: - dataset_name: - reference from the config file to the catalogs - must be registered in DatasetCatalog and in detectron2's standard format - output_file: path of json file that will be saved to - allow_cached: if json file is already present then skip conversion - """ - - # TODO: The dataset or the conversion script *may* change, - # a checksum would be useful for validating the cached data - - PathManager.mkdirs(os.path.dirname(output_file)) - with file_lock(output_file): - if PathManager.exists(output_file) and allow_cached: - logger.warning( - f"Using previously cached COCO format annotations at '{output_file}'. " - "You need to clear the cache file if your dataset has been modified." - ) - else: - logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") - coco_dict = convert_to_coco_dict(dataset_name) - - logger.info(f"Caching COCO format annotations at '{output_file}' ...") - tmp_file = output_file + ".tmp" - with PathManager.open(tmp_file, "w") as f: - json.dump(coco_dict, f) - shutil.move(tmp_file, output_file) - - -def register_coco_instances(name, metadata, json_file, image_root): - """ - Register a dataset in COCO's json annotation format for - instance detection, instance segmentation and keypoint detection. - (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. - `instances*.json` and `person_keypoints*.json` in the dataset). - - This is an example of how to register a new dataset. - You can do something similar to this function, to register new datasets. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - metadata (dict): extra metadata associated with this dataset. You can - leave it as an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. - """ - assert isinstance(name, str), name - assert isinstance(json_file, (str, os.PathLike)), json_file - assert isinstance(image_root, (str, os.PathLike)), image_root - # 1. register a function which returns dicts - if name not in DatasetCatalog: - DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) - - # 2. 
Optionally, add metadata about this dataset, - # since they might be useful in evaluation, visualization or logging - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata - ) - - -def main() -> None: - global logger - """ - Test the COCO json dataset loader. - - Usage: - python -m detectron2.data.datasets.coco \ - path/to/json path/to/image_root dataset_name - - "dataset_name" can be "coco_2014_minival_100", or other - pre-registered ones - """ - import sys - - import detectron2.data.datasets # noqa # add pre-defined metadata - from detectron2.utils.logger import setup_logger - from detectron2.utils.visualizer import Visualizer - - logger = setup_logger(name=__name__) - assert sys.argv[3] in DatasetCatalog.list() - meta = MetadataCatalog.get(sys.argv[3]) - - dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) - logger.info("Done loading {} samples.".format(len(dicts))) - - dirname = "coco-data-vis" - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = np.array(Image.open(d["file_name"])) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/detectron2/data/datasets/coco_panoptic.py b/detectron2/detectron2/data/datasets/coco_panoptic.py deleted file mode 100644 index b8dae44317b556610d7fed39017e082d7e855956..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/coco_panoptic.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import json -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.utils.file_io import PathManager - -from .coco import load_coco_json, load_sem_seg - -__all__ = ["register_coco_panoptic", "register_coco_panoptic_separated"] - - -def load_coco_panoptic_json(json_file, image_dir, gt_dir, meta): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". - gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". - json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - """ - - def _convert_category_id(segment_info, meta): - if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: - segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ - segment_info["category_id"] - ] - segment_info["isthing"] = True - else: - segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ - segment_info["category_id"] - ] - segment_info["isthing"] = False - return segment_info - - with PathManager.open(json_file) as f: - json_info = json.load(f) - - ret = [] - for ann in json_info["annotations"]: - image_id = int(ann["image_id"]) - # TODO: currently we assume image and label has the same filename but - # different extension, and images have extension ".jpg" for COCO. Need - # to make image extension a user-provided argument if we extend this - # function to support other COCO-like datasets. 
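# Concretely (hypothetical annotation entry), the file-name convention assumed here turns the
# panoptic label PNG name into the corresponding ".jpg" image path:
import os
ann_file_name = "000000000139.png"                 # panoptic segmentation PNG for one image
image_file = os.path.join("coco/train2017", os.path.splitext(ann_file_name)[0] + ".jpg")
# -> "coco/train2017/000000000139.jpg"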
- image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") - label_file = os.path.join(gt_dir, ann["file_name"]) - segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] - ret.append( - { - "file_name": image_file, - "image_id": image_id, - "pan_seg_file_name": label_file, - "segments_info": segments_info, - } - ) - assert len(ret), f"No images found in {image_dir}!" - assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] - assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] - return ret - - -def register_coco_panoptic( - name, metadata, image_root, panoptic_root, panoptic_json, instances_json=None -): - """ - Register a "standard" version of COCO panoptic segmentation dataset named `name`. - The dictionaries in this registered dataset follows detectron2's standard format. - Hence it's called "standard". - - Args: - name (str): the name that identifies a dataset, - e.g. "coco_2017_train_panoptic" - metadata (dict): extra metadata associated with this dataset. - image_root (str): directory which contains all the images - panoptic_root (str): directory which contains panoptic annotation images in COCO format - panoptic_json (str): path to the json panoptic annotation file in COCO format - sem_seg_root (none): not used, to be consistent with - `register_coco_panoptic_separated`. - instances_json (str): path to the json instance annotation file - """ - panoptic_name = name - DatasetCatalog.register( - panoptic_name, - lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, metadata), - ) - MetadataCatalog.get(panoptic_name).set( - panoptic_root=panoptic_root, - image_root=image_root, - panoptic_json=panoptic_json, - json_file=instances_json, - evaluator_type="coco_panoptic_seg", - ignore_label=255, - label_divisor=1000, - **metadata, - ) - - -def register_coco_panoptic_separated( - name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json -): - """ - Register a "separated" version of COCO panoptic segmentation dataset named `name`. - The annotations in this registered dataset will contain both instance annotations and - semantic annotations, each with its own contiguous ids. Hence it's called "separated". - - It follows the setting used by the PanopticFPN paper: - - 1. The instance annotations directly come from polygons in the COCO - instances annotation task, rather than from the masks in the COCO panoptic annotations. - - The two format have small differences: - Polygons in the instance annotations may have overlaps. - The mask annotations are produced by labeling the overlapped polygons - with depth ordering. - - 2. The semantic annotations are converted from panoptic annotations, where - all "things" are assigned a semantic id of 0. - All semantic categories will therefore have ids in contiguous - range [1, #stuff_categories]. - - This function will also register a pure semantic segmentation dataset - named ``name + '_stuffonly'``. - - Args: - name (str): the name that identifies a dataset, - e.g. "coco_2017_train_panoptic" - metadata (dict): extra metadata associated with this dataset. - image_root (str): directory which contains all the images - panoptic_root (str): directory which contains panoptic annotation images - panoptic_json (str): path to the json panoptic annotation file - sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
- instances_json (str): path to the json instance annotation file - """ - panoptic_name = name + "_separated" - DatasetCatalog.register( - panoptic_name, - lambda: merge_to_panoptic( - load_coco_json(instances_json, image_root, panoptic_name), - load_sem_seg(sem_seg_root, image_root), - ), - ) - MetadataCatalog.get(panoptic_name).set( - panoptic_root=panoptic_root, - image_root=image_root, - panoptic_json=panoptic_json, - sem_seg_root=sem_seg_root, - json_file=instances_json, # TODO rename - evaluator_type="coco_panoptic_seg", - ignore_label=255, - **metadata, - ) - - semantic_name = name + "_stuffonly" - DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) - MetadataCatalog.get(semantic_name).set( - sem_seg_root=sem_seg_root, - image_root=image_root, - evaluator_type="sem_seg", - ignore_label=255, - **metadata, - ) - - -def merge_to_panoptic(detection_dicts, sem_seg_dicts): - """ - Create dataset dicts for panoptic segmentation, by - merging two dicts using "file_name" field to match their entries. - - Args: - detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. - sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. - - Returns: - list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in - both detection_dicts and sem_seg_dicts that correspond to the same image. - The function assumes that the same key in different dicts has the same value. - """ - results = [] - sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} - assert len(sem_seg_file_to_entry) > 0 - - for det_dict in detection_dicts: - dic = copy.copy(det_dict) - dic.update(sem_seg_file_to_entry[dic["file_name"]]) - results.append(dic) - return results - - -if __name__ == "__main__": - """ - Test the COCO panoptic dataset loader. - - Usage: - python -m detectron2.data.datasets.coco_panoptic \ - path/to/image_root path/to/panoptic_root path/to/panoptic_json dataset_name 10 - - "dataset_name" can be "coco_2017_train_panoptic", or other - pre-registered ones - """ - from detectron2.utils.logger import setup_logger - from detectron2.utils.visualizer import Visualizer - import detectron2.data.datasets # noqa # add pre-defined metadata - import sys - from PIL import Image - import numpy as np - - logger = setup_logger(name=__name__) - assert sys.argv[4] in DatasetCatalog.list() - meta = MetadataCatalog.get(sys.argv[4]) - - dicts = load_coco_panoptic_json(sys.argv[3], sys.argv[1], sys.argv[2], meta.as_dict()) - logger.info("Done loading {} samples.".format(len(dicts))) - - dirname = "coco-data-vis" - os.makedirs(dirname, exist_ok=True) - num_imgs_to_vis = int(sys.argv[5]) - for i, d in enumerate(dicts): - img = np.array(Image.open(d["file_name"])) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) - if i + 1 >= num_imgs_to_vis: - break diff --git a/detectron2/detectron2/data/datasets/lvis.py b/detectron2/detectron2/data/datasets/lvis.py deleted file mode 100644 index 9eda86cec4b7b61e4e21602099426665ec948796..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/lvis.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import logging -import os -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode -from detectron2.utils.file_io import PathManager - -from .builtin_meta import _get_coco_instances_meta -from .lvis_v0_5_categories import LVIS_CATEGORIES as LVIS_V0_5_CATEGORIES -from .lvis_v1_categories import LVIS_CATEGORIES as LVIS_V1_CATEGORIES -from .lvis_v1_category_image_count import LVIS_CATEGORY_IMAGE_COUNT as LVIS_V1_CATEGORY_IMAGE_COUNT - -""" -This file contains functions to parse LVIS-format annotations into dicts in the -"Detectron2 format". -""" - -logger = logging.getLogger(__name__) - -__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] - - -def register_lvis_instances(name, metadata, json_file, image_root): - """ - Register a dataset in LVIS's json annotation format for instance detection and segmentation. - - Args: - name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". - metadata (dict): extra metadata associated with this dataset. It can be an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. - """ - DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata - ) - - -def load_lvis_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): - """ - Load a json file in LVIS's annotation format. - - Args: - json_file (str): full path to the LVIS json annotation file. - image_root (str): the directory where the images in this json file exists. - dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). - If provided, this function will put "thing_classes" into the metadata - associated with this dataset. - extra_annotation_keys (list[str]): list of per-annotation keys that should also be - loaded into the dataset dict (besides "bbox", "bbox_mode", "category_id", - "segmentation"). The values for these keys will be returned as-is. - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - - Notes: - 1. This function does not read the image files. - The results do not have the "image" field. - """ - from lvis import LVIS - - json_file = PathManager.get_local_path(json_file) - - timer = Timer() - lvis_api = LVIS(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - - if dataset_name is not None: - meta = get_lvis_instances_meta(dataset_name) - MetadataCatalog.get(dataset_name).set(**meta) - - # sort indices for reproducible results - img_ids = sorted(lvis_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = lvis_api.load_imgs(img_ids) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. Example of anns[0]: - # [{'segmentation': [[192.81, - # 247.09, - # ... 
-
-
-def load_lvis_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
-    """
-    Load a json file in LVIS's annotation format.
-
-    Args:
-        json_file (str): full path to the LVIS json annotation file.
-        image_root (str): the directory where the images in this json file exist.
-        dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train").
-            If provided, this function will put "thing_classes" into the metadata
-            associated with this dataset.
-        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
-            loaded into the dataset dict (besides "bbox", "bbox_mode", "category_id",
-            "segmentation"). The values for these keys will be returned as-is.
-
-    Returns:
-        list[dict]: a list of dicts in Detectron2 standard format. (See
-        `Using Custom Datasets`_ )
-
-    Notes:
-        1. This function does not read the image files.
-           The results do not have the "image" field.
-    """
-    from lvis import LVIS
-
-    json_file = PathManager.get_local_path(json_file)
-
-    timer = Timer()
-    lvis_api = LVIS(json_file)
-    if timer.seconds() > 1:
-        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
-
-    if dataset_name is not None:
-        meta = get_lvis_instances_meta(dataset_name)
-        MetadataCatalog.get(dataset_name).set(**meta)
-
-    # sort indices for reproducible results
-    img_ids = sorted(lvis_api.imgs.keys())
-    # imgs is a list of dicts, each looks something like:
-    # {'license': 4,
-    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
-    #  'file_name': 'COCO_val2014_000000001268.jpg',
-    #  'height': 427,
-    #  'width': 640,
-    #  'date_captured': '2013-11-17 05:57:24',
-    #  'id': 1268}
-    imgs = lvis_api.load_imgs(img_ids)
-    # anns is a list[list[dict]], where each dict is an annotation
-    # record for an object. The inner list enumerates the objects in an image
-    # and the outer list enumerates over images. Example of anns[0]:
-    # [{'segmentation': [[192.81,
-    #     247.09,
-    #     ...
-    #     219.03,
-    #     249.06]],
-    #   'area': 1035.749,
-    #   'image_id': 1268,
-    #   'bbox': [192.81, 224.8, 74.73, 33.43],
-    #   'category_id': 16,
-    #   'id': 42986},
-    #  ...]
-    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]
-
-    # Sanity check that each annotation has a unique id
-    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
-    assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format(
-        json_file
-    )
-
-    imgs_anns = list(zip(imgs, anns))
-
-    logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file))
-
-    if extra_annotation_keys:
-        logger.info(
-            "The following extra annotation keys will be loaded: {} ".format(extra_annotation_keys)
-        )
-    else:
-        extra_annotation_keys = []
-
-    def get_file_name(img_root, img_dict):
-        # Determine the path including the split folder ("train2017", "val2017", "test2017") from
-        # the coco_url field. Example:
-        # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg'
-        split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
-        return os.path.join(img_root + split_folder, file_name)
-
-    dataset_dicts = []
-
-    for img_dict, anno_dict_list in imgs_anns:
-        record = {}
-        record["file_name"] = get_file_name(image_root, img_dict)
-        record["height"] = img_dict["height"]
-        record["width"] = img_dict["width"]
-        record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", [])
-        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
-        image_id = record["image_id"] = img_dict["id"]
-
-        objs = []
-        for anno in anno_dict_list:
-            # Check that the image_id in this annotation is the same as
-            # the image_id we're looking at.
-            # This fails only when the data parsing logic or the annotation file is buggy.
-            assert anno["image_id"] == image_id
-            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
-            # LVIS data loader can be used to load COCO dataset categories. In this case `meta`
-            # variable will have a field with COCO-specific category mapping.
-            if dataset_name is not None and "thing_dataset_id_to_contiguous_id" in meta:
-                obj["category_id"] = meta["thing_dataset_id_to_contiguous_id"][anno["category_id"]]
-            else:
-                obj["category_id"] = anno["category_id"] - 1  # Convert 1-indexed to 0-indexed
-            segm = anno["segmentation"]  # list[list[float]]
-            # filter out invalid polygons (< 3 points)
-            valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
-            assert len(segm) == len(
-                valid_segm
-            ), "Annotation contains an invalid polygon with < 3 points"
-            assert len(segm) > 0
-            obj["segmentation"] = segm
-            for extra_ann_key in extra_annotation_keys:
-                obj[extra_ann_key] = anno[extra_ann_key]
-            objs.append(obj)
-        record["annotations"] = objs
-        dataset_dicts.append(record)
-
-    return dataset_dicts
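
Putting the loop above together, each element of the returned list is a plain dict in Detectron2's standard format. Here is a sketch of one such record, reusing the illustrative values from the comments above; the polygon is shortened and the concrete numbers and path are only for illustration.

    from detectron2.structures import BoxMode

    record = {
        "file_name": "datasets/coco/train2017/000000155379.jpg",  # built by get_file_name()
        "height": 427,
        "width": 640,
        "image_id": 1268,
        "not_exhaustive_category_ids": [],
        "neg_category_ids": [],
        "annotations": [
            {
                "bbox": [192.81, 224.8, 74.73, 33.43],  # kept in LVIS's XYWH layout
                "bbox_mode": BoxMode.XYWH_ABS,
                "category_id": 15,  # LVIS id 16, shifted to 0-indexed (no COCO remapping)
                "segmentation": [[192.81, 247.09, 219.03, 249.06, 192.81, 224.8]],
            }
        ],
    }
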
-
-
-def get_lvis_instances_meta(dataset_name):
-    """
-    Load LVIS metadata.
-
-    Args:
-        dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5").
-
-    Returns:
-        dict: LVIS metadata with keys: thing_classes
-    """
-    if "cocofied" in dataset_name:
-        return _get_coco_instances_meta()
-    if "v0.5" in dataset_name:
-        return _get_lvis_instances_meta_v0_5()
-    elif "v1" in dataset_name:
-        return _get_lvis_instances_meta_v1()
-    raise ValueError("No built-in metadata for dataset {}".format(dataset_name))
-
-
-def _get_lvis_instances_meta_v0_5():
-    assert len(LVIS_V0_5_CATEGORIES) == 1230
-    cat_ids = [k["id"] for k in LVIS_V0_5_CATEGORIES]
-    assert min(cat_ids) == 1 and max(cat_ids) == len(
-        cat_ids
-    ), "Category ids are not in [1, #categories], as expected"
-    # Ensure that the category list is sorted by id
-    lvis_categories = sorted(LVIS_V0_5_CATEGORIES, key=lambda x: x["id"])
-    thing_classes = [k["synonyms"][0] for k in lvis_categories]
-    meta = {"thing_classes": thing_classes}
-    return meta
-
-
-def _get_lvis_instances_meta_v1():
-    assert len(LVIS_V1_CATEGORIES) == 1203
-    cat_ids = [k["id"] for k in LVIS_V1_CATEGORIES]
-    assert min(cat_ids) == 1 and max(cat_ids) == len(
-        cat_ids
-    ), "Category ids are not in [1, #categories], as expected"
-    # Ensure that the category list is sorted by id
-    lvis_categories = sorted(LVIS_V1_CATEGORIES, key=lambda x: x["id"])
-    thing_classes = [k["synonyms"][0] for k in lvis_categories]
-    meta = {
-        "thing_classes": thing_classes,
-        "class_image_count": LVIS_V1_CATEGORY_IMAGE_COUNT,
-    }
-    return meta
-
-
-def main() -> None:
-    global logger
-    """
-    Test the LVIS json dataset loader.
-
-    Usage:
-        python -m detectron2.data.datasets.lvis \
-            path/to/json path/to/image_root dataset_name vis_limit
-    """
-    import sys
-
-    import detectron2.data.datasets  # noqa # add pre-defined metadata
-    import numpy as np
-    from detectron2.utils.logger import setup_logger
-    from detectron2.utils.visualizer import Visualizer
-    from PIL import Image
-
-    logger = setup_logger(name=__name__)
-    meta = MetadataCatalog.get(sys.argv[3])
-
-    dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3])
-    logger.info("Done loading {} samples.".format(len(dicts)))
-
-    dirname = "lvis-data-vis"
-    os.makedirs(dirname, exist_ok=True)
-    for d in dicts[: int(sys.argv[4])]:
-        img = np.array(Image.open(d["file_name"]))
-        visualizer = Visualizer(img, metadata=meta)
-        vis = visualizer.draw_dataset_dict(d)
-        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
-        vis.save(fpath)
-
-
-if __name__ == "__main__":
-    main()  # pragma: no cover
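
The two private helpers above enforce the category-count invariants (1230 classes for LVIS v0.5, 1203 for v1) and expose the first synonym of each category as its class name. A quick sketch of how the public helper resolves a dataset name, assuming the deleted lvis module (e.g. from an installed detectron2) is importable:

    from detectron2.data.datasets.lvis import get_lvis_instances_meta

    meta_v1 = get_lvis_instances_meta("lvis_v1_train")
    assert len(meta_v1["thing_classes"]) == 1203
    assert "class_image_count" in meta_v1  # only present for v1

    meta_v05 = get_lvis_instances_meta("lvis_v0.5_val")
    assert len(meta_v05["thing_classes"]) == 1230
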
diff --git a/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py b/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py
deleted file mode 100644
index d3dab6198da614937b08682f4c9edf52bdf1d236..0000000000000000000000000000000000000000
--- a/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# Autogen with
-# with open("lvis_v0.5_val.json", "r") as f:
-#     a = json.load(f)
-#     c = a["categories"]
-#     for x in c:
-#         del x["image_count"]
-#         del x["instance_count"]
-# LVIS_CATEGORIES = repr(c) + " # noqa"
-
-# fmt: off
-LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20,
'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 
'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a 
bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to 
sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 
'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 
'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 
'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 
'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 
'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 
'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 
'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, 
{'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small 
writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 
'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 
'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 
'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 
'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for 
holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 
'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 
'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 
'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 
'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 
'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 
'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by 
sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 
'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 
'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing 
relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 
'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': 
['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect 
clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant 
commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 
'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa
-# fmt: on
diff --git a/detectron2/detectron2/data/datasets/lvis_v1_categories.py b/detectron2/detectron2/data/datasets/lvis_v1_categories.py
deleted file mode 100644
index 7374e6968bb006f5d8c49e75d9d3b31ea3d77d05..0000000000000000000000000000000000000000
--- a/detectron2/detectron2/data/datasets/lvis_v1_categories.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# Autogen with
-# with open("lvis_v1_val.json", "r") as f:
-#     a = json.load(f)
-# c = a["categories"]
-# for x in c:
-#     del x["image_count"]
-#     del x["instance_count"]
-# LVIS_CATEGORIES = repr(c) + " # noqa"
-# with open("/tmp/lvis_categories.py", "wt") as f:
-#     f.write(f"LVIS_CATEGORIES = {LVIS_CATEGORIES}")
-# Then paste the contents of that file below
-
-# fmt: off
-LVIS_CATEGORIES = [{'frequency': 'c', 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'id': 1, 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'id': 2, 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'id': 3, 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'f', 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'id': 4, 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'id': 5, 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'c', 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'id': 6, 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'synset': 'almond.n.02', 'synonyms': ['almond'], 'id': 7, 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'id': 8, 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'c', 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'id': 9, 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'id': 10, 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 'transmitting_aerial'], 'id': 11, 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'synset': 'apple.n.01', 'synonyms': ['apple'], 'id': 12, 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'id': 13, 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'id': 14, 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'synset': 'apron.n.01', 'synonyms': ['apron'], 'id': 15, 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'id': 16, 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'r', 'synset': 'arctic.n.02', 'synonyms': ['arctic_(type_of_shoe)', 'galosh', 'golosh', 'rubber_(type_of_shoe)', 'gumshoe'], 'id': 17, 'def': 'a waterproof overshoe that protects shoes from water or snow', 'name': 'arctic_(type_of_shoe)'}, {'frequency': 'c', 'synset': 
'armband.n.02', 'synonyms': ['armband'], 'id': 18, 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'id': 19, 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'id': 20, 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'id': 21, 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'id': 22, 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'id': 23, 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'id': 24, 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'id': 25, 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'id': 26, 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'f', 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'id': 27, 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'id': 28, 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'synset': 'awning.n.01', 'synonyms': ['awning'], 'id': 29, 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'id': 30, 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'r', 'synset': 'baboon.n.01', 'synonyms': ['baboon'], 'id': 31, 'def': 'large terrestrial monkeys having doglike muzzles', 'name': 'baboon'}, {'frequency': 'f', 'synset': 'baby_buggy.n.01', 'synonyms': ['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'id': 32, 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'id': 33, 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'id': 34, 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'id': 35, 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'id': 36, 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'id': 
37, 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'id': 38, 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'id': 39, 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'id': 40, 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'synset': 'ball.n.06', 'synonyms': ['ball'], 'id': 41, 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'id': 42, 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'id': 43, 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'id': 44, 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'synset': 'banana.n.02', 'synonyms': ['banana'], 'id': 45, 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'c', 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'id': 46, 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'id': 47, 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'f', 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'id': 48, 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'id': 49, 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'id': 50, 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'id': 51, 'def': 'a bar to which heavy discs are attached at each end; used in weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'synset': 'barge.n.01', 'synonyms': ['barge'], 'id': 52, 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'id': 53, 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'id': 54, 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'id': 55, 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'id': 56, 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'id': 57, 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 
'f', 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'id': 58, 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'id': 59, 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'id': 60, 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'id': 61, 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'id': 62, 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'id': 63, 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'c', 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'id': 64, 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'id': 65, 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'id': 66, 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'id': 67, 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'id': 68, 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'id': 69, 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'synset': 'battery.n.02', 'synonyms': ['battery'], 'id': 70, 'def': 'a portable device that produces electricity', 'name': 'battery'}, {'frequency': 'r', 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'id': 71, 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'synset': 'bead.n.01', 'synonyms': ['bead'], 'id': 72, 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'c', 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'id': 73, 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'id': 74, 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'id': 75, 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'synset': 'bear.n.01', 'synonyms': ['bear'], 'id': 76, 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'synset': 'bed.n.01', 'synonyms': ['bed'], 'id': 77, 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'r', 'synset': 'bedpan.n.01', 'synonyms': 
['bedpan'], 'id': 78, 'def': 'a shallow vessel used by a bedridden patient for defecation and urination', 'name': 'bedpan'}, {'frequency': 'f', 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'id': 79, 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'synset': 'beef.n.01', 'synonyms': ['cow'], 'id': 80, 'def': 'cattle/cow', 'name': 'cow'}, {'frequency': 'f', 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'id': 81, 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'id': 82, 'def': 'a device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'id': 83, 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'id': 84, 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'id': 85, 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'synset': 'bell.n.01', 'synonyms': ['bell'], 'id': 86, 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'id': 87, 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'synset': 'belt.n.02', 'synonyms': ['belt'], 'id': 88, 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'id': 89, 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'synset': 'bench.n.01', 'synonyms': ['bench'], 'id': 90, 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'synset': 'beret.n.01', 'synonyms': ['beret'], 'id': 91, 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'synset': 'bib.n.02', 'synonyms': ['bib'], 'id': 92, 'def': 'a napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'id': 93, 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'id': 94, 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'id': 95, 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'f', 'synset': 'billboard.n.01', 'synonyms': ['billboard'], 'id': 96, 'def': 'large outdoor signboard', 'name': 'billboard'}, {'frequency': 'c', 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'id': 97, 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'id': 98, 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'synset': 'bird.n.01', 'synonyms': ['bird'], 'id': 99, 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'c', 'synset': 
'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'id': 100, 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'c', 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'id': 101, 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'id': 102, 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'id': 103, 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'id': 104, 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'id': 105, 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'id': 106, 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'id': 107, 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'synset': 'blackberry.n.01', 'synonyms': ['blackberry'], 'id': 108, 'def': 'large sweet black or very dark purple edible aggregate fruit', 'name': 'blackberry'}, {'frequency': 'f', 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'id': 109, 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'id': 110, 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'id': 111, 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'id': 112, 'def': 'an electrically powered mixer that mixes or chops or liquefies foods', 'name': 'blender'}, {'frequency': 'r', 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'id': 113, 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'f', 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'id': 114, 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'f', 'synset': 'blouse.n.01', 'synonyms': ['blouse'], 'id': 115, 'def': 'a top worn by women', 'name': 'blouse'}, {'frequency': 'f', 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'id': 116, 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'id': 117, 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'id': 118, 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'r', 'synset': 'bob.n.05', 'synonyms': ['bob', 'bobber', 'bobfloat'], 'id': 119, 'def': 'a small float usually made of cork; attached to a fishing line', 'name': 'bob'}, {'frequency': 'c', 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'id': 120, 'def': 'a thing around which thread/tape/film 
or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'c', 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'id': 121, 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'id': 122, 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'id': 123, 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'id': 124, 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'id': 125, 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'id': 126, 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'synset': 'book.n.01', 'synonyms': ['book'], 'id': 127, 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'c', 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'id': 128, 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'id': 129, 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'id': 130, 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'id': 131, 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'synset': 'boot.n.01', 'synonyms': ['boot'], 'id': 132, 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'id': 133, 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 'id': 134, 'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'id': 135, 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'id': 136, 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'id': 137, 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'id': 138, 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'id': 139, 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'id': 140, 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 
'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'id': 141, 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'id': 142, 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'f', 'synset': 'box.n.01', 'synonyms': ['box'], 'id': 143, 'def': 'a (usually rectangular) container; may have a lid', 'name': 'box'}, {'frequency': 'r', 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'id': 144, 'def': 'large glove covering the fists of a fighter; worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'id': 145, 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'id': 146, 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'id': 147, 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'id': 148, 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'id': 149, 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'f', 'synset': 'bread.n.01', 'synonyms': ['bread'], 'id': 150, 'def': 'food made from dough of flour or meal and usually raised with yeast or baking powder and then baked', 'name': 'bread'}, {'frequency': 'r', 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'id': 151, 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'f', 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'id': 152, 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'id': 153, 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'f', 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 'id': 154, 'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'id': 155, 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'synset': 'broom.n.01', 'synonyms': ['broom'], 'id': 156, 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'id': 157, 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'id': 158, 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'id': 159, 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'id': 160, 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, 
{'frequency': 'r', 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'id': 161, 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'synset': 'bull.n.11', 'synonyms': ['horned_cow'], 'id': 162, 'def': 'a cow with horns', 'name': 'bull'}, {'frequency': 'c', 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'id': 163, 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'id': 164, 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'id': 165, 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'id': 166, 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'id': 167, 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'id': 168, 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'f', 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'id': 169, 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'id': 170, 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'id': 171, 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'id': 172, 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'id': 173, 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 'bus_(vehicle)'}, {'frequency': 'c', 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'id': 174, 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'f', 'synset': 'butter.n.01', 'synonyms': ['butter'], 'id': 175, 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'id': 176, 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'synset': 'button.n.01', 'synonyms': ['button'], 'id': 177, 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'id': 178, 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'id': 179, 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'c', 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 
'id': 180, 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'id': 181, 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'id': 182, 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'synset': 'cake.n.03', 'synonyms': ['cake'], 'id': 183, 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'id': 184, 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'id': 185, 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'synset': 'calf.n.01', 'synonyms': ['calf'], 'id': 186, 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'id': 187, 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'synset': 'camel.n.01', 'synonyms': ['camel'], 'id': 188, 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'synset': 'camera.n.01', 'synonyms': ['camera'], 'id': 189, 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'id': 190, 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'id': 191, 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'synset': 'can.n.01', 'synonyms': ['can', 'tin_can'], 'id': 192, 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'id': 193, 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'f', 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'id': 194, 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'id': 195, 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'id': 196, 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'id': 197, 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'id': 198, 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'id': 199, 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'c', 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'id': 200, 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 
'name': 'canoe'}, {'frequency': 'c', 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'id': 201, 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'id': 202, 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'f', 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'id': 203, 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'id': 204, 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'c', 'synset': 'cape.n.02', 'synonyms': ['cape'], 'id': 205, 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'id': 206, 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'id': 207, 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'id': 208, 'def': 'a wheeled vehicle adapted to the rails of railroad (mark each individual railcar separately)', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'id': 209, 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'id': 210, 'def': 'a battery in a motor vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'id': 211, 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'synset': 'card.n.03', 'synonyms': ['card'], 'id': 212, 'def': 'a rectangular piece of paper used to send messages (e.g. 
greetings or pictures)', 'name': 'card'}, {'frequency': 'c', 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'id': 213, 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'id': 214, 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'id': 215, 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'id': 216, 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'id': 217, 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'f', 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'id': 218, 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'synset': 'cart.n.01', 'synonyms': ['cart'], 'id': 219, 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'synset': 'carton.n.02', 'synonyms': ['carton'], 'id': 220, 'def': 'a container made of cardboard for holding food or drink', 'name': 'carton'}, {'frequency': 'c', 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'id': 221, 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'id': 222, 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'id': 223, 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'id': 224, 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'synset': 'cat.n.01', 'synonyms': ['cat'], 'id': 225, 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'f', 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'id': 226, 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'c', 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'id': 227, 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'id': 228, 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'f', 'synset': 'celery.n.01', 'synonyms': ['celery'], 'id': 229, 'def': 'widely cultivated herb with aromatic leaf stalks that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'id': 230, 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'id': 231, 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 
'chain_mail'}, {'frequency': 'f', 'synset': 'chair.n.01', 'synonyms': ['chair'], 'id': 232, 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'id': 233, 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'synset': 'chalice.n.01', 'synonyms': ['chalice'], 'id': 234, 'def': 'a bowl-shaped drinking vessel; especially the Eucharistic cup', 'name': 'chalice'}, {'frequency': 'f', 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'id': 235, 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'synset': 'chap.n.04', 'synonyms': ['chap'], 'id': 236, 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'id': 237, 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'id': 238, 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'id': 239, 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'id': 240, 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'c', 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'id': 241, 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'id': 242, 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'c', 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'id': 243, 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'id': 244, 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'id': 245, 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'id': 246, 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'id': 247, 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'id': 248, 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'id': 249, 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'id': 250, 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'id': 251, 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 
'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'id': 252, 'def': 'shirt collar, animal collar, or tight-fitting necklace', 'name': 'choker'}, {'frequency': 'f', 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'id': 253, 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'f', 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'id': 254, 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'id': 255, 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'synset': 'chute.n.02', 'synonyms': ['slide'], 'id': 256, 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'id': 257, 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'id': 258, 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'f', 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'id': 259, 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'id': 260, 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'id': 261, 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'id': 262, 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'c', 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'id': 263, 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'id': 264, 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'synset': 'cleat.n.02', 'synonyms': ['cleat_(for_securing_rope)'], 'id': 265, 'def': 'a fastener (usually with two projecting horns) around which a rope can be secured', 'name': 'cleat_(for_securing_rope)'}, {'frequency': 'r', 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'id': 266, 'def': 'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'synset': 'clip.n.03', 'synonyms': ['clip'], 'id': 267, 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'id': 268, 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'r', 'synset': 'clipper.n.03', 'synonyms': ['clippers_(for_plants)'], 'id': 269, 'def': 'shears for cutting grass or shrubbery (often used in the plural)', 'name': 'clippers_(for_plants)'}, {'frequency': 'r', 'synset': 'cloak.n.02', 'synonyms': ['cloak'], 'id': 270, 'def': 'a loose outer garment', 'name': 'cloak'}, {'frequency': 'f', 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'id': 271, 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'synset': 
'clock_tower.n.01', 'synonyms': ['clock_tower'], 'id': 272, 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'id': 273, 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'id': 274, 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'id': 275, 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'id': 276, 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'synset': 'coat.n.01', 'synonyms': ['coat'], 'id': 277, 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'id': 278, 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'c', 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'id': 279, 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'id': 280, 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'r', 'synset': 'cockroach.n.01', 'synonyms': ['cockroach'], 'id': 281, 'def': 'any of numerous chiefly nocturnal insects; some are domestic pests', 'name': 'cockroach'}, {'frequency': 'r', 'synset': 'cocoa.n.01', 'synonyms': ['cocoa_(beverage)', 'hot_chocolate_(beverage)', 'drinking_chocolate'], 'id': 282, 'def': 'a beverage made from cocoa powder and milk and sugar; usually drunk hot', 'name': 'cocoa_(beverage)'}, {'frequency': 'c', 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'id': 283, 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'f', 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'id': 284, 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'id': 285, 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'id': 286, 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'synset': 'coil.n.05', 'synonyms': ['coil'], 'id': 287, 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'synset': 'coin.n.01', 'synonyms': ['coin'], 'id': 288, 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'c', 'synset': 'colander.n.01', 'synonyms': ['colander', 'cullender'], 'id': 289, 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'id': 290, 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'id': 291, 
'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'id': 292, 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'id': 293, 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'id': 294, 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'r', 'synset': 'compass.n.01', 'synonyms': ['compass'], 'id': 295, 'def': 'navigational instrument for finding directions', 'name': 'compass'}, {'frequency': 'f', 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'id': 296, 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'f', 'synset': 'condiment.n.01', 'synonyms': ['condiment'], 'id': 297, 'def': 'a preparation (a sauce or relish or spice) to enhance flavor or enjoyment', 'name': 'condiment'}, {'frequency': 'f', 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'id': 298, 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'id': 299, 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'id': 300, 'def': 'a car that has a top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'id': 301, 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'r', 'synset': 'cooker.n.01', 'synonyms': ['cooker'], 'id': 302, 'def': 'a utensil for cooking', 'name': 'cooker'}, {'frequency': 'f', 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'id': 303, 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'id': 304, 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'id': 305, 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'f', 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'id': 306, 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'id': 307, 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'c', 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'id': 308, 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'f', 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 'corn', 'maize'], 'id': 309, 'def': 'ears or kernels of corn that can be prepared and served for human food (only mark individual ears or kernels)', 'name': 'edible_corn'}, {'frequency': 'r', 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'id': 310, 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 
'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'id': 311, 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'id': 312, 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'id': 313, 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'c', 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'id': 314, 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'c', 'synset': 'costume.n.04', 'synonyms': ['costume'], 'id': 315, 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'id': 316, 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'id': 317, 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'c', 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'id': 318, 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'id': 319, 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'c', 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'id': 320, 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'r', 'synset': 'crab.n.05', 'synonyms': ['crabmeat'], 'id': 321, 'def': 'the edible flesh of any of various crabs', 'name': 'crabmeat'}, {'frequency': 'c', 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'id': 322, 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'id': 323, 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'synset': 'crate.n.01', 'synonyms': ['crate'], 'id': 324, 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'c', 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'id': 325, 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'id': 326, 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'c', 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'id': 327, 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'id': 328, 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'id': 329, 'def': 'an earthen jar (made of baked clay) or a modern electric crockpot', 'name': 'crock_pot'}, {'frequency': 'f', 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'id': 330, 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'synset': 'crouton.n.01', 'synonyms': 
['crouton'], 'id': 331, 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'c', 'synset': 'crow.n.01', 'synonyms': ['crow'], 'id': 332, 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'r', 'synset': 'crowbar.n.01', 'synonyms': ['crowbar', 'wrecking_bar', 'pry_bar'], 'id': 333, 'def': 'a heavy iron lever with one end forged into a wedge', 'name': 'crowbar'}, {'frequency': 'c', 'synset': 'crown.n.04', 'synonyms': ['crown'], 'id': 334, 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'id': 335, 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'id': 336, 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'id': 337, 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'f', 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'id': 338, 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'c', 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'id': 339, 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'id': 340, 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'c', 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'id': 341, 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'id': 342, 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'id': 343, 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'synset': 'cup.n.01', 'synonyms': ['cup'], 'id': 344, 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'id': 345, 'def': 'a metal award or cup-shaped vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'f', 'synset': 'cupboard.n.01', 'synonyms': ['cupboard', 'closet'], 'id': 346, 'def': 'a small room (or recess) or cabinet used for storage space', 'name': 'cupboard'}, {'frequency': 'f', 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'id': 347, 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'id': 348, 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'id': 349, 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'synset': 'curtain.n.01', 'synonyms': ['curtain', 'drapery'], 'id': 350, 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, 
{'frequency': 'f', 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'id': 351, 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'id': 352, 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'id': 353, 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'id': 354, 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'synset': 'dalmatian.n.02', 'synonyms': ['dalmatian'], 'id': 355, 'def': 'a large breed having a smooth white coat with black or brown spots', 'name': 'dalmatian'}, {'frequency': 'c', 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'id': 356, 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'id': 357, 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'id': 358, 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'id': 359, 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'id': 360, 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'synset': 'desk.n.01', 'synonyms': ['desk'], 'id': 361, 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'id': 362, 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'id': 363, 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'id': 364, 'def': 'yearly planner book', 'name': 'diary'}, {'frequency': 'r', 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'id': 365, 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'id': 366, 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'id': 367, 'def': 'a table at which meals are served', 'name': 'dining_table'}, {'frequency': 'r', 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'id': 368, 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'f', 'synset': 'dish.n.01', 'synonyms': ['dish'], 'id': 369, 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'id': 370, 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, 
{'frequency': 'c', 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'id': 371, 'def': 'a cloth for washing dishes or cleaning in general', 'name': 'dishrag'}, {'frequency': 'f', 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'id': 372, 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'id': 373, 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid', 'dishsoap'], 'id': 374, 'def': 'dishsoap or dish detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'f', 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'id': 375, 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'r', 'synset': 'diving_board.n.01', 'synonyms': ['diving_board'], 'id': 376, 'def': 'a springboard from which swimmers can dive', 'name': 'diving_board'}, {'frequency': 'f', 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'id': 377, 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'synset': 'dog.n.01', 'synonyms': ['dog'], 'id': 378, 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'id': 379, 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'f', 'synset': 'doll.n.01', 'synonyms': ['doll'], 'id': 380, 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'id': 381, 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'synset': 'dollhouse.n.01', 'synonyms': ['dollhouse', "doll's_house"], 'id': 382, 'def': "a house so small that it is likened to a child's plaything", 'name': 'dollhouse'}, {'frequency': 'c', 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'id': 383, 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'id': 384, 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'f', 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'id': 385, 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'id': 386, 'def': 'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'id': 387, 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'synset': 'dove.n.01', 'synonyms': ['dove'], 'id': 388, 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'id': 389, 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'id': 390, 'def': 'a boxlike container in a piece of furniture; made so as to slide in and 
out', 'name': 'drawer'}, {'frequency': 'c', 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'id': 391, 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'id': 392, 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'id': 393, 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'f', 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'id': 394, 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'f', 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'id': 395, 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'synset': 'drill.n.01', 'synonyms': ['drill'], 'id': 396, 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'synset': 'drone.n.04', 'synonyms': ['drone'], 'id': 397, 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'id': 398, 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'id': 399, 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'id': 400, 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'synset': 'duck.n.01', 'synonyms': ['duck'], 'id': 401, 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'c', 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'id': 402, 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'id': 403, 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'id': 404, 'def': 'a large cylindrical bag of heavy cloth (does not include suitcases)', 'name': 'duffel_bag'}, {'frequency': 'r', 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'id': 405, 'def': 'an exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'id': 406, 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'id': 407, 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'c', 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'id': 408, 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'id': 409, 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'id': 410, 'def': 'a 
soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'synset': 'earring.n.01', 'synonyms': ['earring'], 'id': 411, 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'synset': 'easel.n.01', 'synonyms': ['easel'], 'id': 412, 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'id': 413, 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'synset': 'eel.n.01', 'synonyms': ['eel'], 'id': 414, 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'id': 415, 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'id': 416, 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'id': 417, 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'id': 418, 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'id': 419, 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'id': 420, 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'id': 421, 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'id': 422, 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'c', 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'id': 423, 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'id': 424, 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'synset': 'eraser.n.01', 'synonyms': ['eraser'], 'id': 425, 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'id': 426, 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'id': 427, 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'id': 428, 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'synset': 'fan.n.01', 'synonyms': ['fan'], 'id': 429, 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'id': 430, 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'id': 
431, 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'id': 432, 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'id': 433, 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'c', 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'id': 434, 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'id': 435, 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'id': 436, 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'id': 437, 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'id': 438, 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'id': 439, 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'id': 440, 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'f', 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'id': 441, 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'f', 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'id': 442, 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'id': 443, 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 'name': 'fire_hose'}, {'frequency': 'f', 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'id': 444, 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'id': 445, 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'r', 'synset': 'first-aid_kit.n.01', 'synonyms': ['first-aid_kit'], 'id': 446, 'def': 'kit consisting of a set of bandages and medicines for giving first aid', 'name': 'first-aid_kit'}, {'frequency': 'f', 'synset': 'fish.n.01', 'synonyms': ['fish'], 'id': 447, 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'c', 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'id': 448, 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'id': 449, 'def': 'a 
transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'c', 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'id': 450, 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'synset': 'flag.n.01', 'synonyms': ['flag'], 'id': 451, 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'id': 452, 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'id': 453, 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'id': 454, 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'c', 'synset': 'flap.n.01', 'synonyms': ['flap'], 'id': 455, 'def': 'any broad thin covering attached at one edge, such as a mud flap next to a wheel or a flap on an airplane wing', 'name': 'flap'}, {'frequency': 'r', 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'id': 456, 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'id': 457, 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'id': 458, 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'id': 459, 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'id': 460, 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'id': 461, 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'id': 462, 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'c', 'synset': 'foal.n.01', 'synonyms': ['foal'], 'id': 463, 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'id': 464, 'def': 'a chair that can be folded flat for storage', 'name': 'folding_chair'}, {'frequency': 'c', 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'id': 465, 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'id': 466, 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'id': 467, 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'id': 468, 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'synset': 
'fork.n.01', 'synonyms': ['fork'], 'id': 469, 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'c', 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'id': 470, 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'c', 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'id': 471, 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'c', 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'id': 472, 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'id': 473, 'def': 'anything that freshens air by removing or covering odor', 'name': 'freshener'}, {'frequency': 'f', 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'id': 474, 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'id': 475, 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'id': 476, 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'f', 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'id': 477, 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'id': 478, 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'id': 479, 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'r', 'synset': 'futon.n.01', 'synonyms': ['futon'], 'id': 480, 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'id': 481, 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'id': 482, 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'id': 483, 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'id': 484, 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'id': 485, 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'id': 486, 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'id': 487, 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'id': 488, 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'c', 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'id': 489, 'def': 'small swift graceful antelope of 
Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'id': 490, 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'id': 491, 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'r', 'synset': 'generator.n.02', 'synonyms': ['generator'], 'id': 492, 'def': 'engine that converts mechanical energy into electrical energy by electromagnetic induction', 'name': 'generator'}, {'frequency': 'c', 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'id': 493, 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'id': 494, 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'id': 495, 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'id': 496, 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'id': 497, 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'id': 498, 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'synset': 'globe.n.03', 'synonyms': ['globe'], 'id': 499, 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'synset': 'glove.n.02', 'synonyms': ['glove'], 'id': 500, 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'synset': 'goat.n.01', 'synonyms': ['goat'], 'id': 501, 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'id': 502, 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'id': 503, 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'c', 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'id': 504, 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, {'frequency': 'c', 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'id': 505, 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'id': 506, 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'synset': 'goose.n.01', 'synonyms': ['goose'], 'id': 507, 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'id': 508, 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 
'id': 509, 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'f', 'synset': 'grape.n.01', 'synonyms': ['grape'], 'id': 510, 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'c', 'synset': 'grater.n.01', 'synonyms': ['grater'], 'id': 511, 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'id': 512, 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'id': 513, 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'f', 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'id': 514, 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'f', 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'id': 515, 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'id': 516, 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'f', 'synset': 'grill.n.02', 'synonyms': ['grill', 'grille', 'grillwork', 'radiator_grille'], 'id': 517, 'def': 'a framework of metal bars used as a partition or a grate', 'name': 'grill'}, {'frequency': 'r', 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'id': 518, 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'id': 519, 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'id': 520, 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'f', 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'id': 521, 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'id': 522, 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'synset': 'gun.n.01', 'synonyms': ['gun'], 'id': 523, 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'f', 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'id': 524, 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'id': 525, 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'id': 526, 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'r', 'synset': 'halter.n.03', 'synonyms': ['halter_top'], 'id': 527, 'def': "a woman's top that fastens behind the back and neck leaving the back and arms uncovered", 'name': 'halter_top'}, {'frequency': 'f', 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'id': 528, 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 
'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'id': 529, 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'id': 530, 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'c', 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'id': 531, 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'id': 532, 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'c', 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'id': 533, 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'f', 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'id': 534, 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'id': 535, 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'id': 536, 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'id': 537, 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'id': 538, 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'id': 539, 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'id': 540, 'def': 'the appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'id': 541, 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'id': 542, 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'id': 543, 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'synset': 'hat.n.01', 'synonyms': ['hat'], 'id': 544, 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'id': 545, 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'c', 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'id': 546, 'def': 'a garment that covers the head OR face', 'name': 'veil'}, {'frequency': 'f', 'synset': 'headband.n.01', 'synonyms': ['headband'], 'id': 547, 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'id': 548, 'def': 'a 
vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'id': 549, 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'id': 550, 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'synset': 'headset.n.01', 'synonyms': ['headset'], 'id': 551, 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'id': 552, 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'c', 'synset': 'heart.n.02', 'synonyms': ['heart'], 'id': 553, 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'id': 554, 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'id': 555, 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'id': 556, 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'synset': 'heron.n.02', 'synonyms': ['heron'], 'id': 557, 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, {'frequency': 'c', 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'id': 558, 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'id': 559, 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'id': 560, 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'id': 561, 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'id': 562, 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'id': 563, 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'synset': 'honey.n.01', 'synonyms': ['honey'], 'id': 564, 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'id': 565, 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'synset': 'hook.n.05', 'synonyms': ['hook'], 'id': 566, 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'r', 'synset': 'hookah.n.01', 'synonyms': ['hookah', 'narghile', 'nargileh', 'sheesha', 'shisha', 'water_pipe'], 'id': 567, 
'def': 'a tobacco pipe with a long flexible tube connected to a container where the smoke is cooled by passing through water', 'name': 'hookah'}, {'frequency': 'r', 'synset': 'hornet.n.01', 'synonyms': ['hornet'], 'id': 568, 'def': 'large stinging wasp', 'name': 'hornet'}, {'frequency': 'f', 'synset': 'horse.n.01', 'synonyms': ['horse'], 'id': 569, 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'id': 570, 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'id': 571, 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'id': 572, 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'id': 573, 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'id': 574, 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'id': 575, 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'c', 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'id': 576, 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'id': 577, 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'f', 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'id': 578, 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'id': 579, 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 'r', 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'id': 580, 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'id': 581, 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'id': 582, 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'id': 583, 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'c', 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'id': 584, 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'id': 585, 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'f', 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'id': 586, 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 
'smoothing_iron_(for_clothing)'], 'id': 587, 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'c', 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'id': 588, 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'id': 589, 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'c', 'synset': 'jam.n.01', 'synonyms': ['jam'], 'id': 590, 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'synset': 'jar.n.01', 'synonyms': ['jar'], 'id': 591, 'def': 'a vessel (usually cylindrical) with a wide mouth and without handles', 'name': 'jar'}, {'frequency': 'f', 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'id': 592, 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'id': 593, 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'id': 594, 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'id': 595, 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'id': 596, 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'r', 'synset': 'jewel.n.01', 'synonyms': ['jewel', 'gem', 'precious_stone'], 'id': 597, 'def': 'a precious or semiprecious stone incorporated into a piece of jewelry', 'name': 'jewel'}, {'frequency': 'c', 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'id': 598, 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'id': 599, 'def': 'a control device for computers consisting of a vertical handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'c', 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'id': 600, 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'id': 601, 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'synset': 'keg.n.02', 'synonyms': ['keg'], 'id': 602, 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'id': 603, 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'id': 604, 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'synset': 'key.n.01', 'synonyms': ['key'], 'id': 605, 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'id': 606, 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'c', 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'id': 607, 'def': 'a knee-length pleated tartan 
skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'id': 608, 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'id': 609, 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'r', 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'id': 610, 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'synset': 'kite.n.03', 'synonyms': ['kite'], 'id': 611, 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'id': 612, 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'id': 613, 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'id': 614, 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'synset': 'knife.n.01', 'synonyms': ['knife'], 'id': 615, 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'id': 616, 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'synset': 'knob.n.02', 'synonyms': ['knob'], 'id': 617, 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'id': 618, 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'id': 619, 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'id': 620, 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'id': 621, 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'id': 622, 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'c', 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'id': 623, 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'f', 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'id': 624, 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'id': 625, 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'id': 626, 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'id': 627, 'def': 'a metal post supporting 
an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'id': 628, 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'id': 629, 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'id': 630, 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'id': 631, 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'id': 632, 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'f', 'synset': 'latch.n.02', 'synonyms': ['latch'], 'id': 633, 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'id': 634, 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'synset': 'leather.n.01', 'synonyms': ['leather'], 'id': 635, 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'id': 636, 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'id': 637, 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'r', 'synset': 'legume.n.02', 'synonyms': ['legume'], 'id': 638, 'def': 'the fruit or seed of bean or pea plants', 'name': 'legume'}, {'frequency': 'f', 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'id': 639, 'def': 'yellow oval fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'id': 640, 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'id': 641, 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'id': 642, 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'id': 643, 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'id': 644, 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'id': 645, 'def': 'lightblub/source of light', 'name': 'lightbulb'}, {'frequency': 'r', 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'id': 646, 'def': 'a metallic conductor that is attached to a 
high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'f', 'synset': 'lime.n.06', 'synonyms': ['lime'], 'id': 647, 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'id': 648, 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'c', 'synset': 'lion.n.01', 'synonyms': ['lion'], 'id': 649, 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'id': 650, 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'r', 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'id': 651, 'def': 'liquor or beer', 'name': 'liquor'}, {'frequency': 'c', 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'id': 652, 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'f', 'synset': 'log.n.01', 'synonyms': ['log'], 'id': 653, 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'id': 654, 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'f', 'synset': 'loudspeaker.n.01', 'synonyms': ['speaker_(stero_equipment)'], 'id': 655, 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'id': 656, 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'id': 657, 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'id': 658, 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'id': 659, 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'c', 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'id': 660, 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'f', 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'id': 661, 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'synset': 'mallard.n.01', 'synonyms': ['mallard'], 'id': 662, 'def': 'wild dabbling duck from which domestic ducks are descended', 'name': 'mallard'}, {'frequency': 'r', 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'id': 663, 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'id': 664, 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'r', 'synset': 'manatee.n.01', 'synonyms': ['manatee'], 'id': 665, 'def': 'sirenian mammal of tropical coastal waters of America', 'name': 'manatee'}, {'frequency': 'c', 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'id': 666, 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'id': 667, 'def': 'a container (usually in a barn or stable) 
from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'id': 668, 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'f', 'synset': 'map.n.01', 'synonyms': ['map'], 'id': 669, 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'f', 'synset': 'marker.n.03', 'synonyms': ['marker'], 'id': 670, 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'synset': 'martini.n.01', 'synonyms': ['martini'], 'id': 671, 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'id': 672, 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'id': 673, 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'synset': 'masher.n.02', 'synonyms': ['masher'], 'id': 674, 'def': 'a kitchen utensil used for mashing (e.g. potatoes)', 'name': 'masher'}, {'frequency': 'f', 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'id': 675, 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'synset': 'mast.n.01', 'synonyms': ['mast'], 'id': 676, 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'id': 677, 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'id': 678, 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'id': 679, 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'id': 680, 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'id': 681, 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'id': 682, 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'id': 683, 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'c', 'synset': 'melon.n.01', 'synonyms': ['melon'], 'id': 684, 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'id': 685, 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'id': 686, 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'id': 687, 'def': 'kitchen appliance that 
cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'id': 688, 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'f', 'synset': 'milk.n.01', 'synonyms': ['milk'], 'id': 689, 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'r', 'synset': 'milk_can.n.01', 'synonyms': ['milk_can'], 'id': 690, 'def': 'can for transporting milk', 'name': 'milk_can'}, {'frequency': 'r', 'synset': 'milkshake.n.01', 'synonyms': ['milkshake'], 'id': 691, 'def': 'frothy drink of milk and flavoring and sometimes fruit or ice cream', 'name': 'milkshake'}, {'frequency': 'f', 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'id': 692, 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'id': 693, 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'id': 694, 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'id': 695, 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'id': 696, 'def': 'a kitchen utensil that is used for mixing foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'synset': 'money.n.03', 'synonyms': ['money'], 'id': 697, 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'id': 698, 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'id': 699, 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'synset': 'motor.n.01', 'synonyms': ['motor'], 'id': 700, 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'id': 701, 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'id': 702, 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'f', 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'id': 703, 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'id': 704, 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'f', 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'id': 705, 'def': 'a computer input device that controls an on-screen pointer (does not include trackpads / touchpads)', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'id': 706, 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 
'mousepad'}, {'frequency': 'c', 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'id': 707, 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'synset': 'mug.n.04', 'synonyms': ['mug'], 'id': 708, 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'id': 709, 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'id': 710, 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'c', 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'id': 711, 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'id': 712, 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'f', 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'id': 713, 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'id': 714, 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'id': 715, 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'id': 716, 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'c', 'synset': 'needle.n.03', 'synonyms': ['needle'], 'id': 717, 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'synset': 'nest.n.01', 'synonyms': ['nest'], 'id': 718, 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'f', 'synset': 'newspaper.n.01', 'synonyms': ['newspaper', 'paper_(newspaper)'], 'id': 719, 'def': 'a daily or weekly publication on folded sheets containing news, articles, and advertisements', 'name': 'newspaper'}, {'frequency': 'c', 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'id': 720, 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'id': 721, 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'id': 722, 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'c', 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'id': 723, 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'id': 724, 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'synset': 'notepad.n.01', 'synonyms': 
['notepad'], 'id': 725, 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'f', 'synset': 'nut.n.03', 'synonyms': ['nut'], 'id': 726, 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'id': 727, 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'f', 'synset': 'oar.n.01', 'synonyms': ['oar'], 'id': 728, 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'id': 729, 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'id': 730, 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'id': 731, 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'id': 732, 'def': 'oil from olives', 'name': 'olive_oil'}, {'frequency': 'r', 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'id': 733, 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'synset': 'onion.n.01', 'synonyms': ['onion'], 'id': 734, 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'id': 735, 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'id': 736, 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'c', 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'id': 737, 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'f', 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'id': 738, 'def': 'a thick standalone cushion used as a seat or footrest, often next to a chair', 'name': 'ottoman'}, {'frequency': 'f', 'synset': 'oven.n.01', 'synonyms': ['oven'], 'id': 739, 'def': 'kitchen appliance used for baking or roasting', 'name': 'oven'}, {'frequency': 'c', 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'id': 740, 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'synset': 'owl.n.01', 'synonyms': ['owl'], 'id': 741, 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'synset': 'packet.n.03', 'synonyms': ['packet'], 'id': 742, 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'id': 743, 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'synset': 'pad.n.04', 'synonyms': ['pad'], 'id': 744, 'def': 'mostly arm/knee pads labeled', 'name': 'pad'}, {'frequency': 'f', 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'id': 745, 'def': 'a short light oar used without an oarlock to propel a canoe or small 
boat', 'name': 'paddle'}, {'frequency': 'c', 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'id': 746, 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'c', 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'id': 747, 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'synset': 'painting.n.01', 'synonyms': ['painting'], 'id': 748, 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'f', 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'id': 749, 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'id': 750, 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'id': 751, 'def': 'cooking utensil consisting of a wide metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'id': 752, 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'id': 753, 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'id': 754, 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'id': 755, 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'f', 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'id': 756, 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'id': 757, 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'id': 758, 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'id': 759, 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'id': 760, 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'c', 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'id': 761, 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'id': 762, 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'c', 'synset': 'parasol.n.01', 'synonyms': ['parasol', 'sunshade'], 'id': 763, 'def': 'a handheld collapsible source of shade', 'name': 'parasol'}, {'frequency': 'r', 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'id': 764, 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'c', 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'id': 765, 
'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'id': 766, 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'id': 767, 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'id': 768, 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'id': 769, 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'c', 'synset': 'passport.n.02', 'synonyms': ['passport'], 'id': 770, 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home country', 'name': 'passport'}, {'frequency': 'f', 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'id': 771, 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'id': 772, 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'id': 773, 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'synset': 'peach.n.03', 'synonyms': ['peach'], 'id': 774, 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'id': 775, 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'f', 'synset': 'pear.n.01', 'synonyms': ['pear'], 'id': 776, 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'c', 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'id': 777, 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'synset': 'peg.n.04', 'synonyms': ['wooden_leg', 'pegleg'], 'id': 778, 'def': 'a prosthesis that replaces a missing leg', 'name': 'wooden_leg'}, {'frequency': 'r', 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'id': 779, 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'id': 780, 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'synset': 'pen.n.01', 'synonyms': ['pen'], 'id': 781, 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'f', 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'id': 782, 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'id': 783, 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'id': 784, 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'synset': 'pendulum.n.01', 'synonyms': 
['pendulum'], 'id': 785, 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'id': 786, 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'id': 787, 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'id': 788, 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'f', 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'id': 789, 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'id': 790, 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'id': 791, 'def': 'a toiletry that emits and diffuses a fragrant odor', 'name': 'perfume'}, {'frequency': 'r', 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'id': 792, 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'synset': 'person.n.01', 'synonyms': ['person', 'baby', 'child', 'boy', 'girl', 'man', 'woman', 'human'], 'id': 793, 'def': 'a human being', 'name': 'person'}, {'frequency': 'c', 'synset': 'pet.n.01', 'synonyms': ['pet'], 'id': 794, 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'c', 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'id': 795, 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'id': 796, 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'id': 797, 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'f', 'synset': 'piano.n.01', 'synonyms': ['piano'], 'id': 798, 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'id': 799, 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'id': 800, 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'synset': 'pie.n.01', 'synonyms': ['pie'], 'id': 801, 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'id': 802, 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'id': 803, 'def': "a child's coin bank (often shaped like a pig)", 'name': 
'piggy_bank'}, {'frequency': 'f', 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'id': 804, 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'id': 805, 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'id': 806, 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'id': 807, 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'id': 808, 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'id': 809, 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'id': 810, 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'id': 811, 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'id': 812, 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'c', 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'id': 813, 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'id': 814, 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'id': 815, 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'id': 816, 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. 
tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'id': 817, 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'synset': 'plate.n.04', 'synonyms': ['plate'], 'id': 818, 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'synset': 'platter.n.01', 'synonyms': ['platter'], 'id': 819, 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'id': 820, 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'id': 821, 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'id': 822, 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'synset': 'plume.n.02', 'synonyms': ['plume'], 'id': 823, 'def': 'a feather or cluster of feathers worn as an ornament', 'name': 'plume'}, {'frequency': 'r', 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'id': 824, 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'id': 825, 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'id': 826, 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'id': 827, 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'f', 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'id': 828, 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'id': 829, 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'synset': 'pony.n.05', 'synonyms': ['pony'], 'id': 830, 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'id': 831, 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'id': 832, 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'c', 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'id': 833, 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'id': 834, 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'id': 
835, 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'synset': 'pot.n.01', 'synonyms': ['pot'], 'id': 836, 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'id': 837, 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'synset': 'potato.n.01', 'synonyms': ['potato'], 'id': 838, 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'id': 839, 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'id': 840, 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'id': 841, 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'c', 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'id': 842, 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'id': 843, 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'c', 'synset': 'pretzel.n.01', 'synonyms': ['pretzel'], 'id': 844, 'def': 'glazed and salted cracker typically in the shape of a loose knot', 'name': 'pretzel'}, {'frequency': 'f', 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'id': 845, 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'id': 846, 'def': 'a weapon that is forcibly thrown or projected at a targets', 'name': 'projectile_(weapon)'}, {'frequency': 'c', 'synset': 'projector.n.02', 'synonyms': ['projector'], 'id': 847, 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'id': 848, 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'synset': 'prune.n.01', 'synonyms': ['prune'], 'id': 849, 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'id': 850, 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'id': 851, 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'id': 852, 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'id': 853, 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'id': 854, 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'id': 855, 'def': 'a tool for 
making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'id': 856, 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'c', 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'id': 857, 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'id': 858, 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'id': 859, 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'id': 860, 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'id': 861, 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'id': 862, 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'id': 863, 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'synset': 'radar.n.01', 'synonyms': ['radar'], 'id': 864, 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'f', 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'id': 865, 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, {'frequency': 'c', 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'id': 866, 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'id': 867, 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'synset': 'raft.n.01', 'synonyms': ['raft'], 'id': 868, 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'id': 869, 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'id': 870, 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'id': 871, 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'id': 872, 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'synset': 'rat.n.01', 'synonyms': ['rat'], 'id': 873, 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'id': 874, 'def': 'a blade that has very sharp edge', 'name': 
'razorblade'}, {'frequency': 'c', 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'id': 875, 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'id': 876, 'def': 'vehicle mirror (side or rearview)', 'name': 'rearview_mirror'}, {'frequency': 'c', 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'id': 877, 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'id': 878, 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'c', 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'id': 879, 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'f', 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'id': 880, 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'id': 881, 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'id': 882, 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'id': 883, 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'c', 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'id': 884, 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'synset': 'ring.n.08', 'synonyms': ['ring'], 'id': 885, 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'id': 886, 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'id': 887, 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'synset': 'robe.n.01', 'synonyms': ['robe'], 'id': 888, 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'id': 889, 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'synset': 'rodent.n.01', 'synonyms': ['rodent'], 'id': 890, 'def': 'relatively small placental mammals having a single pair of constantly growing incisor teeth specialized for gnawing', 'name': 'rodent'}, {'frequency': 'r', 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'id': 891, 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'id': 892, 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'id': 893, 'def': 
'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'id': 894, 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'id': 895, 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'id': 896, 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'id': 897, 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'id': 898, 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'id': 899, 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'id': 900, 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'id': 901, 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'id': 902, 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'f', 'synset': 'sail.n.01', 'synonyms': ['sail'], 'id': 903, 'def': 'a large piece of fabric by means of which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'f', 'synset': 'salad.n.01', 'synonyms': ['salad'], 'id': 904, 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'id': 905, 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'c', 'synset': 'salami.n.01', 'synonyms': ['salami'], 'id': 906, 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'c', 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'id': 907, 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'id': 908, 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'c', 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'id': 909, 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'id': 910, 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'id': 911, 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, 
{'frequency': 'f', 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'id': 912, 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'id': 913, 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'id': 914, 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'id': 915, 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'id': 916, 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'id': 917, 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'id': 918, 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'id': 919, 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'id': 920, 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'id': 921, 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'id': 922, 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, {'frequency': 'f', 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'id': 923, 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'f', 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'id': 924, 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'r', 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'id': 925, 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'c', 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'id': 926, 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'f', 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'id': 927, 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'id': 928, 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'c', 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'id': 929, 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'c', 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'id': 930, 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'id': 931, 'def': 
'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'id': 932, 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'c', 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'id': 933, 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'c', 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'id': 934, 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'id': 935, 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'c', 'synset': 'shark.n.01', 'synonyms': ['shark'], 'id': 936, 'def': 'typically large carnivorous fishes with sharpe teeth', 'name': 'shark'}, {'frequency': 'r', 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'id': 937, 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'id': 938, 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'synset': 'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'id': 939, 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'id': 940, 'def': 'toiletry consisting that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'id': 941, 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'synset': 'shears.n.01', 'synonyms': ['shears'], 'id': 942, 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'id': 943, 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'id': 944, 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'id': 945, 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'c', 'synset': 'shield.n.02', 'synonyms': ['shield'], 'id': 946, 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'id': 947, 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'id': 948, 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'f', 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'id': 949, 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'id': 950, 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'synset': 'short_pants.n.01', 'synonyms': 
['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'id': 951, 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'id': 952, 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'f', 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'id': 953, 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'id': 954, 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'id': 955, 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'r', 'synset': 'shower_cap.n.01', 'synonyms': ['shower_cap'], 'id': 956, 'def': 'a tight cap worn to keep hair dry while showering', 'name': 'shower_cap'}, {'frequency': 'f', 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'id': 957, 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'id': 958, 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'f', 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'id': 959, 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'synset': 'silo.n.01', 'synonyms': ['silo'], 'id': 960, 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'synset': 'sink.n.01', 'synonyms': ['sink'], 'id': 961, 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'id': 962, 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'id': 963, 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'synset': 'ski.n.01', 'synonyms': ['ski'], 'id': 964, 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'id': 965, 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'id': 966, 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'id': 967, 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'id': 968, 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'r', 'synset': 'skullcap.n.01', 'synonyms': ['skullcap'], 'id': 969, 'def': 'rounded brimless cap fitting the crown of the head', 'name': 'skullcap'}, {'frequency': 'c', 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'id': 970, 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'synset': 'sleeping_bag.n.01', 'synonyms': 
['sleeping_bag'], 'id': 971, 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'id': 972, 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'id': 973, 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'id': 974, 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'id': 975, 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'id': 976, 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'id': 977, 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'id': 978, 'def': 'tracked vehicle for travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'synset': 'soap.n.01', 'synonyms': ['soap'], 'id': 979, 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'id': 980, 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'synset': 'sock.n.01', 'synonyms': ['sock'], 'id': 981, 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'f', 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'id': 982, 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'synset': 'softball.n.01', 'synonyms': ['softball'], 'id': 983, 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'id': 984, 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'id': 985, 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'f', 'synset': 'soup.n.01', 'synonyms': ['soup'], 'id': 986, 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'id': 987, 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'id': 988, 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'id': 989, 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 
'soybean_milk', 'soymilk'], 'id': 990, 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'id': 991, 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'id': 992, 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'id': 993, 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'id': 994, 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 'eyeglasses', 'glasses'], 'id': 995, 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'id': 996, 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'c', 'synset': 'spider.n.01', 'synonyms': ['spider'], 'id': 997, 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'r', 'synset': 'spiny_lobster.n.02', 'synonyms': ['crawfish', 'crayfish'], 'id': 998, 'def': 'large edible marine crustacean having a spiny carapace but lacking the large pincers of true lobsters', 'name': 'crawfish'}, {'frequency': 'c', 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'id': 999, 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'id': 1000, 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'id': 1001, 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'id': 1002, 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'synset': 'squid.n.01', 'synonyms': ['squid_(food)', 'calamari', 'calamary'], 'id': 1003, 'def': '(Italian cuisine) squid prepared as food', 'name': 'squid_(food)'}, {'frequency': 'c', 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'id': 1004, 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'r', 'synset': 'stagecoach.n.01', 'synonyms': ['stagecoach'], 'id': 1005, 'def': 'a large coach-and-four formerly used to carry passengers and mail on regular routes between towns', 'name': 'stagecoach'}, {'frequency': 'c', 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'id': 1006, 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'c', 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'id': 1007, 'def': 'echinoderms characterized 
by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'id': 1008, 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'id': 1009, 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'id': 1010, 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'f', 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'id': 1011, 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'id': 1012, 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'id': 1013, 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'id': 1014, 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'synset': 'stew.n.02', 'synonyms': ['stew'], 'id': 1015, 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'id': 1016, 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'id': 1017, 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'f', 'synset': 'stool.n.01', 'synonyms': ['stool'], 'id': 1018, 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'id': 1019, 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'id': 1020, 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'id': 1021, 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'id': 1022, 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'synset': 'strap.n.01', 'synonyms': ['strap'], 'id': 1023, 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'id': 1024, 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'id': 1025, 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'id': 1026, 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'id': 1027, 'def': 'a lamp 
supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'id': 1028, 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'id': 1029, 'def': 'a pointed tool for writing or drawing or engraving, including pens', 'name': 'stylus'}, {'frequency': 'r', 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'id': 1030, 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'id': 1031, 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'id': 1032, 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'f', 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'id': 1033, 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'id': 1034, 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'id': 1035, 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'id': 1036, 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'f', 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'id': 1037, 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'id': 1038, 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'synset': 'swab.n.02', 'synonyms': ['mop'], 'id': 1039, 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'id': 1040, 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'id': 1041, 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'id': 1042, 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'id': 1043, 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'id': 1044, 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'id': 1045, 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 
'c', 'synset': 'sword.n.01', 'synonyms': ['sword'], 'id': 1046, 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'id': 1047, 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'id': 1048, 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'id': 1049, 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'synset': 'table.n.02', 'synonyms': ['table'], 'id': 1050, 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'id': 1051, 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'id': 1052, 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'id': 1053, 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'synset': 'taco.n.02', 'synonyms': ['taco'], 'id': 1054, 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'synset': 'tag.n.02', 'synonyms': ['tag'], 'id': 1055, 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'id': 1056, 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'id': 1057, 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'id': 1058, 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'f', 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'id': 1059, 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'id': 1060, 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'f', 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'id': 1061, 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'id': 1062, 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'id': 1063, 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 
'id': 1064, 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'id': 1065, 'def': 'a cloth having a crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'id': 1066, 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'c', 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'id': 1067, 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'id': 1068, 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'id': 1069, 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'f', 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'id': 1070, 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'id': 1071, 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'id': 1072, 'def': 'electronic device for communicating by voice over long distances (includes wired and wireless/cell phones)', 'name': 'telephone'}, {'frequency': 'c', 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'id': 1073, 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'id': 1074, 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'id': 1075, 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'id': 1076, 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'id': 1077, 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'id': 1078, 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'id': 1079, 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'id': 1080, 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'id': 1081, 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'id': 1082, 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'f', 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'id': 1083, 'def': 'a regulator for automatically regulating 
temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'id': 1084, 'def': 'a small metal cap to protect the finger while sewing; can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'id': 1085, 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'id': 1086, 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'id': 1087, 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'id': 1088, 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'id': 1089, 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'id': 1090, 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'id': 1091, 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'c', 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'id': 1092, 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'id': 1093, 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'id': 1094, 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'id': 1095, 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'f', 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'id': 1096, 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'id': 1097, 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'id': 1098, 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'id': 1099, 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'f', 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'id': 1100, 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'id': 1101, 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'id': 1102, 'def': 'small brush; has 
long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'id': 1103, 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, {'frequency': 'f', 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'id': 1104, 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'f', 'synset': 'top.n.09', 'synonyms': ['cover'], 'id': 1105, 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'id': 1106, 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'id': 1107, 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'synset': 'towel.n.01', 'synonyms': ['towel'], 'id': 1108, 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'id': 1109, 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'synset': 'toy.n.03', 'synonyms': ['toy'], 'id': 1110, 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'id': 1111, 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'id': 1112, 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'c', 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'id': 1113, 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'f', 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'id': 1114, 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'id': 1115, 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'id': 1116, 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'synset': 'tray.n.01', 'synonyms': ['tray'], 'id': 1117, 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'id': 1118, 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'id': 1119, 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'c', 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'id': 
1120, 'def': 'a vehicle with three wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'f', 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'id': 1121, 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'id': 1122, 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'synset': 'truck.n.01', 'synonyms': ['truck'], 'id': 1123, 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'id': 1124, 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'id': 1125, 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'synset': 'tub.n.02', 'synonyms': ['vat'], 'id': 1126, 'def': 'a large vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'synset': 'turban.n.01', 'synonyms': ['turban'], 'id': 1127, 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'c', 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'id': 1128, 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'id': 1129, 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'id': 1130, 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'c', 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'id': 1131, 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'c', 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'id': 1132, 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'id': 1133, 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'f', 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'id': 1134, 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'id': 1135, 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'f', 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'id': 1136, 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'c', 'synset': 'urn.n.01', 'synonyms': ['urn'], 'id': 1137, 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'id': 1138, 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'f', 'synset': 'vase.n.01', 'synonyms': ['vase'], 'id': 1139, 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'synset': 
'vending_machine.n.01', 'synonyms': ['vending_machine'], 'id': 1140, 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'id': 1141, 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'f', 'synset': 'vest.n.01', 'synonyms': ['vest', 'waistcoat'], 'id': 1142, 'def': "a man's sleeveless garment worn underneath a coat", 'name': 'vest'}, {'frequency': 'c', 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'id': 1143, 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'id': 1144, 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'id': 1145, 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'id': 1146, 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'c', 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'id': 1147, 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'id': 1148, 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'id': 1149, 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'id': 1150, 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'id': 1151, 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'id': 1152, 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'id': 1153, 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'id': 1154, 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'id': 1155, 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'f', 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'id': 1156, 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'id': 1157, 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'id': 1158, 'def': 'a tall piece of furniture that provides storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'synset': 'washbasin.n.01', 'synonyms': ['washbasin', 
'basin_(for_washing)', 'washbowl', 'washstand', 'handbasin'], 'id': 1159, 'def': 'a bathroom sink that is permanently installed and connected to a water supply and drainpipe; where you can wash your hands and face', 'name': 'washbasin'}, {'frequency': 'c', 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'id': 1160, 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'id': 1161, 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'id': 1162, 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'id': 1163, 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'id': 1164, 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'id': 1165, 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'c', 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'id': 1166, 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'id': 1167, 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'id': 1168, 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'id': 1169, 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'id': 1170, 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'id': 1171, 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'f', 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'id': 1172, 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'id': 1173, 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'id': 1174, 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'id': 1175, 'def': 'a rich cake with two or more tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'id': 1176, 'def': 'a ring given to the bride and/or groom at 
the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'id': 1177, 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'id': 1178, 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'id': 1179, 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'id': 1180, 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'c', 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'id': 1181, 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'c', 'synset': 'wig.n.01', 'synonyms': ['wig'], 'id': 1182, 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'id': 1183, 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'id': 1184, 'def': 'A mill or turbine that is powered by wind', 'name': 'windmill'}, {'frequency': 'c', 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'id': 1185, 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'id': 1186, 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'id': 1187, 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'id': 1188, 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'c', 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'id': 1189, 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'id': 1190, 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'f', 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 'id': 1191, 'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'synset': 'wok.n.01', 'synonyms': ['wok'], 'id': 1192, 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'id': 1193, 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'id': 1194, 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'id': 1195, 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 
'name': 'wreath'}, {'frequency': 'c', 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'id': 1196, 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'f', 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'id': 1197, 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'id': 1198, 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'c', 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'id': 1199, 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'c', 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'id': 1200, 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'c', 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'id': 1201, 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'id': 1202, 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'id': 1203, 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa
-# fmt: on
diff --git a/detectron2/detectron2/data/datasets/lvis_v1_category_image_count.py b/detectron2/detectron2/data/datasets/lvis_v1_category_image_count.py
deleted file mode 100644
index 31bf0cfcd5096ab87835db86a28671d474514c40..0000000000000000000000000000000000000000
--- a/detectron2/detectron2/data/datasets/lvis_v1_category_image_count.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# Autogen with
-# with open("lvis_v1_train.json", "r") as f:
-#     a = json.load(f)
-#     c = a["categories"]
-#     for x in c:
-#         del x["name"]
-#         del x["instance_count"]
-#         del x["def"]
-#         del x["synonyms"]
-#         del x["frequency"]
-#         del x["synset"]
-#     LVIS_CATEGORY_IMAGE_COUNT = repr(c) + " # noqa"
-#     with open("/tmp/lvis_category_image_count.py", "wt") as f:
-#         f.write(f"LVIS_CATEGORY_IMAGE_COUNT = {LVIS_CATEGORY_IMAGE_COUNT}")
-# Then paste the contents of that file below
-
-# fmt: off
-LVIS_CATEGORY_IMAGE_COUNT = [{'id': 1, 'image_count': 64}, {'id': 2, 'image_count': 364}, {'id': 3, 'image_count': 1911}, {'id': 4, 'image_count': 149}, {'id': 5, 'image_count': 29}, {'id': 6, 'image_count': 26}, {'id': 7, 'image_count': 59}, {'id': 8, 'image_count': 22}, {'id': 9, 'image_count': 12}, {'id': 10, 'image_count': 28}, {'id': 11, 'image_count': 505}, {'id': 12, 'image_count': 1207}, {'id': 13, 'image_count': 4}, {'id': 14, 'image_count': 10}, {'id': 15, 'image_count': 500}, {'id': 16, 'image_count': 33}, {'id': 17, 'image_count': 3}, {'id': 18, 'image_count': 44}, {'id': 19, 'image_count': 561}, {'id': 20, 'image_count': 8}, {'id': 21, 'image_count': 9}, {'id': 22, 'image_count': 33}, {'id': 23, 'image_count': 1883}, {'id': 24, 'image_count': 98}, {'id': 25, 'image_count': 70}, {'id': 26, 'image_count': 46}, {'id': 27, 'image_count': 117}, {'id': 28, 'image_count': 41}, {'id': 29, 'image_count': 1395}, {'id': 30, 'image_count': 7}, {'id': 31, 'image_count': 1}, {'id': 32, 'image_count': 314}, {'id': 33, 'image_count': 31}, {'id': 34, 'image_count': 1905}, {'id': 35, 'image_count': 1859}, {'id': 36, 'image_count': 1623}, {'id': 37, 'image_count': 47}, {'id': 38, 'image_count': 3}, {'id': 39, 'image_count': 3}, {'id': 40, 'image_count': 1}, {'id': 41, 'image_count': 305}, {'id': 42, 'image_count': 6}, {'id': 43, 'image_count': 210}, {'id': 44, 'image_count': 36}, {'id': 45, 'image_count': 1787}, {'id': 46, 'image_count': 17}, {'id': 47, 'image_count': 51}, {'id': 48, 'image_count': 138}, {'id': 49, 'image_count': 3}, {'id': 50, 'image_count': 1470}, {'id': 51, 'image_count': 3}, {'id': 52, 'image_count': 2}, {'id': 53, 'image_count': 186}, {'id': 54, 'image_count': 76}, {'id': 55, 'image_count': 26}, {'id': 56, 'image_count': 303}, {'id': 57, 'image_count': 738}, {'id': 58, 'image_count': 1799}, {'id': 59, 'image_count': 1934}, {'id': 60, 'image_count': 1609}, {'id': 61, 'image_count': 1622}, {'id': 62, 'image_count': 41}, {'id': 63, 'image_count': 4}, {'id': 64, 'image_count': 11}, {'id': 65, 'image_count': 270}, {'id': 66, 'image_count': 349}, {'id': 67, 'image_count': 42}, {'id': 68, 'image_count': 823}, {'id': 69, 'image_count': 6}, {'id': 70, 'image_count': 48}, {'id': 71, 'image_count': 3}, {'id': 72, 'image_count': 42}, {'id': 73, 'image_count': 24}, {'id': 74, 'image_count': 16}, {'id': 75, 'image_count': 605}, {'id': 76, 'image_count': 646}, {'id': 77, 'image_count': 1765}, {'id': 78, 'image_count': 2}, {'id': 79, 'image_count': 125}, {'id': 80, 'image_count': 1420}, {'id': 81, 'image_count': 140}, {'id': 82, 'image_count': 4}, {'id': 83, 'image_count': 322}, {'id': 84, 'image_count': 60}, {'id': 85, 'image_count': 2}, {'id': 86, 'image_count': 231}, {'id': 87, 'image_count': 333}, {'id': 88, 'image_count': 1941}, {'id': 89, 'image_count': 367}, {'id': 90, 'image_count': 1922}, {'id': 91, 'image_count': 18}, {'id': 92, 'image_count': 81}, {'id': 93, 'image_count': 1}, {'id': 94, 'image_count': 1852}, {'id': 95, 'image_count': 430}, {'id': 96, 'image_count': 247}, {'id': 97, 'image_count': 
94}, {'id': 98, 'image_count': 21}, {'id': 99, 'image_count': 1821}, {'id': 100, 'image_count': 16}, {'id': 101, 'image_count': 12}, {'id': 102, 'image_count': 25}, {'id': 103, 'image_count': 41}, {'id': 104, 'image_count': 244}, {'id': 105, 'image_count': 7}, {'id': 106, 'image_count': 1}, {'id': 107, 'image_count': 40}, {'id': 108, 'image_count': 40}, {'id': 109, 'image_count': 104}, {'id': 110, 'image_count': 1671}, {'id': 111, 'image_count': 49}, {'id': 112, 'image_count': 243}, {'id': 113, 'image_count': 2}, {'id': 114, 'image_count': 242}, {'id': 115, 'image_count': 271}, {'id': 116, 'image_count': 104}, {'id': 117, 'image_count': 8}, {'id': 118, 'image_count': 1758}, {'id': 119, 'image_count': 1}, {'id': 120, 'image_count': 48}, {'id': 121, 'image_count': 14}, {'id': 122, 'image_count': 40}, {'id': 123, 'image_count': 1}, {'id': 124, 'image_count': 37}, {'id': 125, 'image_count': 1510}, {'id': 126, 'image_count': 6}, {'id': 127, 'image_count': 1903}, {'id': 128, 'image_count': 70}, {'id': 129, 'image_count': 86}, {'id': 130, 'image_count': 7}, {'id': 131, 'image_count': 5}, {'id': 132, 'image_count': 1406}, {'id': 133, 'image_count': 1901}, {'id': 134, 'image_count': 15}, {'id': 135, 'image_count': 28}, {'id': 136, 'image_count': 6}, {'id': 137, 'image_count': 494}, {'id': 138, 'image_count': 234}, {'id': 139, 'image_count': 1922}, {'id': 140, 'image_count': 1}, {'id': 141, 'image_count': 35}, {'id': 142, 'image_count': 5}, {'id': 143, 'image_count': 1828}, {'id': 144, 'image_count': 8}, {'id': 145, 'image_count': 63}, {'id': 146, 'image_count': 1668}, {'id': 147, 'image_count': 4}, {'id': 148, 'image_count': 95}, {'id': 149, 'image_count': 17}, {'id': 150, 'image_count': 1567}, {'id': 151, 'image_count': 2}, {'id': 152, 'image_count': 103}, {'id': 153, 'image_count': 50}, {'id': 154, 'image_count': 1309}, {'id': 155, 'image_count': 6}, {'id': 156, 'image_count': 92}, {'id': 157, 'image_count': 19}, {'id': 158, 'image_count': 37}, {'id': 159, 'image_count': 4}, {'id': 160, 'image_count': 709}, {'id': 161, 'image_count': 9}, {'id': 162, 'image_count': 82}, {'id': 163, 'image_count': 15}, {'id': 164, 'image_count': 3}, {'id': 165, 'image_count': 61}, {'id': 166, 'image_count': 51}, {'id': 167, 'image_count': 5}, {'id': 168, 'image_count': 13}, {'id': 169, 'image_count': 642}, {'id': 170, 'image_count': 24}, {'id': 171, 'image_count': 255}, {'id': 172, 'image_count': 9}, {'id': 173, 'image_count': 1808}, {'id': 174, 'image_count': 31}, {'id': 175, 'image_count': 158}, {'id': 176, 'image_count': 80}, {'id': 177, 'image_count': 1884}, {'id': 178, 'image_count': 158}, {'id': 179, 'image_count': 2}, {'id': 180, 'image_count': 12}, {'id': 181, 'image_count': 1659}, {'id': 182, 'image_count': 7}, {'id': 183, 'image_count': 834}, {'id': 184, 'image_count': 57}, {'id': 185, 'image_count': 174}, {'id': 186, 'image_count': 95}, {'id': 187, 'image_count': 27}, {'id': 188, 'image_count': 22}, {'id': 189, 'image_count': 1391}, {'id': 190, 'image_count': 90}, {'id': 191, 'image_count': 40}, {'id': 192, 'image_count': 445}, {'id': 193, 'image_count': 21}, {'id': 194, 'image_count': 1132}, {'id': 195, 'image_count': 177}, {'id': 196, 'image_count': 4}, {'id': 197, 'image_count': 17}, {'id': 198, 'image_count': 84}, {'id': 199, 'image_count': 55}, {'id': 200, 'image_count': 30}, {'id': 201, 'image_count': 25}, {'id': 202, 'image_count': 2}, {'id': 203, 'image_count': 125}, {'id': 204, 'image_count': 1135}, {'id': 205, 'image_count': 19}, {'id': 206, 'image_count': 72}, {'id': 207, 'image_count': 1926}, 
{'id': 208, 'image_count': 159}, {'id': 209, 'image_count': 7}, {'id': 210, 'image_count': 1}, {'id': 211, 'image_count': 13}, {'id': 212, 'image_count': 35}, {'id': 213, 'image_count': 18}, {'id': 214, 'image_count': 8}, {'id': 215, 'image_count': 6}, {'id': 216, 'image_count': 35}, {'id': 217, 'image_count': 1222}, {'id': 218, 'image_count': 103}, {'id': 219, 'image_count': 28}, {'id': 220, 'image_count': 63}, {'id': 221, 'image_count': 28}, {'id': 222, 'image_count': 5}, {'id': 223, 'image_count': 7}, {'id': 224, 'image_count': 14}, {'id': 225, 'image_count': 1918}, {'id': 226, 'image_count': 133}, {'id': 227, 'image_count': 16}, {'id': 228, 'image_count': 27}, {'id': 229, 'image_count': 110}, {'id': 230, 'image_count': 1895}, {'id': 231, 'image_count': 4}, {'id': 232, 'image_count': 1927}, {'id': 233, 'image_count': 8}, {'id': 234, 'image_count': 1}, {'id': 235, 'image_count': 263}, {'id': 236, 'image_count': 10}, {'id': 237, 'image_count': 2}, {'id': 238, 'image_count': 3}, {'id': 239, 'image_count': 87}, {'id': 240, 'image_count': 9}, {'id': 241, 'image_count': 71}, {'id': 242, 'image_count': 13}, {'id': 243, 'image_count': 18}, {'id': 244, 'image_count': 2}, {'id': 245, 'image_count': 5}, {'id': 246, 'image_count': 45}, {'id': 247, 'image_count': 1}, {'id': 248, 'image_count': 23}, {'id': 249, 'image_count': 32}, {'id': 250, 'image_count': 4}, {'id': 251, 'image_count': 1}, {'id': 252, 'image_count': 858}, {'id': 253, 'image_count': 661}, {'id': 254, 'image_count': 168}, {'id': 255, 'image_count': 210}, {'id': 256, 'image_count': 65}, {'id': 257, 'image_count': 4}, {'id': 258, 'image_count': 2}, {'id': 259, 'image_count': 159}, {'id': 260, 'image_count': 31}, {'id': 261, 'image_count': 811}, {'id': 262, 'image_count': 1}, {'id': 263, 'image_count': 42}, {'id': 264, 'image_count': 27}, {'id': 265, 'image_count': 2}, {'id': 266, 'image_count': 5}, {'id': 267, 'image_count': 95}, {'id': 268, 'image_count': 32}, {'id': 269, 'image_count': 1}, {'id': 270, 'image_count': 1}, {'id': 271, 'image_count': 1844}, {'id': 272, 'image_count': 897}, {'id': 273, 'image_count': 31}, {'id': 274, 'image_count': 23}, {'id': 275, 'image_count': 1}, {'id': 276, 'image_count': 202}, {'id': 277, 'image_count': 746}, {'id': 278, 'image_count': 44}, {'id': 279, 'image_count': 14}, {'id': 280, 'image_count': 26}, {'id': 281, 'image_count': 1}, {'id': 282, 'image_count': 2}, {'id': 283, 'image_count': 25}, {'id': 284, 'image_count': 238}, {'id': 285, 'image_count': 592}, {'id': 286, 'image_count': 26}, {'id': 287, 'image_count': 5}, {'id': 288, 'image_count': 42}, {'id': 289, 'image_count': 13}, {'id': 290, 'image_count': 46}, {'id': 291, 'image_count': 1}, {'id': 292, 'image_count': 8}, {'id': 293, 'image_count': 34}, {'id': 294, 'image_count': 5}, {'id': 295, 'image_count': 1}, {'id': 296, 'image_count': 1871}, {'id': 297, 'image_count': 717}, {'id': 298, 'image_count': 1010}, {'id': 299, 'image_count': 679}, {'id': 300, 'image_count': 3}, {'id': 301, 'image_count': 4}, {'id': 302, 'image_count': 1}, {'id': 303, 'image_count': 166}, {'id': 304, 'image_count': 2}, {'id': 305, 'image_count': 266}, {'id': 306, 'image_count': 101}, {'id': 307, 'image_count': 6}, {'id': 308, 'image_count': 14}, {'id': 309, 'image_count': 133}, {'id': 310, 'image_count': 2}, {'id': 311, 'image_count': 38}, {'id': 312, 'image_count': 95}, {'id': 313, 'image_count': 1}, {'id': 314, 'image_count': 12}, {'id': 315, 'image_count': 49}, {'id': 316, 'image_count': 5}, {'id': 317, 'image_count': 5}, {'id': 318, 'image_count': 16}, {'id': 
319, 'image_count': 216}, {'id': 320, 'image_count': 12}, {'id': 321, 'image_count': 1}, {'id': 322, 'image_count': 54}, {'id': 323, 'image_count': 5}, {'id': 324, 'image_count': 245}, {'id': 325, 'image_count': 12}, {'id': 326, 'image_count': 7}, {'id': 327, 'image_count': 35}, {'id': 328, 'image_count': 36}, {'id': 329, 'image_count': 32}, {'id': 330, 'image_count': 1027}, {'id': 331, 'image_count': 10}, {'id': 332, 'image_count': 12}, {'id': 333, 'image_count': 1}, {'id': 334, 'image_count': 67}, {'id': 335, 'image_count': 71}, {'id': 336, 'image_count': 30}, {'id': 337, 'image_count': 48}, {'id': 338, 'image_count': 249}, {'id': 339, 'image_count': 13}, {'id': 340, 'image_count': 29}, {'id': 341, 'image_count': 14}, {'id': 342, 'image_count': 236}, {'id': 343, 'image_count': 15}, {'id': 344, 'image_count': 1521}, {'id': 345, 'image_count': 25}, {'id': 346, 'image_count': 249}, {'id': 347, 'image_count': 139}, {'id': 348, 'image_count': 2}, {'id': 349, 'image_count': 2}, {'id': 350, 'image_count': 1890}, {'id': 351, 'image_count': 1240}, {'id': 352, 'image_count': 1}, {'id': 353, 'image_count': 9}, {'id': 354, 'image_count': 1}, {'id': 355, 'image_count': 3}, {'id': 356, 'image_count': 11}, {'id': 357, 'image_count': 4}, {'id': 358, 'image_count': 236}, {'id': 359, 'image_count': 44}, {'id': 360, 'image_count': 19}, {'id': 361, 'image_count': 1100}, {'id': 362, 'image_count': 7}, {'id': 363, 'image_count': 69}, {'id': 364, 'image_count': 2}, {'id': 365, 'image_count': 8}, {'id': 366, 'image_count': 5}, {'id': 367, 'image_count': 227}, {'id': 368, 'image_count': 6}, {'id': 369, 'image_count': 106}, {'id': 370, 'image_count': 81}, {'id': 371, 'image_count': 17}, {'id': 372, 'image_count': 134}, {'id': 373, 'image_count': 312}, {'id': 374, 'image_count': 8}, {'id': 375, 'image_count': 271}, {'id': 376, 'image_count': 2}, {'id': 377, 'image_count': 103}, {'id': 378, 'image_count': 1938}, {'id': 379, 'image_count': 574}, {'id': 380, 'image_count': 120}, {'id': 381, 'image_count': 2}, {'id': 382, 'image_count': 2}, {'id': 383, 'image_count': 13}, {'id': 384, 'image_count': 29}, {'id': 385, 'image_count': 1710}, {'id': 386, 'image_count': 66}, {'id': 387, 'image_count': 1008}, {'id': 388, 'image_count': 1}, {'id': 389, 'image_count': 3}, {'id': 390, 'image_count': 1942}, {'id': 391, 'image_count': 19}, {'id': 392, 'image_count': 1488}, {'id': 393, 'image_count': 46}, {'id': 394, 'image_count': 106}, {'id': 395, 'image_count': 115}, {'id': 396, 'image_count': 19}, {'id': 397, 'image_count': 2}, {'id': 398, 'image_count': 1}, {'id': 399, 'image_count': 28}, {'id': 400, 'image_count': 9}, {'id': 401, 'image_count': 192}, {'id': 402, 'image_count': 12}, {'id': 403, 'image_count': 21}, {'id': 404, 'image_count': 247}, {'id': 405, 'image_count': 6}, {'id': 406, 'image_count': 64}, {'id': 407, 'image_count': 7}, {'id': 408, 'image_count': 40}, {'id': 409, 'image_count': 542}, {'id': 410, 'image_count': 2}, {'id': 411, 'image_count': 1898}, {'id': 412, 'image_count': 36}, {'id': 413, 'image_count': 4}, {'id': 414, 'image_count': 1}, {'id': 415, 'image_count': 191}, {'id': 416, 'image_count': 6}, {'id': 417, 'image_count': 41}, {'id': 418, 'image_count': 39}, {'id': 419, 'image_count': 46}, {'id': 420, 'image_count': 1}, {'id': 421, 'image_count': 1451}, {'id': 422, 'image_count': 1878}, {'id': 423, 'image_count': 11}, {'id': 424, 'image_count': 82}, {'id': 425, 'image_count': 18}, {'id': 426, 'image_count': 1}, {'id': 427, 'image_count': 7}, {'id': 428, 'image_count': 3}, {'id': 429, 'image_count': 
575}, {'id': 430, 'image_count': 1907}, {'id': 431, 'image_count': 8}, {'id': 432, 'image_count': 4}, {'id': 433, 'image_count': 32}, {'id': 434, 'image_count': 11}, {'id': 435, 'image_count': 4}, {'id': 436, 'image_count': 54}, {'id': 437, 'image_count': 202}, {'id': 438, 'image_count': 32}, {'id': 439, 'image_count': 3}, {'id': 440, 'image_count': 130}, {'id': 441, 'image_count': 119}, {'id': 442, 'image_count': 141}, {'id': 443, 'image_count': 29}, {'id': 444, 'image_count': 525}, {'id': 445, 'image_count': 1323}, {'id': 446, 'image_count': 2}, {'id': 447, 'image_count': 113}, {'id': 448, 'image_count': 16}, {'id': 449, 'image_count': 7}, {'id': 450, 'image_count': 35}, {'id': 451, 'image_count': 1908}, {'id': 452, 'image_count': 353}, {'id': 453, 'image_count': 18}, {'id': 454, 'image_count': 14}, {'id': 455, 'image_count': 77}, {'id': 456, 'image_count': 8}, {'id': 457, 'image_count': 37}, {'id': 458, 'image_count': 1}, {'id': 459, 'image_count': 346}, {'id': 460, 'image_count': 19}, {'id': 461, 'image_count': 1779}, {'id': 462, 'image_count': 23}, {'id': 463, 'image_count': 25}, {'id': 464, 'image_count': 67}, {'id': 465, 'image_count': 19}, {'id': 466, 'image_count': 28}, {'id': 467, 'image_count': 4}, {'id': 468, 'image_count': 27}, {'id': 469, 'image_count': 1861}, {'id': 470, 'image_count': 11}, {'id': 471, 'image_count': 13}, {'id': 472, 'image_count': 13}, {'id': 473, 'image_count': 32}, {'id': 474, 'image_count': 1767}, {'id': 475, 'image_count': 42}, {'id': 476, 'image_count': 17}, {'id': 477, 'image_count': 128}, {'id': 478, 'image_count': 1}, {'id': 479, 'image_count': 9}, {'id': 480, 'image_count': 10}, {'id': 481, 'image_count': 4}, {'id': 482, 'image_count': 9}, {'id': 483, 'image_count': 18}, {'id': 484, 'image_count': 41}, {'id': 485, 'image_count': 28}, {'id': 486, 'image_count': 3}, {'id': 487, 'image_count': 65}, {'id': 488, 'image_count': 9}, {'id': 489, 'image_count': 23}, {'id': 490, 'image_count': 24}, {'id': 491, 'image_count': 1}, {'id': 492, 'image_count': 2}, {'id': 493, 'image_count': 59}, {'id': 494, 'image_count': 48}, {'id': 495, 'image_count': 17}, {'id': 496, 'image_count': 1877}, {'id': 497, 'image_count': 18}, {'id': 498, 'image_count': 1920}, {'id': 499, 'image_count': 50}, {'id': 500, 'image_count': 1890}, {'id': 501, 'image_count': 99}, {'id': 502, 'image_count': 1530}, {'id': 503, 'image_count': 3}, {'id': 504, 'image_count': 11}, {'id': 505, 'image_count': 19}, {'id': 506, 'image_count': 3}, {'id': 507, 'image_count': 63}, {'id': 508, 'image_count': 5}, {'id': 509, 'image_count': 6}, {'id': 510, 'image_count': 233}, {'id': 511, 'image_count': 54}, {'id': 512, 'image_count': 36}, {'id': 513, 'image_count': 10}, {'id': 514, 'image_count': 124}, {'id': 515, 'image_count': 101}, {'id': 516, 'image_count': 3}, {'id': 517, 'image_count': 363}, {'id': 518, 'image_count': 3}, {'id': 519, 'image_count': 30}, {'id': 520, 'image_count': 18}, {'id': 521, 'image_count': 199}, {'id': 522, 'image_count': 97}, {'id': 523, 'image_count': 32}, {'id': 524, 'image_count': 121}, {'id': 525, 'image_count': 16}, {'id': 526, 'image_count': 12}, {'id': 527, 'image_count': 2}, {'id': 528, 'image_count': 214}, {'id': 529, 'image_count': 48}, {'id': 530, 'image_count': 26}, {'id': 531, 'image_count': 13}, {'id': 532, 'image_count': 4}, {'id': 533, 'image_count': 11}, {'id': 534, 'image_count': 123}, {'id': 535, 'image_count': 7}, {'id': 536, 'image_count': 200}, {'id': 537, 'image_count': 91}, {'id': 538, 'image_count': 9}, {'id': 539, 'image_count': 72}, {'id': 540, 
'image_count': 1886}, {'id': 541, 'image_count': 4}, {'id': 542, 'image_count': 1}, {'id': 543, 'image_count': 1}, {'id': 544, 'image_count': 1932}, {'id': 545, 'image_count': 4}, {'id': 546, 'image_count': 56}, {'id': 547, 'image_count': 854}, {'id': 548, 'image_count': 755}, {'id': 549, 'image_count': 1843}, {'id': 550, 'image_count': 96}, {'id': 551, 'image_count': 7}, {'id': 552, 'image_count': 74}, {'id': 553, 'image_count': 66}, {'id': 554, 'image_count': 57}, {'id': 555, 'image_count': 44}, {'id': 556, 'image_count': 1905}, {'id': 557, 'image_count': 4}, {'id': 558, 'image_count': 90}, {'id': 559, 'image_count': 1635}, {'id': 560, 'image_count': 8}, {'id': 561, 'image_count': 5}, {'id': 562, 'image_count': 50}, {'id': 563, 'image_count': 545}, {'id': 564, 'image_count': 20}, {'id': 565, 'image_count': 193}, {'id': 566, 'image_count': 285}, {'id': 567, 'image_count': 3}, {'id': 568, 'image_count': 1}, {'id': 569, 'image_count': 1904}, {'id': 570, 'image_count': 294}, {'id': 571, 'image_count': 3}, {'id': 572, 'image_count': 5}, {'id': 573, 'image_count': 24}, {'id': 574, 'image_count': 2}, {'id': 575, 'image_count': 2}, {'id': 576, 'image_count': 16}, {'id': 577, 'image_count': 8}, {'id': 578, 'image_count': 154}, {'id': 579, 'image_count': 66}, {'id': 580, 'image_count': 1}, {'id': 581, 'image_count': 24}, {'id': 582, 'image_count': 1}, {'id': 583, 'image_count': 4}, {'id': 584, 'image_count': 75}, {'id': 585, 'image_count': 6}, {'id': 586, 'image_count': 126}, {'id': 587, 'image_count': 24}, {'id': 588, 'image_count': 22}, {'id': 589, 'image_count': 1872}, {'id': 590, 'image_count': 16}, {'id': 591, 'image_count': 423}, {'id': 592, 'image_count': 1927}, {'id': 593, 'image_count': 38}, {'id': 594, 'image_count': 3}, {'id': 595, 'image_count': 1945}, {'id': 596, 'image_count': 35}, {'id': 597, 'image_count': 1}, {'id': 598, 'image_count': 13}, {'id': 599, 'image_count': 9}, {'id': 600, 'image_count': 14}, {'id': 601, 'image_count': 37}, {'id': 602, 'image_count': 3}, {'id': 603, 'image_count': 4}, {'id': 604, 'image_count': 100}, {'id': 605, 'image_count': 195}, {'id': 606, 'image_count': 1}, {'id': 607, 'image_count': 12}, {'id': 608, 'image_count': 24}, {'id': 609, 'image_count': 489}, {'id': 610, 'image_count': 10}, {'id': 611, 'image_count': 1689}, {'id': 612, 'image_count': 42}, {'id': 613, 'image_count': 81}, {'id': 614, 'image_count': 894}, {'id': 615, 'image_count': 1868}, {'id': 616, 'image_count': 7}, {'id': 617, 'image_count': 1567}, {'id': 618, 'image_count': 10}, {'id': 619, 'image_count': 8}, {'id': 620, 'image_count': 7}, {'id': 621, 'image_count': 629}, {'id': 622, 'image_count': 89}, {'id': 623, 'image_count': 15}, {'id': 624, 'image_count': 134}, {'id': 625, 'image_count': 4}, {'id': 626, 'image_count': 1802}, {'id': 627, 'image_count': 595}, {'id': 628, 'image_count': 1210}, {'id': 629, 'image_count': 48}, {'id': 630, 'image_count': 418}, {'id': 631, 'image_count': 1846}, {'id': 632, 'image_count': 5}, {'id': 633, 'image_count': 221}, {'id': 634, 'image_count': 10}, {'id': 635, 'image_count': 7}, {'id': 636, 'image_count': 76}, {'id': 637, 'image_count': 22}, {'id': 638, 'image_count': 10}, {'id': 639, 'image_count': 341}, {'id': 640, 'image_count': 1}, {'id': 641, 'image_count': 705}, {'id': 642, 'image_count': 1900}, {'id': 643, 'image_count': 188}, {'id': 644, 'image_count': 227}, {'id': 645, 'image_count': 861}, {'id': 646, 'image_count': 6}, {'id': 647, 'image_count': 115}, {'id': 648, 'image_count': 5}, {'id': 649, 'image_count': 43}, {'id': 650, 
'image_count': 14}, {'id': 651, 'image_count': 6}, {'id': 652, 'image_count': 15}, {'id': 653, 'image_count': 1167}, {'id': 654, 'image_count': 15}, {'id': 655, 'image_count': 994}, {'id': 656, 'image_count': 28}, {'id': 657, 'image_count': 2}, {'id': 658, 'image_count': 338}, {'id': 659, 'image_count': 334}, {'id': 660, 'image_count': 15}, {'id': 661, 'image_count': 102}, {'id': 662, 'image_count': 1}, {'id': 663, 'image_count': 8}, {'id': 664, 'image_count': 1}, {'id': 665, 'image_count': 1}, {'id': 666, 'image_count': 28}, {'id': 667, 'image_count': 91}, {'id': 668, 'image_count': 260}, {'id': 669, 'image_count': 131}, {'id': 670, 'image_count': 128}, {'id': 671, 'image_count': 3}, {'id': 672, 'image_count': 10}, {'id': 673, 'image_count': 39}, {'id': 674, 'image_count': 2}, {'id': 675, 'image_count': 925}, {'id': 676, 'image_count': 354}, {'id': 677, 'image_count': 31}, {'id': 678, 'image_count': 10}, {'id': 679, 'image_count': 215}, {'id': 680, 'image_count': 71}, {'id': 681, 'image_count': 43}, {'id': 682, 'image_count': 28}, {'id': 683, 'image_count': 34}, {'id': 684, 'image_count': 16}, {'id': 685, 'image_count': 273}, {'id': 686, 'image_count': 2}, {'id': 687, 'image_count': 999}, {'id': 688, 'image_count': 4}, {'id': 689, 'image_count': 107}, {'id': 690, 'image_count': 2}, {'id': 691, 'image_count': 1}, {'id': 692, 'image_count': 454}, {'id': 693, 'image_count': 9}, {'id': 694, 'image_count': 1901}, {'id': 695, 'image_count': 61}, {'id': 696, 'image_count': 91}, {'id': 697, 'image_count': 46}, {'id': 698, 'image_count': 1402}, {'id': 699, 'image_count': 74}, {'id': 700, 'image_count': 421}, {'id': 701, 'image_count': 226}, {'id': 702, 'image_count': 10}, {'id': 703, 'image_count': 1720}, {'id': 704, 'image_count': 261}, {'id': 705, 'image_count': 1337}, {'id': 706, 'image_count': 293}, {'id': 707, 'image_count': 62}, {'id': 708, 'image_count': 814}, {'id': 709, 'image_count': 407}, {'id': 710, 'image_count': 6}, {'id': 711, 'image_count': 16}, {'id': 712, 'image_count': 7}, {'id': 713, 'image_count': 1791}, {'id': 714, 'image_count': 2}, {'id': 715, 'image_count': 1915}, {'id': 716, 'image_count': 1940}, {'id': 717, 'image_count': 13}, {'id': 718, 'image_count': 16}, {'id': 719, 'image_count': 448}, {'id': 720, 'image_count': 12}, {'id': 721, 'image_count': 18}, {'id': 722, 'image_count': 4}, {'id': 723, 'image_count': 71}, {'id': 724, 'image_count': 189}, {'id': 725, 'image_count': 74}, {'id': 726, 'image_count': 103}, {'id': 727, 'image_count': 3}, {'id': 728, 'image_count': 110}, {'id': 729, 'image_count': 5}, {'id': 730, 'image_count': 9}, {'id': 731, 'image_count': 15}, {'id': 732, 'image_count': 25}, {'id': 733, 'image_count': 7}, {'id': 734, 'image_count': 647}, {'id': 735, 'image_count': 824}, {'id': 736, 'image_count': 100}, {'id': 737, 'image_count': 47}, {'id': 738, 'image_count': 121}, {'id': 739, 'image_count': 731}, {'id': 740, 'image_count': 73}, {'id': 741, 'image_count': 49}, {'id': 742, 'image_count': 23}, {'id': 743, 'image_count': 4}, {'id': 744, 'image_count': 62}, {'id': 745, 'image_count': 118}, {'id': 746, 'image_count': 99}, {'id': 747, 'image_count': 40}, {'id': 748, 'image_count': 1036}, {'id': 749, 'image_count': 105}, {'id': 750, 'image_count': 21}, {'id': 751, 'image_count': 229}, {'id': 752, 'image_count': 7}, {'id': 753, 'image_count': 72}, {'id': 754, 'image_count': 9}, {'id': 755, 'image_count': 10}, {'id': 756, 'image_count': 328}, {'id': 757, 'image_count': 468}, {'id': 758, 'image_count': 1}, {'id': 759, 'image_count': 2}, {'id': 760, 
'image_count': 24}, {'id': 761, 'image_count': 11}, {'id': 762, 'image_count': 72}, {'id': 763, 'image_count': 17}, {'id': 764, 'image_count': 10}, {'id': 765, 'image_count': 17}, {'id': 766, 'image_count': 489}, {'id': 767, 'image_count': 47}, {'id': 768, 'image_count': 93}, {'id': 769, 'image_count': 1}, {'id': 770, 'image_count': 12}, {'id': 771, 'image_count': 228}, {'id': 772, 'image_count': 5}, {'id': 773, 'image_count': 76}, {'id': 774, 'image_count': 71}, {'id': 775, 'image_count': 30}, {'id': 776, 'image_count': 109}, {'id': 777, 'image_count': 14}, {'id': 778, 'image_count': 1}, {'id': 779, 'image_count': 8}, {'id': 780, 'image_count': 26}, {'id': 781, 'image_count': 339}, {'id': 782, 'image_count': 153}, {'id': 783, 'image_count': 2}, {'id': 784, 'image_count': 3}, {'id': 785, 'image_count': 8}, {'id': 786, 'image_count': 47}, {'id': 787, 'image_count': 8}, {'id': 788, 'image_count': 6}, {'id': 789, 'image_count': 116}, {'id': 790, 'image_count': 69}, {'id': 791, 'image_count': 13}, {'id': 792, 'image_count': 6}, {'id': 793, 'image_count': 1928}, {'id': 794, 'image_count': 79}, {'id': 795, 'image_count': 14}, {'id': 796, 'image_count': 7}, {'id': 797, 'image_count': 20}, {'id': 798, 'image_count': 114}, {'id': 799, 'image_count': 221}, {'id': 800, 'image_count': 502}, {'id': 801, 'image_count': 62}, {'id': 802, 'image_count': 87}, {'id': 803, 'image_count': 4}, {'id': 804, 'image_count': 1912}, {'id': 805, 'image_count': 7}, {'id': 806, 'image_count': 186}, {'id': 807, 'image_count': 18}, {'id': 808, 'image_count': 4}, {'id': 809, 'image_count': 3}, {'id': 810, 'image_count': 7}, {'id': 811, 'image_count': 1413}, {'id': 812, 'image_count': 7}, {'id': 813, 'image_count': 12}, {'id': 814, 'image_count': 248}, {'id': 815, 'image_count': 4}, {'id': 816, 'image_count': 1881}, {'id': 817, 'image_count': 529}, {'id': 818, 'image_count': 1932}, {'id': 819, 'image_count': 50}, {'id': 820, 'image_count': 3}, {'id': 821, 'image_count': 28}, {'id': 822, 'image_count': 10}, {'id': 823, 'image_count': 5}, {'id': 824, 'image_count': 5}, {'id': 825, 'image_count': 18}, {'id': 826, 'image_count': 14}, {'id': 827, 'image_count': 1890}, {'id': 828, 'image_count': 660}, {'id': 829, 'image_count': 8}, {'id': 830, 'image_count': 25}, {'id': 831, 'image_count': 10}, {'id': 832, 'image_count': 218}, {'id': 833, 'image_count': 36}, {'id': 834, 'image_count': 16}, {'id': 835, 'image_count': 808}, {'id': 836, 'image_count': 479}, {'id': 837, 'image_count': 1404}, {'id': 838, 'image_count': 307}, {'id': 839, 'image_count': 57}, {'id': 840, 'image_count': 28}, {'id': 841, 'image_count': 80}, {'id': 842, 'image_count': 11}, {'id': 843, 'image_count': 92}, {'id': 844, 'image_count': 20}, {'id': 845, 'image_count': 194}, {'id': 846, 'image_count': 23}, {'id': 847, 'image_count': 52}, {'id': 848, 'image_count': 673}, {'id': 849, 'image_count': 2}, {'id': 850, 'image_count': 2}, {'id': 851, 'image_count': 1}, {'id': 852, 'image_count': 2}, {'id': 853, 'image_count': 8}, {'id': 854, 'image_count': 80}, {'id': 855, 'image_count': 3}, {'id': 856, 'image_count': 3}, {'id': 857, 'image_count': 15}, {'id': 858, 'image_count': 2}, {'id': 859, 'image_count': 10}, {'id': 860, 'image_count': 386}, {'id': 861, 'image_count': 65}, {'id': 862, 'image_count': 3}, {'id': 863, 'image_count': 35}, {'id': 864, 'image_count': 5}, {'id': 865, 'image_count': 180}, {'id': 866, 'image_count': 99}, {'id': 867, 'image_count': 49}, {'id': 868, 'image_count': 28}, {'id': 869, 'image_count': 1}, {'id': 870, 'image_count': 52}, {'id': 871, 
'image_count': 36}, {'id': 872, 'image_count': 70}, {'id': 873, 'image_count': 6}, {'id': 874, 'image_count': 29}, {'id': 875, 'image_count': 24}, {'id': 876, 'image_count': 1115}, {'id': 877, 'image_count': 61}, {'id': 878, 'image_count': 18}, {'id': 879, 'image_count': 18}, {'id': 880, 'image_count': 665}, {'id': 881, 'image_count': 1096}, {'id': 882, 'image_count': 29}, {'id': 883, 'image_count': 8}, {'id': 884, 'image_count': 14}, {'id': 885, 'image_count': 1622}, {'id': 886, 'image_count': 2}, {'id': 887, 'image_count': 3}, {'id': 888, 'image_count': 32}, {'id': 889, 'image_count': 55}, {'id': 890, 'image_count': 1}, {'id': 891, 'image_count': 10}, {'id': 892, 'image_count': 10}, {'id': 893, 'image_count': 47}, {'id': 894, 'image_count': 3}, {'id': 895, 'image_count': 29}, {'id': 896, 'image_count': 342}, {'id': 897, 'image_count': 25}, {'id': 898, 'image_count': 1469}, {'id': 899, 'image_count': 521}, {'id': 900, 'image_count': 347}, {'id': 901, 'image_count': 35}, {'id': 902, 'image_count': 7}, {'id': 903, 'image_count': 207}, {'id': 904, 'image_count': 108}, {'id': 905, 'image_count': 2}, {'id': 906, 'image_count': 34}, {'id': 907, 'image_count': 12}, {'id': 908, 'image_count': 10}, {'id': 909, 'image_count': 13}, {'id': 910, 'image_count': 361}, {'id': 911, 'image_count': 1023}, {'id': 912, 'image_count': 782}, {'id': 913, 'image_count': 2}, {'id': 914, 'image_count': 5}, {'id': 915, 'image_count': 247}, {'id': 916, 'image_count': 221}, {'id': 917, 'image_count': 4}, {'id': 918, 'image_count': 8}, {'id': 919, 'image_count': 158}, {'id': 920, 'image_count': 3}, {'id': 921, 'image_count': 752}, {'id': 922, 'image_count': 64}, {'id': 923, 'image_count': 707}, {'id': 924, 'image_count': 143}, {'id': 925, 'image_count': 1}, {'id': 926, 'image_count': 49}, {'id': 927, 'image_count': 126}, {'id': 928, 'image_count': 76}, {'id': 929, 'image_count': 11}, {'id': 930, 'image_count': 11}, {'id': 931, 'image_count': 4}, {'id': 932, 'image_count': 39}, {'id': 933, 'image_count': 11}, {'id': 934, 'image_count': 13}, {'id': 935, 'image_count': 91}, {'id': 936, 'image_count': 14}, {'id': 937, 'image_count': 5}, {'id': 938, 'image_count': 3}, {'id': 939, 'image_count': 10}, {'id': 940, 'image_count': 18}, {'id': 941, 'image_count': 9}, {'id': 942, 'image_count': 6}, {'id': 943, 'image_count': 951}, {'id': 944, 'image_count': 2}, {'id': 945, 'image_count': 1}, {'id': 946, 'image_count': 19}, {'id': 947, 'image_count': 1942}, {'id': 948, 'image_count': 1916}, {'id': 949, 'image_count': 139}, {'id': 950, 'image_count': 43}, {'id': 951, 'image_count': 1969}, {'id': 952, 'image_count': 5}, {'id': 953, 'image_count': 134}, {'id': 954, 'image_count': 74}, {'id': 955, 'image_count': 381}, {'id': 956, 'image_count': 1}, {'id': 957, 'image_count': 381}, {'id': 958, 'image_count': 6}, {'id': 959, 'image_count': 1826}, {'id': 960, 'image_count': 28}, {'id': 961, 'image_count': 1635}, {'id': 962, 'image_count': 1967}, {'id': 963, 'image_count': 16}, {'id': 964, 'image_count': 1926}, {'id': 965, 'image_count': 1789}, {'id': 966, 'image_count': 401}, {'id': 967, 'image_count': 1968}, {'id': 968, 'image_count': 1167}, {'id': 969, 'image_count': 1}, {'id': 970, 'image_count': 56}, {'id': 971, 'image_count': 17}, {'id': 972, 'image_count': 1}, {'id': 973, 'image_count': 58}, {'id': 974, 'image_count': 9}, {'id': 975, 'image_count': 8}, {'id': 976, 'image_count': 1124}, {'id': 977, 'image_count': 31}, {'id': 978, 'image_count': 16}, {'id': 979, 'image_count': 491}, {'id': 980, 'image_count': 432}, {'id': 981, 
'image_count': 1945}, {'id': 982, 'image_count': 1899}, {'id': 983, 'image_count': 5}, {'id': 984, 'image_count': 28}, {'id': 985, 'image_count': 7}, {'id': 986, 'image_count': 146}, {'id': 987, 'image_count': 1}, {'id': 988, 'image_count': 25}, {'id': 989, 'image_count': 22}, {'id': 990, 'image_count': 1}, {'id': 991, 'image_count': 10}, {'id': 992, 'image_count': 9}, {'id': 993, 'image_count': 308}, {'id': 994, 'image_count': 4}, {'id': 995, 'image_count': 1969}, {'id': 996, 'image_count': 45}, {'id': 997, 'image_count': 12}, {'id': 998, 'image_count': 1}, {'id': 999, 'image_count': 85}, {'id': 1000, 'image_count': 1127}, {'id': 1001, 'image_count': 11}, {'id': 1002, 'image_count': 60}, {'id': 1003, 'image_count': 1}, {'id': 1004, 'image_count': 16}, {'id': 1005, 'image_count': 1}, {'id': 1006, 'image_count': 65}, {'id': 1007, 'image_count': 13}, {'id': 1008, 'image_count': 655}, {'id': 1009, 'image_count': 51}, {'id': 1010, 'image_count': 1}, {'id': 1011, 'image_count': 673}, {'id': 1012, 'image_count': 5}, {'id': 1013, 'image_count': 36}, {'id': 1014, 'image_count': 54}, {'id': 1015, 'image_count': 5}, {'id': 1016, 'image_count': 8}, {'id': 1017, 'image_count': 305}, {'id': 1018, 'image_count': 297}, {'id': 1019, 'image_count': 1053}, {'id': 1020, 'image_count': 223}, {'id': 1021, 'image_count': 1037}, {'id': 1022, 'image_count': 63}, {'id': 1023, 'image_count': 1881}, {'id': 1024, 'image_count': 507}, {'id': 1025, 'image_count': 333}, {'id': 1026, 'image_count': 1911}, {'id': 1027, 'image_count': 1765}, {'id': 1028, 'image_count': 1}, {'id': 1029, 'image_count': 5}, {'id': 1030, 'image_count': 1}, {'id': 1031, 'image_count': 9}, {'id': 1032, 'image_count': 2}, {'id': 1033, 'image_count': 151}, {'id': 1034, 'image_count': 82}, {'id': 1035, 'image_count': 1931}, {'id': 1036, 'image_count': 41}, {'id': 1037, 'image_count': 1895}, {'id': 1038, 'image_count': 24}, {'id': 1039, 'image_count': 22}, {'id': 1040, 'image_count': 35}, {'id': 1041, 'image_count': 69}, {'id': 1042, 'image_count': 962}, {'id': 1043, 'image_count': 588}, {'id': 1044, 'image_count': 21}, {'id': 1045, 'image_count': 825}, {'id': 1046, 'image_count': 52}, {'id': 1047, 'image_count': 5}, {'id': 1048, 'image_count': 5}, {'id': 1049, 'image_count': 5}, {'id': 1050, 'image_count': 1860}, {'id': 1051, 'image_count': 56}, {'id': 1052, 'image_count': 1582}, {'id': 1053, 'image_count': 7}, {'id': 1054, 'image_count': 2}, {'id': 1055, 'image_count': 1562}, {'id': 1056, 'image_count': 1885}, {'id': 1057, 'image_count': 1}, {'id': 1058, 'image_count': 5}, {'id': 1059, 'image_count': 137}, {'id': 1060, 'image_count': 1094}, {'id': 1061, 'image_count': 134}, {'id': 1062, 'image_count': 29}, {'id': 1063, 'image_count': 22}, {'id': 1064, 'image_count': 522}, {'id': 1065, 'image_count': 50}, {'id': 1066, 'image_count': 68}, {'id': 1067, 'image_count': 16}, {'id': 1068, 'image_count': 40}, {'id': 1069, 'image_count': 35}, {'id': 1070, 'image_count': 135}, {'id': 1071, 'image_count': 1413}, {'id': 1072, 'image_count': 772}, {'id': 1073, 'image_count': 50}, {'id': 1074, 'image_count': 1015}, {'id': 1075, 'image_count': 1}, {'id': 1076, 'image_count': 65}, {'id': 1077, 'image_count': 1900}, {'id': 1078, 'image_count': 1302}, {'id': 1079, 'image_count': 1977}, {'id': 1080, 'image_count': 2}, {'id': 1081, 'image_count': 29}, {'id': 1082, 'image_count': 36}, {'id': 1083, 'image_count': 138}, {'id': 1084, 'image_count': 4}, {'id': 1085, 'image_count': 67}, {'id': 1086, 'image_count': 26}, {'id': 1087, 'image_count': 25}, {'id': 1088, 
'image_count': 33}, {'id': 1089, 'image_count': 37}, {'id': 1090, 'image_count': 50}, {'id': 1091, 'image_count': 270}, {'id': 1092, 'image_count': 12}, {'id': 1093, 'image_count': 316}, {'id': 1094, 'image_count': 41}, {'id': 1095, 'image_count': 224}, {'id': 1096, 'image_count': 105}, {'id': 1097, 'image_count': 1925}, {'id': 1098, 'image_count': 1021}, {'id': 1099, 'image_count': 1213}, {'id': 1100, 'image_count': 172}, {'id': 1101, 'image_count': 28}, {'id': 1102, 'image_count': 745}, {'id': 1103, 'image_count': 187}, {'id': 1104, 'image_count': 147}, {'id': 1105, 'image_count': 136}, {'id': 1106, 'image_count': 34}, {'id': 1107, 'image_count': 41}, {'id': 1108, 'image_count': 636}, {'id': 1109, 'image_count': 570}, {'id': 1110, 'image_count': 1149}, {'id': 1111, 'image_count': 61}, {'id': 1112, 'image_count': 1890}, {'id': 1113, 'image_count': 18}, {'id': 1114, 'image_count': 143}, {'id': 1115, 'image_count': 1517}, {'id': 1116, 'image_count': 7}, {'id': 1117, 'image_count': 943}, {'id': 1118, 'image_count': 6}, {'id': 1119, 'image_count': 1}, {'id': 1120, 'image_count': 11}, {'id': 1121, 'image_count': 101}, {'id': 1122, 'image_count': 1909}, {'id': 1123, 'image_count': 800}, {'id': 1124, 'image_count': 1}, {'id': 1125, 'image_count': 44}, {'id': 1126, 'image_count': 3}, {'id': 1127, 'image_count': 44}, {'id': 1128, 'image_count': 31}, {'id': 1129, 'image_count': 7}, {'id': 1130, 'image_count': 20}, {'id': 1131, 'image_count': 11}, {'id': 1132, 'image_count': 13}, {'id': 1133, 'image_count': 1924}, {'id': 1134, 'image_count': 113}, {'id': 1135, 'image_count': 2}, {'id': 1136, 'image_count': 139}, {'id': 1137, 'image_count': 12}, {'id': 1138, 'image_count': 37}, {'id': 1139, 'image_count': 1866}, {'id': 1140, 'image_count': 47}, {'id': 1141, 'image_count': 1468}, {'id': 1142, 'image_count': 729}, {'id': 1143, 'image_count': 24}, {'id': 1144, 'image_count': 1}, {'id': 1145, 'image_count': 10}, {'id': 1146, 'image_count': 3}, {'id': 1147, 'image_count': 14}, {'id': 1148, 'image_count': 4}, {'id': 1149, 'image_count': 29}, {'id': 1150, 'image_count': 4}, {'id': 1151, 'image_count': 70}, {'id': 1152, 'image_count': 46}, {'id': 1153, 'image_count': 14}, {'id': 1154, 'image_count': 48}, {'id': 1155, 'image_count': 1855}, {'id': 1156, 'image_count': 113}, {'id': 1157, 'image_count': 1}, {'id': 1158, 'image_count': 1}, {'id': 1159, 'image_count': 10}, {'id': 1160, 'image_count': 54}, {'id': 1161, 'image_count': 1923}, {'id': 1162, 'image_count': 630}, {'id': 1163, 'image_count': 31}, {'id': 1164, 'image_count': 69}, {'id': 1165, 'image_count': 7}, {'id': 1166, 'image_count': 11}, {'id': 1167, 'image_count': 1}, {'id': 1168, 'image_count': 30}, {'id': 1169, 'image_count': 50}, {'id': 1170, 'image_count': 45}, {'id': 1171, 'image_count': 28}, {'id': 1172, 'image_count': 114}, {'id': 1173, 'image_count': 193}, {'id': 1174, 'image_count': 21}, {'id': 1175, 'image_count': 91}, {'id': 1176, 'image_count': 31}, {'id': 1177, 'image_count': 1469}, {'id': 1178, 'image_count': 1924}, {'id': 1179, 'image_count': 87}, {'id': 1180, 'image_count': 77}, {'id': 1181, 'image_count': 11}, {'id': 1182, 'image_count': 47}, {'id': 1183, 'image_count': 21}, {'id': 1184, 'image_count': 47}, {'id': 1185, 'image_count': 70}, {'id': 1186, 'image_count': 1838}, {'id': 1187, 'image_count': 19}, {'id': 1188, 'image_count': 531}, {'id': 1189, 'image_count': 11}, {'id': 1190, 'image_count': 941}, {'id': 1191, 'image_count': 113}, {'id': 1192, 'image_count': 26}, {'id': 1193, 'image_count': 5}, {'id': 1194, 'image_count': 
56}, {'id': 1195, 'image_count': 73}, {'id': 1196, 'image_count': 32}, {'id': 1197, 'image_count': 128}, {'id': 1198, 'image_count': 623}, {'id': 1199, 'image_count': 12}, {'id': 1200, 'image_count': 52}, {'id': 1201, 'image_count': 11}, {'id': 1202, 'image_count': 1674}, {'id': 1203, 'image_count': 81}] # noqa -# fmt: on diff --git a/detectron2/detectron2/data/datasets/pascal_voc.py b/detectron2/detectron2/data/datasets/pascal_voc.py deleted file mode 100644 index 46f8536ad26f4d47a53a95bed62548d8aff5047e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/pascal_voc.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import numpy as np -import os -import xml.etree.ElementTree as ET -from typing import List, Tuple, Union - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode -from detectron2.utils.file_io import PathManager - -__all__ = ["load_voc_instances", "register_pascal_voc"] - - -# fmt: off -CLASS_NAMES = ( - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", - "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", - "pottedplant", "sheep", "sofa", "train", "tvmonitor" -) -# fmt: on - - -def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]): - """ - Load Pascal VOC detection annotations to Detectron2 format. - - Args: - dirname: Contain "Annotations", "ImageSets", "JPEGImages" - split (str): one of "train", "test", "val", "trainval" - class_names: list or tuple of class names - """ - with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: - fileids = np.loadtxt(f, dtype=str) - - # Needs to read many small annotation files. Makes sense at local - annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) - dicts = [] - for fileid in fileids: - anno_file = os.path.join(annotation_dirname, fileid + ".xml") - jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") - - with PathManager.open(anno_file) as f: - tree = ET.parse(f) - - r = { - "file_name": jpeg_file, - "image_id": fileid, - "height": int(tree.findall("./size/height")[0].text), - "width": int(tree.findall("./size/width")[0].text), - } - instances = [] - - for obj in tree.findall("object"): - cls = obj.find("name").text - # We include "difficult" samples in training. - # Based on limited experiments, they don't hurt accuracy. - # difficult = int(obj.find("difficult").text) - # if difficult == 1: - # continue - bbox = obj.find("bndbox") - bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] - # Original annotations are integers in the range [1, W or H] - # Assuming they mean 1-based pixel indices (inclusive), - # a box with annotation (xmin=1, xmax=W) covers the whole image. 
- # In coordinate space this is represented by (xmin=0, xmax=W) - bbox[0] -= 1.0 - bbox[1] -= 1.0 - instances.append( - {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} - ) - r["annotations"] = instances - dicts.append(r) - return dicts - - -def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES): - DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names)) - MetadataCatalog.get(name).set( - thing_classes=list(class_names), dirname=dirname, year=year, split=split - ) diff --git a/detectron2/detectron2/data/datasets/register_coco.py b/detectron2/detectron2/data/datasets/register_coco.py deleted file mode 100644 index e564438d5bf016bcdbb65b4bbdc215d79f579f8a..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/datasets/register_coco.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .coco import register_coco_instances # noqa -from .coco_panoptic import register_coco_panoptic_separated # noqa diff --git a/detectron2/detectron2/data/detection_utils.py b/detectron2/detectron2/data/detection_utils.py deleted file mode 100644 index 469a3ba33bbe32ced5a6aee76f3d7eaa6003791c..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/detection_utils.py +++ /dev/null @@ -1,662 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -Common data processing utilities that are used in a -typical object detection data pipeline. -""" -import logging -import numpy as np -from typing import List, Union -import pycocotools.mask as mask_util -import torch -from PIL import Image - -from detectron2.structures import ( - BitMasks, - Boxes, - BoxMode, - Instances, - Keypoints, - PolygonMasks, - RotatedBoxes, - polygons_to_bitmask, -) -from detectron2.utils.file_io import PathManager - -from . import transforms as T -from .catalog import MetadataCatalog - -__all__ = [ - "SizeMismatchError", - "convert_image_to_rgb", - "check_image_size", - "transform_proposals", - "transform_instance_annotations", - "annotations_to_instances", - "annotations_to_instances_rotated", - "build_augmentation", - "build_transform_gen", - "create_keypoint_hflip_indices", - "filter_empty_instances", - "read_image", -] - - -class SizeMismatchError(ValueError): - """ - When loaded image has difference width/height compared with annotation. - """ - - -# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] -_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] - -# https://www.exiv2.org/tags.html -_EXIF_ORIENT = 274 # exif 'Orientation' tag - - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. 
- - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - - -def convert_image_to_rgb(image, format): - """ - Convert an image from given format to RGB. - - Args: - image (np.ndarray or Tensor): an HWC image - format (str): the format of input image, also see `read_image` - - Returns: - (np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8 - """ - if isinstance(image, torch.Tensor): - image = image.cpu().numpy() - if format == "BGR": - image = image[:, :, [2, 1, 0]] - elif format == "YUV-BT.601": - image = np.dot(image, np.array(_M_YUV2RGB).T) - image = image * 255.0 - else: - if format == "L": - image = image[:, :, 0] - image = image.astype(np.uint8) - image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) - return image - - -def _apply_exif_orientation(image): - """ - Applies the exif orientation correctly. - - This code exists per the bug: - https://github.com/python-pillow/Pillow/issues/3973 - with the function `ImageOps.exif_transpose`. The Pillow source raises errors with - various methods, especially `tobytes` - - Function based on: - https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 - https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 - - Args: - image (PIL.Image): a PIL image - - Returns: - (PIL.Image): the PIL image with exif orientation applied, if applicable - """ - if not hasattr(image, "getexif"): - return image - - try: - exif = image.getexif() - except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 - exif = None - - if exif is None: - return image - - orientation = exif.get(_EXIF_ORIENT) - - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) - - if method is not None: - return image.transpose(method) - return image - - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. - - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". - - Returns: - image (np.ndarray): - an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 - image = _apply_exif_orientation(image) - return convert_PIL_to_numpy(image, format) - raise ValueError(f"Failed to read image at: {file_name}") - - -def check_image_size(dataset_dict, image): - """ - Raise an error if the image does not match the size specified in the dict. 
- """ - if "width" in dataset_dict or "height" in dataset_dict: - image_wh = (image.shape[1], image.shape[0]) - expected_wh = (dataset_dict["width"], dataset_dict["height"]) - if not image_wh == expected_wh: - raise SizeMismatchError( - "Mismatched image shape{}, got {}, expect {}.".format( - ( - " for image " + dataset_dict["file_name"] - if "file_name" in dataset_dict - else "" - ), - image_wh, - expected_wh, - ) - + " Please check the width/height in your annotation." - ) - - # To ensure bbox always remap to original image size - if "width" not in dataset_dict: - dataset_dict["width"] = image.shape[1] - if "height" not in dataset_dict: - dataset_dict["height"] = image.shape[0] - - -def transform_proposals(dataset_dict, image_shape, transforms, *, proposal_topk, min_box_size=0): - """ - Apply transformations to the proposals in dataset_dict, if any. - - Args: - dataset_dict (dict): a dict read from the dataset, possibly - contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" - image_shape (tuple): height, width - transforms (TransformList): - proposal_topk (int): only keep top-K scoring proposals - min_box_size (int): proposals with either side smaller than this - threshold are removed - - The input dict is modified in-place, with abovementioned keys removed. A new - key "proposals" will be added. Its value is an `Instances` - object which contains the transformed proposals in its field - "proposal_boxes" and "objectness_logits". - """ - if "proposal_boxes" in dataset_dict: - # Transform proposal boxes - boxes = transforms.apply_box( - BoxMode.convert( - dataset_dict.pop("proposal_boxes"), - dataset_dict.pop("proposal_bbox_mode"), - BoxMode.XYXY_ABS, - ) - ) - boxes = Boxes(boxes) - objectness_logits = torch.as_tensor( - dataset_dict.pop("proposal_objectness_logits").astype("float32") - ) - - boxes.clip(image_shape) - keep = boxes.nonempty(threshold=min_box_size) - boxes = boxes[keep] - objectness_logits = objectness_logits[keep] - - proposals = Instances(image_shape) - proposals.proposal_boxes = boxes[:proposal_topk] - proposals.objectness_logits = objectness_logits[:proposal_topk] - dataset_dict["proposals"] = proposals - - -def get_bbox(annotation): - """ - Get bbox from data - Args: - annotation (dict): dict of instance annotations for a single instance. - Returns: - bbox (ndarray): x1, y1, x2, y2 coordinates - """ - # bbox is 1d (per-instance bounding box) - bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) - return bbox - - -def transform_instance_annotations( - annotation, transforms, image_size, *, keypoint_hflip_indices=None -): - """ - Apply transforms to box, segmentation and keypoints annotations of a single instance. - - It will use `transforms.apply_box` for the box, and - `transforms.apply_coords` for segmentation polygons & keypoints. - If you need anything more specially designed for each data structure, - you'll need to implement your own version of this function or the transforms. - - Args: - annotation (dict): dict of instance annotations for a single instance. - It will be modified in-place. - transforms (TransformList or list[Transform]): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - - Returns: - dict: - the same input dict with fields "bbox", "segmentation", "keypoints" - transformed according to `transforms`. - The "bbox_mode" field will be set to XYXY_ABS. 
- """ - if isinstance(transforms, (tuple, list)): - transforms = T.TransformList(transforms) - # bbox is 1d (per-instance bounding box) - bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) - # clip transformed bbox to image size - bbox = transforms.apply_box(np.array([bbox]))[0].clip(min=0) - annotation["bbox"] = np.minimum(bbox, list(image_size + image_size)[::-1]) - annotation["bbox_mode"] = BoxMode.XYXY_ABS - - if "segmentation" in annotation: - # each instance contains 1 or more polygons - segm = annotation["segmentation"] - if isinstance(segm, list): - # polygons - polygons = [np.asarray(p).reshape(-1, 2) for p in segm] - annotation["segmentation"] = [ - p.reshape(-1) for p in transforms.apply_polygons(polygons) - ] - elif isinstance(segm, dict): - # RLE - mask = mask_util.decode(segm) - mask = transforms.apply_segmentation(mask) - assert tuple(mask.shape[:2]) == image_size - annotation["segmentation"] = mask - else: - raise ValueError( - "Cannot transform segmentation of type '{}'!" - "Supported types are: polygons as list[list[float] or ndarray]," - " COCO-style RLE as a dict.".format(type(segm)) - ) - - if "keypoints" in annotation: - keypoints = transform_keypoint_annotations( - annotation["keypoints"], transforms, image_size, keypoint_hflip_indices - ) - annotation["keypoints"] = keypoints - - return annotation - - -def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): - """ - Transform keypoint annotations of an image. - If a keypoint is transformed out of image boundary, it will be marked "unlabeled" (visibility=0) - - Args: - keypoints (list[float]): Nx3 float in Detectron2's Dataset format. - Each point is represented by (x, y, visibility). - transforms (TransformList): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - When `transforms` includes horizontal flip, will use the index - mapping to flip keypoints. - """ - # (N*3,) -> (N, 3) - keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) - keypoints_xy = transforms.apply_coords(keypoints[:, :2]) - - # Set all out-of-boundary points to "unlabeled" - inside = (keypoints_xy >= np.array([0, 0])) & (keypoints_xy <= np.array(image_size[::-1])) - inside = inside.all(axis=1) - keypoints[:, :2] = keypoints_xy - keypoints[:, 2][~inside] = 0 - - # This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - - # Alternative way: check if probe points was horizontally flipped. 
- # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) - # probe_aug = transforms.apply_coords(probe.copy()) - # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa - - # If flipped, swap each keypoint with its opposite-handed equivalent - if do_hflip: - if keypoint_hflip_indices is None: - raise ValueError("Cannot flip keypoints without providing flip indices!") - if len(keypoints) != len(keypoint_hflip_indices): - raise ValueError( - "Keypoint data has {} points, but metadata " - "contains {} points!".format(len(keypoints), len(keypoint_hflip_indices)) - ) - keypoints = keypoints[np.asarray(keypoint_hflip_indices, dtype=np.int32), :] - - # Maintain COCO convention that if visibility == 0 (unlabeled), then x, y = 0 - keypoints[keypoints[:, 2] == 0] = 0 - return keypoints - - -def annotations_to_instances(annos, image_size, mask_format="polygon"): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. - image_size (tuple): height, width - - Returns: - Instances: - It will contain fields "gt_boxes", "gt_classes", - "gt_masks", "gt_keypoints", if they can be obtained from `annos`. - This is the format that builtin models expect. - """ - boxes = ( - np.stack( - [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] - ) - if len(annos) - else np.zeros((0, 4)) - ) - target = Instances(image_size) - target.gt_boxes = Boxes(boxes) - - classes = [int(obj["category_id"]) for obj in annos] - classes = torch.tensor(classes, dtype=torch.int64) - target.gt_classes = classes - - if len(annos) and "segmentation" in annos[0]: - segms = [obj["segmentation"] for obj in annos] - if mask_format == "polygon": - try: - masks = PolygonMasks(segms) - except ValueError as e: - raise ValueError( - "Failed to use mask_format=='polygon' from the given annotations!" - ) from e - else: - assert mask_format == "bitmask", mask_format - masks = [] - for segm in segms: - if isinstance(segm, list): - # polygon - masks.append(polygons_to_bitmask(segm, *image_size)) - elif isinstance(segm, dict): - # COCO RLE - masks.append(mask_util.decode(segm)) - elif isinstance(segm, np.ndarray): - assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( - segm.ndim - ) - # mask array - masks.append(segm) - else: - raise ValueError( - "Cannot convert segmentation of type '{}' to BitMasks!" - "Supported types are: polygons as list[list[float] or ndarray]," - " COCO-style RLE as a dict, or a binary segmentation mask " - " in a 2D numpy array of shape HxW.".format(type(segm)) - ) - # torch.from_numpy does not support array with negative stride. - masks = BitMasks( - torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) - ) - target.gt_masks = masks - - if len(annos) and "keypoints" in annos[0]: - kpts = [obj.get("keypoints", []) for obj in annos] - target.gt_keypoints = Keypoints(kpts) - - return target - - -def annotations_to_instances_rotated(annos, image_size): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - Compared to `annotations_to_instances`, this function is for rotated boxes only - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. 
- image_size (tuple): height, width - - Returns: - Instances: - Containing fields "gt_boxes", "gt_classes", - if they can be obtained from `annos`. - This is the format that builtin models expect. - """ - boxes = [obj["bbox"] for obj in annos] - target = Instances(image_size) - boxes = target.gt_boxes = RotatedBoxes(boxes) - boxes.clip(image_size) - - classes = [obj["category_id"] for obj in annos] - classes = torch.tensor(classes, dtype=torch.int64) - target.gt_classes = classes - - return target - - -def filter_empty_instances( - instances, by_box=True, by_mask=True, box_threshold=1e-5, return_mask=False -): - """ - Filter out empty instances in an `Instances` object. - - Args: - instances (Instances): - by_box (bool): whether to filter out instances with empty boxes - by_mask (bool): whether to filter out instances with empty masks - box_threshold (float): minimum width and height to be considered non-empty - return_mask (bool): whether to return boolean mask of filtered instances - - Returns: - Instances: the filtered instances. - tensor[bool], optional: boolean mask of filtered instances - """ - assert by_box or by_mask - r = [] - if by_box: - r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) - if instances.has("gt_masks") and by_mask: - r.append(instances.gt_masks.nonempty()) - - # TODO: can also filter visible keypoints - - if not r: - return instances - m = r[0] - for x in r[1:]: - m = m & x - if return_mask: - return instances[m], m - return instances[m] - - -def create_keypoint_hflip_indices(dataset_names: Union[str, List[str]]) -> List[int]: - """ - Args: - dataset_names: list of dataset names - - Returns: - list[int]: a list of size=#keypoints, storing the - horizontally-flipped keypoint indices. - """ - if isinstance(dataset_names, str): - dataset_names = [dataset_names] - - check_metadata_consistency("keypoint_names", dataset_names) - check_metadata_consistency("keypoint_flip_map", dataset_names) - - meta = MetadataCatalog.get(dataset_names[0]) - names = meta.keypoint_names - # TODO flip -> hflip - flip_map = dict(meta.keypoint_flip_map) - flip_map.update({v: k for k, v in flip_map.items()}) - flipped_names = [i if i not in flip_map else flip_map[i] for i in names] - flip_indices = [names.index(i) for i in flipped_names] - return flip_indices - - -def get_fed_loss_cls_weights(dataset_names: Union[str, List[str]], freq_weight_power=1.0): - """ - Get frequency weight for each class sorted by class id. - We now calcualte freqency weight using image_count to the power freq_weight_power. - - Args: - dataset_names: list of dataset names - freq_weight_power: power value - """ - if isinstance(dataset_names, str): - dataset_names = [dataset_names] - - check_metadata_consistency("class_image_count", dataset_names) - - meta = MetadataCatalog.get(dataset_names[0]) - class_freq_meta = meta.class_image_count - class_freq = torch.tensor( - [c["image_count"] for c in sorted(class_freq_meta, key=lambda x: x["id"])] - ) - class_freq_weight = class_freq.float() ** freq_weight_power - return class_freq_weight - - -def gen_crop_transform_with_instance(crop_size, image_size, instance): - """ - Generate a CropTransform so that the cropping region contains - the center of the given instance. - - Args: - crop_size (tuple): h, w in pixels - image_size (tuple): h, w - instance (dict): an annotation dict of one instance, in Detectron2's - dataset format. 
- """ - crop_size = np.asarray(crop_size, dtype=np.int32) - bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) - center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 - assert ( - image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] - ), "The annotation bounding box is outside of the image!" - assert ( - image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] - ), "Crop size is larger than image size!" - - min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) - max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) - max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) - - y0 = np.random.randint(min_yx[0], max_yx[0] + 1) - x0 = np.random.randint(min_yx[1], max_yx[1] + 1) - return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) - - -def check_metadata_consistency(key, dataset_names): - """ - Check that the datasets have consistent metadata. - - Args: - key (str): a metadata key - dataset_names (list[str]): a list of dataset names - - Raises: - AttributeError: if the key does not exist in the metadata - ValueError: if the given datasets do not have the same metadata values defined by key - """ - if len(dataset_names) == 0: - return - logger = logging.getLogger(__name__) - entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] - for idx, entry in enumerate(entries_per_dataset): - if entry != entries_per_dataset[0]: - logger.error( - "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) - ) - logger.error( - "Metadata '{}' for dataset '{}' is '{}'".format( - key, dataset_names[0], str(entries_per_dataset[0]) - ) - ) - raise ValueError("Datasets have different metadata '{}'!".format(key)) - - -def build_augmentation(cfg, is_train): - """ - Create a list of default :class:`Augmentation` from config. - Now it includes resizing and flipping. - - Returns: - list[Augmentation] - """ - if is_train: - min_size = cfg.INPUT.MIN_SIZE_TRAIN - max_size = cfg.INPUT.MAX_SIZE_TRAIN - sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING - else: - min_size = cfg.INPUT.MIN_SIZE_TEST - max_size = cfg.INPUT.MAX_SIZE_TEST - sample_style = "choice" - augmentation = [T.ResizeShortestEdge(min_size, max_size, sample_style)] - if is_train and cfg.INPUT.RANDOM_FLIP != "none": - augmentation.append( - T.RandomFlip( - horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", - vertical=cfg.INPUT.RANDOM_FLIP == "vertical", - ) - ) - return augmentation - - -build_transform_gen = build_augmentation -""" -Alias for backward-compatibility. -""" diff --git a/detectron2/detectron2/data/samplers/__init__.py b/detectron2/detectron2/data/samplers/__init__.py deleted file mode 100644 index 85c9f1a9df8a4038fbd4246239b699402e382309..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/samplers/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from .distributed_sampler import ( - InferenceSampler, - RandomSubsetTrainingSampler, - RepeatFactorTrainingSampler, - TrainingSampler, -) - -from .grouped_batch_sampler import GroupedBatchSampler - -__all__ = [ - "GroupedBatchSampler", - "TrainingSampler", - "RandomSubsetTrainingSampler", - "InferenceSampler", - "RepeatFactorTrainingSampler", -] diff --git a/detectron2/detectron2/data/samplers/distributed_sampler.py b/detectron2/detectron2/data/samplers/distributed_sampler.py deleted file mode 100644 index cd3b44a42d6f23909530b042dfb4d4ac139d22ff..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/samplers/distributed_sampler.py +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import itertools -import logging -import math -from collections import defaultdict -from typing import Optional -import torch -from torch.utils.data.sampler import Sampler - -from detectron2.utils import comm - -logger = logging.getLogger(__name__) - - -class TrainingSampler(Sampler): - """ - In training, we only care about the "infinite stream" of training data. - So this sampler produces an infinite stream of indices and - all workers cooperate to correctly shuffle the indices and sample different indices. - - The samplers in each worker effectively produces `indices[worker_id::num_workers]` - where `indices` is an infinite stream of indices consisting of - `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) - or `range(size) + range(size) + ...` (if shuffle is False) - - Note that this sampler does not shard based on pytorch DataLoader worker id. - A sampler passed to pytorch DataLoader is used only with map-style dataset - and will not be executed inside workers. - But if this sampler is used in a way that it gets execute inside a dataloader - worker, then extra work needs to be done to shard its outputs based on worker id. - This is required so that workers don't produce identical data. - :class:`ToIterableDataset` implements this logic. - This note is true for all samplers in detectron2. - """ - - def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - shuffle (bool): whether to shuffle the indices or not - seed (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). - """ - if not isinstance(size, int): - raise TypeError(f"TrainingSampler(size=) expects an int. Got type {type(size)}.") - if size <= 0: - raise ValueError(f"TrainingSampler(size=) expects a positive int. Got {size}.") - self._size = size - self._shuffle = shuffle - if seed is None: - seed = comm.shared_random_seed() - self._seed = int(seed) - - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - def __iter__(self): - start = self._rank - yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) - - def _infinite_indices(self): - g = torch.Generator() - if self._seed is not None: - g.manual_seed(self._seed) - while True: - if self._shuffle: - yield from torch.randperm(self._size, generator=g).tolist() - else: - yield from torch.arange(self._size).tolist() - - -class RandomSubsetTrainingSampler(TrainingSampler): - """ - Similar to TrainingSampler, but only sample a random subset of indices. 
- This is useful when you want to estimate the accuracy vs data-number curves by - training the model with different subset_ratio. - """ - - def __init__( - self, - size: int, - subset_ratio: float, - shuffle: bool = True, - seed_shuffle: Optional[int] = None, - seed_subset: Optional[int] = None, - ): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - subset_ratio (float): the ratio of subset data to sample from the underlying dataset - shuffle (bool): whether to shuffle the indices or not - seed_shuffle (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). - seed_subset (int): the seed to randomize the subset to be sampled. - Must be the same across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). - """ - super().__init__(size=size, shuffle=shuffle, seed=seed_shuffle) - - assert 0.0 < subset_ratio <= 1.0 - self._size_subset = int(size * subset_ratio) - assert self._size_subset > 0 - if seed_subset is None: - seed_subset = comm.shared_random_seed() - self._seed_subset = int(seed_subset) - - # randomly generate the subset indexes to be sampled from - g = torch.Generator() - g.manual_seed(self._seed_subset) - indexes_randperm = torch.randperm(self._size, generator=g) - self._indexes_subset = indexes_randperm[: self._size_subset] - - logger.info("Using RandomSubsetTrainingSampler......") - logger.info(f"Randomly sample {self._size_subset} data from the original {self._size} data") - - def _infinite_indices(self): - g = torch.Generator() - g.manual_seed(self._seed) # self._seed equals seed_shuffle from __init__() - while True: - if self._shuffle: - # generate a random permutation to shuffle self._indexes_subset - randperm = torch.randperm(self._size_subset, generator=g) - yield from self._indexes_subset[randperm].tolist() - else: - yield from self._indexes_subset.tolist() - - -class RepeatFactorTrainingSampler(Sampler): - """ - Similar to TrainingSampler, but a sample may appear more times than others based - on its "repeat factor". This is suitable for training on class imbalanced datasets like LVIS. - """ - - def __init__(self, repeat_factors, *, shuffle=True, seed=None): - """ - Args: - repeat_factors (Tensor): a float vector, the repeat factor for each indice. When it's - full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``. - shuffle (bool): whether to shuffle the indices or not - seed (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). - """ - self._shuffle = shuffle - if seed is None: - seed = comm.shared_random_seed() - self._seed = int(seed) - - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - # Split into whole number (_int_part) and fractional (_frac_part) parts. - self._int_part = torch.trunc(repeat_factors) - self._frac_part = repeat_factors - self._int_part - - @staticmethod - def repeat_factors_from_category_frequency(dataset_dicts, repeat_thresh, sqrt=True): - """ - Compute (fractional) per-image repeat factors based on category frequency. - The repeat factor for an image is a function of the frequency of the rarest - category labeled in that image. 
The "frequency of category c" in [0, 1] is defined - as the fraction of images in the training set (without repeats) in which category c - appears. - See :paper:`lvis` (>= v2) Appendix B.2. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 dataset format. - repeat_thresh (float): frequency threshold below which data is repeated. - If the frequency is half of `repeat_thresh`, the image will be - repeated twice. - sqrt (bool): if True, apply :func:`math.sqrt` to the repeat factor. - - Returns: - torch.Tensor: - the i-th element is the repeat factor for the dataset image at index i. - """ - # 1. For each category c, compute the fraction of images that contain it: f(c) - category_freq = defaultdict(int) - for dataset_dict in dataset_dicts: # For each image (without repeats) - cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} - for cat_id in cat_ids: - category_freq[cat_id] += 1 - num_images = len(dataset_dicts) - for k, v in category_freq.items(): - category_freq[k] = v / num_images - - # 2. For each category c, compute the category-level repeat factor: - # r(c) = max(1, sqrt(t / f(c))) - category_rep = { - cat_id: max( - 1.0, - (math.sqrt(repeat_thresh / cat_freq) if sqrt else (repeat_thresh / cat_freq)), - ) - for cat_id, cat_freq in category_freq.items() - } - for cat_id in sorted(category_rep.keys()): - logger.info( - f"Cat ID {cat_id}: freq={category_freq[cat_id]:.2f}, rep={category_rep[cat_id]:.2f}" - ) - - # 3. For each image I, compute the image-level repeat factor: - # r(I) = max_{c in I} r(c) - rep_factors = [] - for dataset_dict in dataset_dicts: - cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} - rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0) - rep_factors.append(rep_factor) - - return torch.tensor(rep_factors, dtype=torch.float32) - - def _get_epoch_indices(self, generator): - """ - Create a list of dataset indices (with repeats) to use for one epoch. - - Args: - generator (torch.Generator): pseudo random number generator used for - stochastic rounding. - - Returns: - torch.Tensor: list of dataset indices to use in one epoch. Each index - is repeated based on its calculated repeat factor. - """ - # Since repeat factors are fractional, we use stochastic rounding so - # that the target repeat factor is achieved in expectation over the - # course of training - rands = torch.rand(len(self._frac_part), generator=generator) - rep_factors = self._int_part + (rands < self._frac_part).float() - # Construct a list of indices in which we repeat images as specified - indices = [] - for dataset_index, rep_factor in enumerate(rep_factors): - indices.extend([dataset_index] * int(rep_factor.item())) - return torch.tensor(indices, dtype=torch.int64) - - def __iter__(self): - start = self._rank - yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) - - def _infinite_indices(self): - g = torch.Generator() - g.manual_seed(self._seed) - while True: - # Sample indices with repeats determined by stochastic rounding; each - # "epoch" may have a slightly different size due to the rounding. - indices = self._get_epoch_indices(g) - if self._shuffle: - randperm = torch.randperm(len(indices), generator=g) - yield from indices[randperm].tolist() - else: - yield from indices.tolist() - - -class InferenceSampler(Sampler): - """ - Produce indices for inference across all workers. 
- Inference needs to run on the __exact__ set of samples, - therefore when the total number of samples is not divisible by the number of workers, - this sampler produces different number of samples on different workers. - """ - - def __init__(self, size: int): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - """ - self._size = size - assert size > 0 - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - self._local_indices = self._get_local_indices(size, self._world_size, self._rank) - - @staticmethod - def _get_local_indices(total_size, world_size, rank): - shard_size = total_size // world_size - left = total_size % world_size - shard_sizes = [shard_size + int(r < left) for r in range(world_size)] - - begin = sum(shard_sizes[:rank]) - end = min(sum(shard_sizes[: rank + 1]), total_size) - return range(begin, end) - - def __iter__(self): - yield from self._local_indices - - def __len__(self): - return len(self._local_indices) diff --git a/detectron2/detectron2/data/samplers/grouped_batch_sampler.py b/detectron2/detectron2/data/samplers/grouped_batch_sampler.py deleted file mode 100644 index 5b247730aacd04dd0c752664acde3257c4eddd71..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/samplers/grouped_batch_sampler.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -from torch.utils.data.sampler import BatchSampler, Sampler - - -class GroupedBatchSampler(BatchSampler): - """ - Wraps another sampler to yield a mini-batch of indices. - It enforces that the batch only contain elements from the same group. - It also tries to provide mini-batches which follows an ordering which is - as close as possible to the ordering from the original sampler. - """ - - def __init__(self, sampler, group_ids, batch_size): - """ - Args: - sampler (Sampler): Base sampler. - group_ids (list[int]): If the sampler produces indices in range [0, N), - `group_ids` must be a list of `N` ints which contains the group id of each sample. - The group ids must be a set of integers in the range [0, num_groups). - batch_size (int): Size of mini-batch. - """ - if not isinstance(sampler, Sampler): - raise ValueError( - "sampler should be an instance of " - "torch.utils.data.Sampler, but got sampler={}".format(sampler) - ) - self.sampler = sampler - self.group_ids = np.asarray(group_ids) - assert self.group_ids.ndim == 1 - self.batch_size = batch_size - groups = np.unique(self.group_ids).tolist() - - # buffer the indices of each group until batch size is reached - self.buffer_per_group = {k: [] for k in groups} - - def __iter__(self): - for idx in self.sampler: - group_id = self.group_ids[idx] - group_buffer = self.buffer_per_group[group_id] - group_buffer.append(idx) - if len(group_buffer) == self.batch_size: - yield group_buffer[:] # yield a copy of the list - del group_buffer[:] - - def __len__(self): - raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/detectron2/detectron2/data/transforms/__init__.py b/detectron2/detectron2/data/transforms/__init__.py deleted file mode 100644 index ab3c63b5b456a7fb878757e25768a3634f76ae5b..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/transforms/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from fvcore.transforms.transform import Transform, TransformList # order them first -from fvcore.transforms.transform import * -from .transform import * -from .augmentation import * -from .augmentation_impl import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] - - -from detectron2.utils.env import fixup_module_metadata - -fixup_module_metadata(__name__, globals(), __all__) -del fixup_module_metadata diff --git a/detectron2/detectron2/data/transforms/augmentation.py b/detectron2/detectron2/data/transforms/augmentation.py deleted file mode 100644 index 63dd41aef658c9b51c7246880399405a029c5580..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/transforms/augmentation.py +++ /dev/null @@ -1,380 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import inspect -import numpy as np -import pprint -from typing import Any, List, Optional, Tuple, Union -from fvcore.transforms.transform import Transform, TransformList - -""" -See "Data Augmentation" tutorial for an overview of the system: -https://detectron2.readthedocs.io/tutorials/augmentation.html -""" - - -__all__ = [ - "Augmentation", - "AugmentationList", - "AugInput", - "TransformGen", - "apply_transform_gens", - "StandardAugInput", - "apply_augmentations", -] - - -def _check_img_dtype(img): - assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format( - type(img) - ) - assert not isinstance(img.dtype, np.integer) or ( - img.dtype == np.uint8 - ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format( - img.dtype - ) - assert img.ndim in [2, 3], img.ndim - - -def _get_aug_input_args(aug, aug_input) -> List[Any]: - """ - Get the arguments to be passed to ``aug.get_transform`` from the input ``aug_input``. - """ - if aug.input_args is None: - # Decide what attributes are needed automatically - prms = list(inspect.signature(aug.get_transform).parameters.items()) - # The default behavior is: if there is one parameter, then its "image" - # (work automatically for majority of use cases, and also avoid BC breaking), - # Otherwise, use the argument names. - if len(prms) == 1: - names = ("image",) - else: - names = [] - for name, prm in prms: - if prm.kind in ( - inspect.Parameter.VAR_POSITIONAL, - inspect.Parameter.VAR_KEYWORD, - ): - raise TypeError( - f""" \ -The default implementation of `{type(aug)}.__call__` does not allow \ -`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \ -If arguments are unknown, reimplement `__call__` instead. \ -""" - ) - names.append(name) - aug.input_args = tuple(names) - - args = [] - for f in aug.input_args: - try: - args.append(getattr(aug_input, f)) - except AttributeError as e: - raise AttributeError( - f"{type(aug)}.get_transform needs input attribute '{f}', " - f"but it is not an attribute of {type(aug_input)}!" - ) from e - return args - - -class Augmentation: - """ - Augmentation defines (often random) policies/strategies to generate :class:`Transform` - from data. It is often used for pre-processing of input data. - - A "policy" that generates a :class:`Transform` may, in the most general case, - need arbitrary information from input data in order to determine what transforms - to apply. Therefore, each :class:`Augmentation` instance defines the arguments - needed by its :meth:`get_transform` method. When called with the positional arguments, - the :meth:`get_transform` method executes the policy. 
- - Note that :class:`Augmentation` defines the policies to create a :class:`Transform`, - but not how to execute the actual transform operations to those data. - Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform. - - The returned `Transform` object is meant to describe deterministic transformation, which means - it can be re-applied on associated data, e.g. the geometry of an image and its segmentation - masks need to be transformed together. - (If such re-application is not needed, then determinism is not a crucial requirement.) - """ - - input_args: Optional[Tuple[str]] = None - """ - Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``. - By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only - contain "image". As long as the argument name convention is followed, there is no need for - users to touch this attribute. - """ - - def _init(self, params=None): - if params: - for k, v in params.items(): - if k != "self" and not k.startswith("_"): - setattr(self, k, v) - - def get_transform(self, *args) -> Transform: - """ - Execute the policy based on input data, and decide what transform to apply to inputs. - - Args: - args: Any fixed-length positional arguments. By default, the name of the arguments - should exist in the :class:`AugInput` to be used. - - Returns: - Transform: Returns the deterministic transform to apply to the input. - - Examples: - :: - class MyAug: - # if a policy needs to know both image and semantic segmentation - def get_transform(image, sem_seg) -> T.Transform: - pass - tfm: Transform = MyAug().get_transform(image, sem_seg) - new_image = tfm.apply_image(image) - - Notes: - Users can freely use arbitrary new argument names in custom - :meth:`get_transform` method, as long as they are available in the - input data. In detectron2 we use the following convention: - - * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or - floating point in range [0, 1] or [0, 255]. - * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes - of N instances. Each is in XYXY format in unit of absolute coordinates. - * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel. - - We do not specify convention for other types and do not include builtin - :class:`Augmentation` that uses other types in detectron2. - """ - raise NotImplementedError - - def __call__(self, aug_input) -> Transform: - """ - Augment the given `aug_input` **in-place**, and return the transform that's used. - - This method will be called to apply the augmentation. In most augmentation, it - is enough to use the default implementation, which calls :meth:`get_transform` - using the inputs. But a subclass can overwrite it to have more complicated logic. - - Args: - aug_input (AugInput): an object that has attributes needed by this augmentation - (defined by ``self.get_transform``). Its ``transform`` method will be called - to in-place transform it. - - Returns: - Transform: the transform that is applied on the input. - """ - args = _get_aug_input_args(self, aug_input) - tfm = self.get_transform(*args) - assert isinstance(tfm, (Transform, TransformList)), ( - f"{type(self)}.get_transform must return an instance of Transform! " - f"Got {type(tfm)} instead." - ) - aug_input.transform(tfm) - return tfm - - def _rand_range(self, low=1.0, high=None, size=None): - """ - Uniform float random number between low and high. 
- """ - if high is None: - low, high = 0, low - if size is None: - size = [] - return np.random.uniform(low, high, size) - - def __repr__(self): - """ - Produce something like: - "MyAugmentation(field1={self.field1}, field2={self.field2})" - """ - try: - sig = inspect.signature(self.__init__) - classname = type(self).__name__ - argstr = [] - for name, param in sig.parameters.items(): - assert ( - param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD - ), "The default __repr__ doesn't support *args or **kwargs" - assert hasattr(self, name), ( - "Attribute {} not found! " - "Default __repr__ only works if attributes match the constructor.".format(name) - ) - attr = getattr(self, name) - default = param.default - if default is attr: - continue - attr_str = pprint.pformat(attr) - if "\n" in attr_str: - # don't show it if pformat decides to use >1 lines - attr_str = "..." - argstr.append("{}={}".format(name, attr_str)) - return "{}({})".format(classname, ", ".join(argstr)) - except AssertionError: - return super().__repr__() - - __str__ = __repr__ - - -class _TransformToAug(Augmentation): - def __init__(self, tfm: Transform): - self.tfm = tfm - - def get_transform(self, *args): - return self.tfm - - def __repr__(self): - return repr(self.tfm) - - __str__ = __repr__ - - -def _transform_to_aug(tfm_or_aug): - """ - Wrap Transform into Augmentation. - Private, used internally to implement augmentations. - """ - assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug - if isinstance(tfm_or_aug, Augmentation): - return tfm_or_aug - else: - return _TransformToAug(tfm_or_aug) - - -class AugmentationList(Augmentation): - """ - Apply a sequence of augmentations. - - It has ``__call__`` method to apply the augmentations. - - Note that :meth:`get_transform` method is impossible (will throw error if called) - for :class:`AugmentationList`, because in order to apply a sequence of augmentations, - the kth augmentation must be applied first, to provide inputs needed by the (k+1)th - augmentation. - """ - - def __init__(self, augs): - """ - Args: - augs (list[Augmentation or Transform]): - """ - super().__init__() - self.augs = [_transform_to_aug(x) for x in augs] - - def __call__(self, aug_input) -> TransformList: - tfms = [] - for x in self.augs: - tfm = x(aug_input) - tfms.append(tfm) - return TransformList(tfms) - - def __repr__(self): - msgs = [str(x) for x in self.augs] - return "AugmentationList[{}]".format(", ".join(msgs)) - - __str__ = __repr__ - - -class AugInput: - """ - Input that can be used with :meth:`Augmentation.__call__`. - This is a standard implementation for the majority of use cases. - This class provides the standard attributes **"image", "boxes", "sem_seg"** - defined in :meth:`__init__` and they may be needed by different augmentations. - Most augmentation policies do not need attributes beyond these three. - - After applying augmentations to these attributes (using :meth:`AugInput.transform`), - the returned transforms can then be used to transform other data structures that users have. - - Examples: - :: - input = AugInput(image, boxes=boxes) - tfms = augmentation(input) - transformed_image = input.image - transformed_boxes = input.boxes - transformed_other_data = tfms.apply_other(other_data) - - An extended project that works with new data types may implement augmentation policies - that need other inputs. An algorithm may need to transform inputs in a way different - from the standard approach defined in this class. 
In those rare situations, users can - implement a class similar to this class, that satify the following condition: - - * The input must provide access to these data in the form of attribute access - (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image" - and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg". - * The input must have a ``transform(tfm: Transform) -> None`` method which - in-place transforms all its attributes. - """ - - # TODO maybe should support more builtin data types here - def __init__( - self, - image: np.ndarray, - *, - boxes: Optional[np.ndarray] = None, - sem_seg: Optional[np.ndarray] = None, - ): - """ - Args: - image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or - floating point in range [0, 1] or [0, 255]. The meaning of C is up - to users. - boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode - sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element - is an integer label of pixel. - """ - _check_img_dtype(image) - self.image = image - self.boxes = boxes - self.sem_seg = sem_seg - - def transform(self, tfm: Transform) -> None: - """ - In-place transform all attributes of this class. - - By "in-place", it means after calling this method, accessing an attribute such - as ``self.image`` will return transformed data. - """ - self.image = tfm.apply_image(self.image) - if self.boxes is not None: - self.boxes = tfm.apply_box(self.boxes) - if self.sem_seg is not None: - self.sem_seg = tfm.apply_segmentation(self.sem_seg) - - def apply_augmentations( - self, augmentations: List[Union[Augmentation, Transform]] - ) -> TransformList: - """ - Equivalent of ``AugmentationList(augmentations)(self)`` - """ - return AugmentationList(augmentations)(self) - - -def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs): - """ - Use ``T.AugmentationList(augmentations)(inputs)`` instead. - """ - if isinstance(inputs, np.ndarray): - # handle the common case of image-only Augmentation, also for backward compatibility - image_only = True - inputs = AugInput(inputs) - else: - image_only = False - tfms = inputs.apply_augmentations(augmentations) - return inputs.image if image_only else inputs, tfms - - -apply_transform_gens = apply_augmentations -""" -Alias for backward-compatibility. -""" - -TransformGen = Augmentation -""" -Alias for Augmentation, since it is something that generates :class:`Transform`s -""" - -StandardAugInput = AugInput -""" -Alias for compatibility. It's not worth the complexity to have two classes. -""" diff --git a/detectron2/detectron2/data/transforms/augmentation_impl.py b/detectron2/detectron2/data/transforms/augmentation_impl.py deleted file mode 100644 index 7cc7b28be66cdf14bff493745c6c567da55aeb34..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/transforms/augmentation_impl.py +++ /dev/null @@ -1,736 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Implement many useful :class:`Augmentation`. 
-""" -import numpy as np -import sys -from numpy import random -from typing import Tuple -import torch -from fvcore.transforms.transform import ( - BlendTransform, - CropTransform, - HFlipTransform, - NoOpTransform, - PadTransform, - Transform, - TransformList, - VFlipTransform, -) -from PIL import Image - -from detectron2.structures import Boxes, pairwise_iou - -from .augmentation import Augmentation, _transform_to_aug -from .transform import ExtentTransform, ResizeTransform, RotationTransform - -__all__ = [ - "FixedSizeCrop", - "RandomApply", - "RandomBrightness", - "RandomContrast", - "RandomCrop", - "RandomExtent", - "RandomFlip", - "RandomSaturation", - "RandomLighting", - "RandomRotation", - "Resize", - "ResizeScale", - "ResizeShortestEdge", - "RandomCrop_CategoryAreaConstraint", - "RandomResize", - "MinIoURandomCrop", -] - - -class RandomApply(Augmentation): - """ - Randomly apply an augmentation with a given probability. - """ - - def __init__(self, tfm_or_aug, prob=0.5): - """ - Args: - tfm_or_aug (Transform, Augmentation): the transform or augmentation - to be applied. It can either be a `Transform` or `Augmentation` - instance. - prob (float): probability between 0.0 and 1.0 that - the wrapper transformation is applied - """ - super().__init__() - self.aug = _transform_to_aug(tfm_or_aug) - assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" - self.prob = prob - - def get_transform(self, *args): - do = self._rand_range() < self.prob - if do: - return self.aug.get_transform(*args) - else: - return NoOpTransform() - - def __call__(self, aug_input): - do = self._rand_range() < self.prob - if do: - return self.aug(aug_input) - else: - return NoOpTransform() - - -class RandomFlip(Augmentation): - """ - Flip the image horizontally or vertically with the given probability. - """ - - def __init__(self, prob=0.5, *, horizontal=True, vertical=False): - """ - Args: - prob (float): probability of flip. - horizontal (boolean): whether to apply horizontal flipping - vertical (boolean): whether to apply vertical flipping - """ - super().__init__() - - if horizontal and vertical: - raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") - if not horizontal and not vertical: - raise ValueError("At least one of horiz or vert has to be True!") - self._init(locals()) - - def get_transform(self, image): - h, w = image.shape[:2] - do = self._rand_range() < self.prob - if do: - if self.horizontal: - return HFlipTransform(w) - elif self.vertical: - return VFlipTransform(h) - else: - return NoOpTransform() - - -class Resize(Augmentation): - """Resize image to a fixed target size""" - - def __init__(self, shape, interp=Image.BILINEAR): - """ - Args: - shape: (h, w) tuple or a int - interp: PIL interpolation method - """ - if isinstance(shape, int): - shape = (shape, shape) - shape = tuple(shape) - self._init(locals()) - - def get_transform(self, image): - return ResizeTransform( - image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp - ) - - -class ResizeShortestEdge(Augmentation): - """ - Resize the image while keeping the aspect ratio unchanged. - It attempts to scale the shorter edge to the given `short_edge_length`, - as long as the longer edge does not exceed `max_size`. - If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. 
- """ - - @torch.jit.unused - def __init__( - self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR - ): - """ - Args: - short_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - max_size (int): maximum allowed longest edge length. - sample_style (str): either "range" or "choice". - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(short_edge_length, int): - short_edge_length = (short_edge_length, short_edge_length) - if self.is_range: - assert len(short_edge_length) == 2, ( - "short_edge_length must be two values using 'range' sample style." - f" Got {short_edge_length}!" - ) - self._init(locals()) - - @torch.jit.unused - def get_transform(self, image): - h, w = image.shape[:2] - if self.is_range: - size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) - else: - size = np.random.choice(self.short_edge_length) - if size == 0: - return NoOpTransform() - - newh, neww = ResizeShortestEdge.get_output_shape(h, w, size, self.max_size) - return ResizeTransform(h, w, newh, neww, self.interp) - - @staticmethod - def get_output_shape( - oldh: int, oldw: int, short_edge_length: int, max_size: int - ) -> Tuple[int, int]: - """ - Compute the output size given input size and target short edge length. - """ - h, w = oldh, oldw - size = short_edge_length * 1.0 - scale = size / min(h, w) - if h < w: - newh, neww = size, scale * w - else: - newh, neww = scale * h, size - if max(newh, neww) > max_size: - scale = max_size * 1.0 / max(newh, neww) - newh = newh * scale - neww = neww * scale - neww = int(neww + 0.5) - newh = int(newh + 0.5) - return (newh, neww) - - -class ResizeScale(Augmentation): - """ - Takes target size as input and randomly scales the given target size between `min_scale` - and `max_scale`. It then scales the input image such that it fits inside the scaled target - box, keeping the aspect ratio constant. - This implements the resize part of the Google's 'resize_and_crop' data augmentation: - https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/input_utils.py#L127 - """ - - def __init__( - self, - min_scale: float, - max_scale: float, - target_height: int, - target_width: int, - interp: int = Image.BILINEAR, - ): - """ - Args: - min_scale: minimum image scale range. - max_scale: maximum image scale range. - target_height: target image height. - target_width: target image width. - interp: image interpolation method. - """ - super().__init__() - self._init(locals()) - - def _get_resize(self, image: np.ndarray, scale: float) -> Transform: - input_size = image.shape[:2] - - # Compute new target size given a scale. - target_size = (self.target_height, self.target_width) - target_scale_size = np.multiply(target_size, scale) - - # Compute actual rescaling applied to input image and output size. 
- output_scale = np.minimum( - target_scale_size[0] / input_size[0], target_scale_size[1] / input_size[1] - ) - output_size = np.round(np.multiply(input_size, output_scale)).astype(int) - - return ResizeTransform( - input_size[0], input_size[1], int(output_size[0]), int(output_size[1]), self.interp - ) - - def get_transform(self, image: np.ndarray) -> Transform: - random_scale = np.random.uniform(self.min_scale, self.max_scale) - return self._get_resize(image, random_scale) - - -class RandomRotation(Augmentation): - """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around the given center. - """ - - def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): - """ - Args: - angle (list[float]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the angle (in degrees). - If ``sample_style=="choice"``, a list of angles to sample from - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (list[[float, float]]): If ``sample_style=="range"``, - a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, - [0, 0] being the top left of the image and [1, 1] the bottom right. - If ``sample_style=="choice"``, a list of centers to sample from - Default: None, which means that the center of rotation is the center of the image - center has no effect if expand=True because it only affects shifting - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - self.is_range = sample_style == "range" - if isinstance(angle, (float, int)): - angle = (angle, angle) - if center is not None and isinstance(center[0], (float, int)): - center = (center, center) - self._init(locals()) - - def get_transform(self, image): - h, w = image.shape[:2] - center = None - if self.is_range: - angle = np.random.uniform(self.angle[0], self.angle[1]) - if self.center is not None: - center = ( - np.random.uniform(self.center[0][0], self.center[1][0]), - np.random.uniform(self.center[0][1], self.center[1][1]), - ) - else: - angle = np.random.choice(self.angle) - if self.center is not None: - center = np.random.choice(self.center) - - if center is not None: - center = (w * center[0], h * center[1]) # Convert to absolute coordinates - - if angle % 360 == 0: - return NoOpTransform() - - return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) - - -class FixedSizeCrop(Augmentation): - """ - If `crop_size` is smaller than the input image size, then it uses a random crop of - the crop size. If `crop_size` is larger than the input image size, then it pads - the right and the bottom of the image to the crop size if `pad` is True, otherwise - it returns the smaller image. - """ - - def __init__( - self, - crop_size: Tuple[int], - pad: bool = True, - pad_value: float = 128.0, - seg_pad_value: int = 255, - ): - """ - Args: - crop_size: target image (height, width). - pad: if True, will pad images smaller than `crop_size` up to `crop_size` - pad_value: the padding value to the image. - seg_pad_value: the padding value to the segmentation mask. - """ - super().__init__() - self._init(locals()) - - def _get_crop(self, image: np.ndarray) -> Transform: - # Compute the image scale and scaled size. - input_size = image.shape[:2] - output_size = self.crop_size - - # Add random crop if the image is scaled up. 
- max_offset = np.subtract(input_size, output_size) - max_offset = np.maximum(max_offset, 0) - offset = np.multiply(max_offset, np.random.uniform(0.0, 1.0)) - offset = np.round(offset).astype(int) - return CropTransform( - offset[1], offset[0], output_size[1], output_size[0], input_size[1], input_size[0] - ) - - def _get_pad(self, image: np.ndarray) -> Transform: - # Compute the image scale and scaled size. - input_size = image.shape[:2] - output_size = self.crop_size - - # Add padding if the image is scaled down. - pad_size = np.subtract(output_size, input_size) - pad_size = np.maximum(pad_size, 0) - original_size = np.minimum(input_size, output_size) - return PadTransform( - 0, - 0, - pad_size[1], - pad_size[0], - original_size[1], - original_size[0], - self.pad_value, - self.seg_pad_value, - ) - - def get_transform(self, image: np.ndarray) -> TransformList: - transforms = [self._get_crop(image)] - if self.pad: - transforms.append(self._get_pad(image)) - return TransformList(transforms) - - -class RandomCrop(Augmentation): - """ - Randomly crop a rectangle region out of an image. - """ - - def __init__(self, crop_type: str, crop_size): - """ - Args: - crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range". - crop_size (tuple[float, float]): two floats, explained below. - - - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of - size (H, W). crop size should be in (0, 1] - - "relative_range": uniformly sample two values from [crop_size[0], 1] - and [crop_size[1]], 1], and use them as in "relative" crop type. - - "absolute" crop a (crop_size[0], crop_size[1]) region from input image. - crop_size must be smaller than the input image size. - - "absolute_range", for an input of size (H, W), uniformly sample H_crop in - [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])]. - Then crop a region (H_crop, W_crop). 
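For reference, a few illustrative instantiations of the crop types documented above (the sizes are arbitrary):

from detectron2.data import transforms as T

T.RandomCrop("relative", (0.5, 0.5))        # crop a (0.5*H, 0.5*W) window
T.RandomCrop("relative_range", (0.3, 0.3))  # per-axis crop ratio sampled from [0.3, 1]
T.RandomCrop("absolute", (512, 512))        # crop a fixed 512x512 window
T.RandomCrop("absolute_range", (384, 640))  # each side sampled from [384, min(side, 640)]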
- """ - # TODO style of relative_range and absolute_range are not consistent: - # one takes (h, w) but another takes (min, max) - super().__init__() - assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"] - self._init(locals()) - - def get_transform(self, image): - h, w = image.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) - h0 = np.random.randint(h - croph + 1) - w0 = np.random.randint(w - cropw + 1) - return CropTransform(w0, h0, cropw, croph) - - def get_crop_size(self, image_size): - """ - Args: - image_size (tuple): height, width - - Returns: - crop_size (tuple): height, width in absolute pixels - """ - h, w = image_size - if self.crop_type == "relative": - ch, cw = self.crop_size - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "relative_range": - crop_size = np.asarray(self.crop_size, dtype=np.float32) - ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "absolute": - return (min(self.crop_size[0], h), min(self.crop_size[1], w)) - elif self.crop_type == "absolute_range": - assert self.crop_size[0] <= self.crop_size[1] - ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1) - cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1) - return ch, cw - else: - raise NotImplementedError("Unknown crop type {}".format(self.crop_type)) - - -class RandomCrop_CategoryAreaConstraint(Augmentation): - """ - Similar to :class:`RandomCrop`, but find a cropping window such that no single category - occupies a ratio of more than `single_category_max_area` in semantic segmentation ground - truth, which can cause unstability in training. The function attempts to find such a valid - cropping window for at most 10 times. - """ - - def __init__( - self, - crop_type: str, - crop_size, - single_category_max_area: float = 1.0, - ignored_category: int = None, - ): - """ - Args: - crop_type, crop_size: same as in :class:`RandomCrop` - single_category_max_area: the maximum allowed area ratio of a - category. Set to 1.0 to disable - ignored_category: allow this category in the semantic segmentation - ground truth to exceed the area ratio. Usually set to the category - that's ignored in training. - """ - self.crop_aug = RandomCrop(crop_type, crop_size) - self._init(locals()) - - def get_transform(self, image, sem_seg): - if self.single_category_max_area >= 1.0: - return self.crop_aug.get_transform(image) - else: - h, w = sem_seg.shape - for _ in range(10): - crop_size = self.crop_aug.get_crop_size((h, w)) - y0 = np.random.randint(h - crop_size[0] + 1) - x0 = np.random.randint(w - crop_size[1] + 1) - sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] - labels, cnt = np.unique(sem_seg_temp, return_counts=True) - if self.ignored_category is not None: - cnt = cnt[labels != self.ignored_category] - if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area: - break - crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) - return crop_tfm - - -class RandomExtent(Augmentation): - """ - Outputs an image by cropping a random "subrect" of the source image. - - The subrect can be parameterized to include pixels outside the source image, - in which case they will be set to zeros (i.e. black). The size of the output - image will vary with the size of the random subrect. 
- """ - - def __init__(self, scale_range, shift_range): - """ - Args: - output_size (h, w): Dimensions of output image - scale_range (l, h): Range of input-to-output size scaling factor - shift_range (x, y): Range of shifts of the cropped subrect. The rect - is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], - where (w, h) is the (width, height) of the input image. Set each - component to zero to crop at the image's center. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, image): - img_h, img_w = image.shape[:2] - - # Initialize src_rect to fit the input image. - src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) - - # Apply a random scaling to the src_rect. - src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) - - # Apply a random shift to the coordinates origin. - src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) - src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) - - # Map src_rect coordinates into image coordinates (center at corner). - src_rect[0::2] += 0.5 * img_w - src_rect[1::2] += 0.5 * img_h - - return ExtentTransform( - src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), - output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), - ) - - -class RandomContrast(Augmentation): - """ - Randomly transforms image contrast. - - Contrast intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce contrast - - intensity = 1 will preserve the input image - - intensity > 1 will increase contrast - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation - intensity_max (float): Maximum augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, image): - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=image.mean(), src_weight=1 - w, dst_weight=w) - - -class RandomBrightness(Augmentation): - """ - Randomly transforms image brightness. - - Brightness intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce brightness - - intensity = 1 will preserve the input image - - intensity > 1 will increase brightness - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation - intensity_max (float): Maximum augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, image): - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) - - -class RandomSaturation(Augmentation): - """ - Randomly transforms saturation of an RGB image. - Input images are assumed to have 'RGB' channel order. - - Saturation intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce saturation (make the image more grayscale) - - intensity = 1 will preserve the input image - - intensity > 1 will increase saturation - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation (1 preserves input). - intensity_max (float): Maximum augmentation (1 preserves input). 
- """ - super().__init__() - self._init(locals()) - - def get_transform(self, image): - assert image.shape[-1] == 3, "RandomSaturation only works on RGB images" - w = np.random.uniform(self.intensity_min, self.intensity_max) - grayscale = image.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] - return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) - - -class RandomLighting(Augmentation): - """ - The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet. - Input images are assumed to have 'RGB' channel order. - - The degree of color jittering is randomly sampled via a normal distribution, - with standard deviation given by the scale parameter. - """ - - def __init__(self, scale): - """ - Args: - scale (float): Standard deviation of principal component weighting. - """ - super().__init__() - self._init(locals()) - self.eigen_vecs = np.array( - [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] - ) - self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) - - def get_transform(self, image): - assert image.shape[-1] == 3, "RandomLighting only works on RGB images" - weights = np.random.normal(scale=self.scale, size=3) - return BlendTransform( - src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 - ) - - -class RandomResize(Augmentation): - """Randomly resize image to a target size in shape_list""" - - def __init__(self, shape_list, interp=Image.BILINEAR): - """ - Args: - shape_list: a list of shapes in (h, w) - interp: PIL interpolation method - """ - self.shape_list = shape_list - self._init(locals()) - - def get_transform(self, image): - shape_idx = np.random.randint(low=0, high=len(self.shape_list)) - h, w = self.shape_list[shape_idx] - return ResizeTransform(image.shape[0], image.shape[1], h, w, self.interp) - - -class MinIoURandomCrop(Augmentation): - """Random crop the image & bboxes, the cropped patches have minimum IoU - requirement with original image & bboxes, the IoU threshold is randomly - selected from min_ious. - - Args: - min_ious (tuple): minimum IoU threshold for all intersections with - bounding boxes - min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w, - where a >= min_crop_size) - mode_trials: number of trials for sampling min_ious threshold - crop_trials: number of trials for sampling crop_size after cropping - """ - - def __init__( - self, - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3, - mode_trials=1000, - crop_trials=50, - ): - self.min_ious = min_ious - self.sample_mode = (1, *min_ious, 0) - self.min_crop_size = min_crop_size - self.mode_trials = mode_trials - self.crop_trials = crop_trials - - def get_transform(self, image, boxes): - """Call function to crop images and bounding boxes with minimum IoU - constraint. 
- - Args: - boxes: ground truth boxes in (x1, y1, x2, y2) format - """ - if boxes is None: - return NoOpTransform() - h, w, c = image.shape - for _ in range(self.mode_trials): - mode = random.choice(self.sample_mode) - self.mode = mode - if mode == 1: - return NoOpTransform() - - min_iou = mode - for _ in range(self.crop_trials): - new_w = random.uniform(self.min_crop_size * w, w) - new_h = random.uniform(self.min_crop_size * h, h) - - # h / w in [0.5, 2] - if new_h / new_w < 0.5 or new_h / new_w > 2: - continue - - left = random.uniform(w - new_w) - top = random.uniform(h - new_h) - - patch = np.array((int(left), int(top), int(left + new_w), int(top + new_h))) - # Line or point crop is not allowed - if patch[2] == patch[0] or patch[3] == patch[1]: - continue - overlaps = pairwise_iou( - Boxes(patch.reshape(-1, 4)), Boxes(boxes.reshape(-1, 4)) - ).reshape(-1) - if len(overlaps) > 0 and overlaps.min() < min_iou: - continue - - # center of boxes should inside the crop img - # only adjust boxes and instance masks when the gt is not empty - if len(overlaps) > 0: - # adjust boxes - def is_center_of_bboxes_in_patch(boxes, patch): - center = (boxes[:, :2] + boxes[:, 2:]) / 2 - mask = ( - (center[:, 0] > patch[0]) - * (center[:, 1] > patch[1]) - * (center[:, 0] < patch[2]) - * (center[:, 1] < patch[3]) - ) - return mask - - mask = is_center_of_bboxes_in_patch(boxes, patch) - if not mask.any(): - continue - return CropTransform(int(left), int(top), int(new_w), int(new_h)) diff --git a/detectron2/detectron2/data/transforms/transform.py b/detectron2/detectron2/data/transforms/transform.py deleted file mode 100644 index 46769a2569ffc6223a95990f8db5973757e7d23f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/data/transforms/transform.py +++ /dev/null @@ -1,351 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -See "Data Augmentation" tutorial for an overview of the system: -https://detectron2.readthedocs.io/tutorials/augmentation.html -""" - -import numpy as np -import torch -import torch.nn.functional as F -from fvcore.transforms.transform import ( - CropTransform, - HFlipTransform, - NoOpTransform, - Transform, - TransformList, -) -from PIL import Image - -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - pass - -__all__ = [ - "ExtentTransform", - "ResizeTransform", - "RotationTransform", - "ColorTransform", - "PILColorTransform", -] - - -class ExtentTransform(Transform): - """ - Extracts a subregion from the source image and scales it to the output size. - - The fill color is used to map pixels from the source rect that fall outside - the source image. 
- - See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform - """ - - def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0): - """ - Args: - src_rect (x0, y0, x1, y1): src coordinates - output_size (h, w): dst image size - interp: PIL interpolation methods - fill: Fill color used when src_rect extends outside image - """ - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - h, w = self.output_size - if len(img.shape) > 2 and img.shape[2] == 1: - pil_image = Image.fromarray(img[:, :, 0], mode="L") - else: - pil_image = Image.fromarray(img) - pil_image = pil_image.transform( - size=(w, h), - method=Image.EXTENT, - data=self.src_rect, - resample=interp if interp else self.interp, - fill=self.fill, - ) - ret = np.asarray(pil_image) - if len(img.shape) > 2 and img.shape[2] == 1: - ret = np.expand_dims(ret, -1) - return ret - - def apply_coords(self, coords): - # Transform image center from source coordinates into output coordinates - # and then map the new origin to the corner of the output image. - h, w = self.output_size - x0, y0, x1, y1 = self.src_rect - new_coords = coords.astype(np.float32) - new_coords[:, 0] -= 0.5 * (x0 + x1) - new_coords[:, 1] -= 0.5 * (y0 + y1) - new_coords[:, 0] *= w / (x1 - x0) - new_coords[:, 1] *= h / (y1 - y0) - new_coords[:, 0] += 0.5 * w - new_coords[:, 1] += 0.5 * h - return new_coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. 
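A small usage sketch of this transform (sizes are illustrative):

import numpy as np
from detectron2.data.transforms import ResizeTransform

tfm = ResizeTransform(480, 640, 240, 320)        # (h, w) -> (new_h, new_w)
image = np.zeros((480, 640, 3), dtype=np.uint8)
small = tfm.apply_image(image)                   # shape (240, 320, 3)
pts = tfm.apply_coords(np.array([[100.0, 50.0]], dtype=np.float32))  # both axes scaled by 0.5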
- """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - interp_method = interp if interp is not None else self.interp - - if img.dtype == np.uint8: - if len(img.shape) > 2 and img.shape[2] == 1: - pil_image = Image.fromarray(img[:, :, 0], mode="L") - else: - pil_image = Image.fromarray(img) - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - if len(img.shape) > 2 and img.shape[2] == 1: - ret = np.expand_dims(ret, -1) - else: - # PIL only supports uint8 - if any(x < 0 for x in img.strides): - img = np.ascontiguousarray(img) - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = { - Image.NEAREST: "nearest", - Image.BILINEAR: "bilinear", - Image.BICUBIC: "bicubic", - } - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] - align_corners = None if mode == "nearest" else False - img = F.interpolate( - img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners - ) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - - -class RotationTransform(Transform): - """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around its center. 
- """ - - def __init__(self, h, w, angle, expand=True, center=None, interp=None): - """ - Args: - h, w (int): original image size - angle (float): degrees for rotation - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (tuple (width, height)): coordinates of the rotation center - if left to None, the center will be fit to the center of each image - center has no effect if expand=True because it only affects shifting - interp: cv2 interpolation method, default cv2.INTER_LINEAR - """ - super().__init__() - image_center = np.array((w / 2, h / 2)) - if center is None: - center = image_center - if interp is None: - interp = cv2.INTER_LINEAR - abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) - if expand: - # find the new width and height bounds - bound_w, bound_h = np.rint( - [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] - ).astype(int) - else: - bound_w, bound_h = w, h - - self._set_attributes(locals()) - self.rm_coords = self.create_rotation_matrix() - # Needed because of this problem https://github.com/opencv/opencv/issues/11784 - self.rm_image = self.create_rotation_matrix(offset=-0.5) - - def apply_image(self, img, interp=None): - """ - img should be a numpy array, formatted as Height * Width * Nchannels - """ - if len(img) == 0 or self.angle % 360 == 0: - return img - assert img.shape[:2] == (self.h, self.w) - interp = interp if interp is not None else self.interp - return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) - - def apply_coords(self, coords): - """ - coords should be a N * 2 array-like, containing N couples of (x, y) points - """ - coords = np.asarray(coords, dtype=float) - if len(coords) == 0 or self.angle % 360 == 0: - return coords - return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) - return segmentation - - def create_rotation_matrix(self, offset=0): - center = (self.center[0] + offset, self.center[1] + offset) - rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) - if self.expand: - # Find the coordinates of the center of rotation in the new image - # The only point for which we know the future coordinates is the center of the image - rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] - new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center - # shift the rotation center to the new coordinates - rm[:, 2] += new_center - return rm - - def inverse(self): - """ - The inverse is to rotate it back with expand, and crop to get the original shape. - """ - if not self.expand: # Not possible to inverse if a part of the image is lost - raise NotImplementedError() - rotation = RotationTransform( - self.bound_h, self.bound_w, -self.angle, True, None, self.interp - ) - crop = CropTransform( - (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h - ) - return TransformList([rotation, crop]) - - -class ColorTransform(Transform): - """ - Generic wrapper for any photometric transforms. - These transformations should only affect the color space and - not the coordinate space of the image (e.g. 
annotation - coordinates such as bounding boxes should not be changed) - """ - - def __init__(self, op): - """ - Args: - op (Callable): operation to be applied to the image, - which takes in an ndarray and returns an ndarray. - """ - if not callable(op): - raise ValueError("op parameter should be callable") - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img): - return self.op(img) - - def apply_coords(self, coords): - return coords - - def inverse(self): - return NoOpTransform() - - def apply_segmentation(self, segmentation): - return segmentation - - -class PILColorTransform(ColorTransform): - """ - Generic wrapper for PIL Photometric image transforms, - which affect the color space and not the coordinate - space of the image - """ - - def __init__(self, op): - """ - Args: - op (Callable): operation to be applied to the image, - which takes in a PIL Image and returns a transformed - PIL Image. - For reference on possible operations see: - - https://pillow.readthedocs.io/en/stable/ - """ - if not callable(op): - raise ValueError("op parameter should be callable") - super().__init__(op) - - def apply_image(self, img): - img = Image.fromarray(img) - return np.asarray(super().apply_image(img)) - - -def HFlip_rotated_box(transform, rotated_boxes): - """ - Apply the horizontal flip transform on rotated boxes. - - Args: - rotated_boxes (ndarray): Nx5 floating point array of - (x_center, y_center, width, height, angle_degrees) format - in absolute coordinates. - """ - # Transform x_center - rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] - # Transform angle - rotated_boxes[:, 4] = -rotated_boxes[:, 4] - return rotated_boxes - - -def Resize_rotated_box(transform, rotated_boxes): - """ - Apply the resizing transform on rotated boxes. For details of how these (approximation) - formulas are derived, please refer to :meth:`RotatedBoxes.scale`. - - Args: - rotated_boxes (ndarray): Nx5 floating point array of - (x_center, y_center, width, height, angle_degrees) format - in absolute coordinates. - """ - scale_factor_x = transform.new_w * 1.0 / transform.w - scale_factor_y = transform.new_h * 1.0 / transform.h - rotated_boxes[:, 0] *= scale_factor_x - rotated_boxes[:, 1] *= scale_factor_y - theta = rotated_boxes[:, 4] * np.pi / 180.0 - c = np.cos(theta) - s = np.sin(theta) - rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) - rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) - rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi - - return rotated_boxes - - -HFlipTransform.register_type("rotated_box", HFlip_rotated_box) -ResizeTransform.register_type("rotated_box", Resize_rotated_box) - -# not necessary any more with latest fvcore -NoOpTransform.register_type("rotated_box", lambda t, x: x) diff --git a/detectron2/detectron2/engine/__init__.py b/detectron2/detectron2/engine/__init__.py deleted file mode 100644 index e6e4d673dedd10419b612755cfcb9744fc4999f8..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/engine/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -from .launch import * -from .train_loop import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] - - -# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) -# but still make them available here -from .hooks import * -from .defaults import ( - create_ddp_model, - default_argument_parser, - default_setup, - default_writers, - DefaultPredictor, - DefaultTrainer, -) diff --git a/detectron2/detectron2/engine/defaults.py b/detectron2/detectron2/engine/defaults.py deleted file mode 100644 index 3dbcd86b753ab93185f03ac1b6b3c918da553e7b..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/engine/defaults.py +++ /dev/null @@ -1,751 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -This file contains components with some default boilerplate logic user may need -in training / testing. They will not work for everyone, but many users may find them useful. - -The behavior of functions/classes in this file is subject to change, -since they are meant to represent the "common default behavior" people need in their projects. -""" - -import argparse -import logging -import os -import sys -import weakref -from collections import OrderedDict -from typing import Optional -import torch -from fvcore.nn.precise_bn import get_bn_modules -from omegaconf import OmegaConf -from torch.nn.parallel import DistributedDataParallel - -import detectron2.data.transforms as T -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import CfgNode, LazyConfig -from detectron2.data import ( - MetadataCatalog, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.evaluation import ( - DatasetEvaluator, - inference_on_dataset, - print_csv_format, - verify_results, -) -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils import comm -from detectron2.utils.collect_env import collect_env_info -from detectron2.utils.env import seed_all_rng -from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import setup_logger - -from . import hooks -from .train_loop import AMPTrainer, SimpleTrainer, TrainerBase - -__all__ = [ - "create_ddp_model", - "default_argument_parser", - "default_setup", - "default_writers", - "DefaultPredictor", - "DefaultTrainer", -] - - -def create_ddp_model(model, *, fp16_compression=False, **kwargs): - """ - Create a DistributedDataParallel model if there are >1 processes. - - Args: - model: a torch.nn.Module - fp16_compression: add fp16 compression hooks to the ddp object. - See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook - kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`. - """ # noqa - if comm.get_world_size() == 1: - return model - if "device_ids" not in kwargs: - kwargs["device_ids"] = [comm.get_local_rank()] - ddp = DistributedDataParallel(model, **kwargs) - if fp16_compression: - from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks - - ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook) - return ddp - - -def default_argument_parser(epilog=None): - """ - Create a parser with some common arguments used by detectron2 users. 
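For reference, the typical way these helpers are wired together in a launcher script (the `main` body here is hypothetical):

from detectron2.engine import default_argument_parser, launch

def main(args):
    ...  # build the config, call default_setup(cfg, args), then train or evaluate

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )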
- - Args: - epilog (str): epilog passed to ArgumentParser describing the usage. - - Returns: - argparse.ArgumentParser: - """ - parser = argparse.ArgumentParser( - epilog=epilog - or f""" -Examples: - -Run on single machine: - $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml - -Change some config options: - $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 - -Run on multiple machines: - (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] - (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] -""", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") - parser.add_argument( - "--resume", - action="store_true", - help="Whether to attempt to resume from the checkpoint directory. " - "See documentation of `DefaultTrainer.resume_or_load()` for what it means.", - ) - parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") - parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") - parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") - parser.add_argument( - "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" - ) - - # PyTorch still may leave orphan processes in multi-gpu training. - # Therefore we use a deterministic way to obtain port, - # so that users are aware of orphan processes by seeing the port occupied. - port = 2**15 + 2**14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2**14 - parser.add_argument( - "--dist-url", - default="tcp://127.0.0.1:{}".format(port), - help="initialization URL for pytorch distributed backend. See " - "https://pytorch.org/docs/stable/distributed.html for details.", - ) - parser.add_argument( - "opts", - help=""" -Modify config options at the end of the command. For Yacs configs, use -space-separated "PATH.KEY VALUE" pairs. -For python-based LazyConfig, use "path.key=value". - """.strip(), - default=None, - nargs=argparse.REMAINDER, - ) - return parser - - -def _try_get_key(cfg, *keys, default=None): - """ - Try select keys from cfg until the first key that exists. Otherwise return default. - """ - if isinstance(cfg, CfgNode): - cfg = OmegaConf.create(cfg.dump()) - for k in keys: - none = object() - p = OmegaConf.select(cfg, k, default=none) - if p is not none: - return p - return default - - -def _highlight(code, filename): - try: - import pygments - except ImportError: - return code - - from pygments.lexers import Python3Lexer, YamlLexer - from pygments.formatters import Terminal256Formatter - - lexer = Python3Lexer() if filename.endswith(".py") else YamlLexer() - code = pygments.highlight(code, lexer, Terminal256Formatter(style="monokai")) - return code - - -# adapted from: -# https://github.com/pytorch/tnt/blob/ebda066f8f55af6a906807d35bc829686618074d/torchtnt/utils/device.py#L328-L346 -def _set_float32_precision(precision: str = "high") -> None: - """Sets the precision of float32 matrix multiplications and convolution operations. - - For more information, see the PyTorch docs: - - https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html - - https://pytorch.org/docs/stable/backends.html#torch.backends.cudnn.allow_tf32 - - Args: - precision: The setting to determine which datatypes to use for matrix - multiplication and convolution operations. 
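The helper is a thin wrapper; a sketch of the equivalent plain PyTorch calls for the "high" setting:

import torch

torch.set_float32_matmul_precision("high")  # allow TF32 (or faster) matmul kernels
torch.backends.cudnn.allow_tf32 = True      # allow TF32 for cuDNN convolutions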
- """ - if not (torch.cuda.is_available()): # Not relevant for non-CUDA devices - return - # set precision for matrix multiplications - torch.set_float32_matmul_precision(precision) - # set precision for convolution operations - if precision == "highest": - torch.backends.cudnn.allow_tf32 = False - else: - torch.backends.cudnn.allow_tf32 = True - - -def default_setup(cfg, args): - """ - Perform some basic common setups at the beginning of a job, including: - - 1. Set up the detectron2 logger - 2. Log basic information about environment, cmdline arguments, and config - 3. Backup the config to the output directory - - Args: - cfg (CfgNode or omegaconf.DictConfig): the full config to be used - args (argparse.NameSpace): the command line arguments to be logged - """ - output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir") - if comm.is_main_process() and output_dir: - PathManager.mkdirs(output_dir) - - rank = comm.get_rank() - setup_logger(output_dir, distributed_rank=rank, name="fvcore") - logger = setup_logger(output_dir, distributed_rank=rank) - - logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size())) - logger.info("Environment info:\n" + collect_env_info()) - - logger.info("Command line arguments: " + str(args)) - if hasattr(args, "config_file") and args.config_file != "": - logger.info( - "Contents of args.config_file={}:\n{}".format( - args.config_file, - _highlight(PathManager.open(args.config_file, "r").read(), args.config_file), - ) - ) - - if comm.is_main_process() and output_dir: - # Note: some of our scripts may expect the existence of - # config.yaml in output directory - path = os.path.join(output_dir, "config.yaml") - if isinstance(cfg, CfgNode): - logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml"))) - with PathManager.open(path, "w") as f: - f.write(cfg.dump()) - else: - LazyConfig.save(cfg, path) - logger.info("Full config saved to {}".format(path)) - - # make sure each worker has a different, yet deterministic seed if specified - seed = _try_get_key(cfg, "SEED", "train.seed", default=-1) - seed_all_rng(None if seed < 0 else seed + rank) - - # cudnn benchmark has large overhead. It shouldn't be used considering the small size of - # typical validation set. - if not (hasattr(args, "eval_only") and args.eval_only): - torch.backends.cudnn.benchmark = _try_get_key( - cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False - ) - - fp32_precision = _try_get_key(cfg, "FLOAT32_PRECISION", "train.float32_precision", default="") - if fp32_precision != "": - logger.info(f"Set fp32 precision to {fp32_precision}") - _set_float32_precision(fp32_precision) - logger.info(f"{torch.get_float32_matmul_precision()=}") - logger.info(f"{torch.backends.cuda.matmul.allow_tf32=}") - logger.info(f"{torch.backends.cudnn.allow_tf32=}") - - -def default_writers(output_dir: str, max_iter: Optional[int] = None): - """ - Build a list of :class:`EventWriter` to be used. - It now consists of a :class:`CommonMetricPrinter`, - :class:`TensorboardXWriter` and :class:`JSONWriter`. - - Args: - output_dir: directory to store JSON metrics and tensorboard events - max_iter: the total number of iterations - - Returns: - list[EventWriter]: a list of :class:`EventWriter` objects. - """ - PathManager.mkdirs(output_dir) - return [ - # It may not always print what you want to see, since it prints "common" metrics only. 
- CommonMetricPrinter(max_iter), - JSONWriter(os.path.join(output_dir, "metrics.json")), - TensorboardXWriter(output_dir), - ] - - -class DefaultPredictor: - """ - Create a simple end-to-end predictor with the given config that runs on - single device for a single input image. - - Compared to using the model directly, this class does the following additions: - - 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. - 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. - 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. - 4. Take one input image and produce a single output, instead of a batch. - - This is meant for simple demo purposes, so it does the above steps automatically. - This is not meant for benchmarks or running complicated inference logic. - If you'd like to do anything more complicated, please refer to its source code as - examples to build and use the model manually. - - Attributes: - metadata (Metadata): the metadata of the underlying dataset, obtained from - cfg.DATASETS.TEST. - - Examples: - :: - pred = DefaultPredictor(cfg) - inputs = cv2.imread("input.jpg") - outputs = pred(inputs) - """ - - def __init__(self, cfg): - self.cfg = cfg.clone() # cfg can be modified by model - self.model = build_model(self.cfg) - self.model.eval() - if len(cfg.DATASETS.TEST): - self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) - - checkpointer = DetectionCheckpointer(self.model) - checkpointer.load(cfg.MODEL.WEIGHTS) - - self.aug = T.ResizeShortestEdge( - [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST - ) - - self.input_format = cfg.INPUT.FORMAT - assert self.input_format in ["RGB", "BGR"], self.input_format - - def __call__(self, original_image): - """ - Args: - original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). - - Returns: - predictions (dict): - the output of the model for one image only. - See :doc:`/tutorials/models` for details about the format. - """ - with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 - # Apply pre-processing to image. - if self.input_format == "RGB": - # whether the model expects BGR inputs or RGB - original_image = original_image[:, :, ::-1] - height, width = original_image.shape[:2] - image = self.aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) - image.to(self.cfg.MODEL.DEVICE) - - inputs = {"image": image, "height": height, "width": width} - - predictions = self.model([inputs])[0] - return predictions - - -class DefaultTrainer(TrainerBase): - """ - A trainer with default training logic. It does the following: - - 1. Create a :class:`SimpleTrainer` using model, optimizer, dataloader - defined by the given config. Create a LR scheduler defined by the config. - 2. Load the last checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when - `resume_or_load` is called. - 3. Register a few common hooks defined by the config. - - It is created to simplify the **standard model training workflow** and reduce code boilerplate - for users who only need the standard training workflow, with standard features. - It means this class makes *many assumptions* about your training logic that - may easily become invalid in a new research. In fact, any assumptions beyond those made in the - :class:`SimpleTrainer` are too much for research. - - The code of this class has been annotated about restrictive assumptions it makes. - When they do not work for you, you're encouraged to: - - 1. 
Overwrite methods of this class, OR: - 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and - nothing else. You can then add your own hooks if needed. OR: - 3. Write your own training loop similar to `tools/plain_train_net.py`. - - See the :doc:`/tutorials/training` tutorials for more details. - - Note that the behavior of this class, like other functions/classes in - this file, is not stable, since it is meant to represent the "common default behavior". - It is only guaranteed to work well with the standard models and training workflow in detectron2. - To obtain more stable behavior, write your own training logic with other public APIs. - - Examples: - :: - trainer = DefaultTrainer(cfg) - trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS - trainer.train() - - Attributes: - scheduler: - checkpointer (DetectionCheckpointer): - cfg (CfgNode): - """ - - def __init__(self, cfg): - """ - Args: - cfg (CfgNode): - """ - super().__init__() - logger = logging.getLogger("detectron2") - if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 - setup_logger() - cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) - - # Assume these objects must be constructed in this order. - model = self.build_model(cfg) - optimizer = self.build_optimizer(cfg, model) - data_loader = self.build_train_loader(cfg) - - model = create_ddp_model(model, broadcast_buffers=False) - self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( - model, data_loader, optimizer - ) - - self.scheduler = self.build_lr_scheduler(cfg, optimizer) - self.checkpointer = DetectionCheckpointer( - # Assume you want to save checkpoints together with logs/statistics - model, - cfg.OUTPUT_DIR, - trainer=weakref.proxy(self), - ) - self.start_iter = 0 - self.max_iter = cfg.SOLVER.MAX_ITER - self.cfg = cfg - - self.register_hooks(self.build_hooks()) - - def resume_or_load(self, resume=True): - """ - If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by - a `last_checkpoint` file), resume from the file. Resuming means loading all - available states (eg. optimizer and scheduler) and update iteration counter - from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used. - - Otherwise, this is considered as an independent training. The method will load model - weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start - from iteration 0. - - Args: - resume (bool): whether to do resume or not - """ - self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) - if resume and self.checkpointer.has_checkpoint(): - # The checkpoint stores the training iteration that just finished, thus we start - # at the next iteration - self.start_iter = self.iter + 1 - - def build_hooks(self): - """ - Build a list of default hooks, including timing, evaluation, - checkpointing, lr scheduling, precise BN, writing events. - - Returns: - list[HookBase]: - """ - cfg = self.cfg.clone() - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN - - ret = [ - hooks.IterationTimer(), - hooks.LRScheduler(), - ( - hooks.PreciseBN( - # Run at the same freq as (but before) evaluation. 
- cfg.TEST.EVAL_PERIOD, - self.model, - # Build a new data loader to not affect training - self.build_train_loader(cfg), - cfg.TEST.PRECISE_BN.NUM_ITER, - ) - if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) - else None - ), - ] - - # Do PreciseBN before checkpointer, because it updates the model and need to - # be saved by checkpointer. - # This is not always the best: if checkpointing has a different frequency, - # some checkpoints may have more precise statistics than others. - if comm.is_main_process(): - ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) - - def test_and_save_results(): - self._last_eval_results = self.test(self.cfg, self.model) - return self._last_eval_results - - # Do evaluation after checkpointer, because then if it fails, - # we can use the saved checkpoint to debug. - ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) - - if comm.is_main_process(): - # Here the default print/log frequency of each writer is used. - # run writers in the end, so that evaluation metrics are written - ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) - return ret - - def build_writers(self): - """ - Build a list of writers to be used using :func:`default_writers()`. - If you'd like a different list of writers, you can overwrite it in - your trainer. - - Returns: - list[EventWriter]: a list of :class:`EventWriter` objects. - """ - return default_writers(self.cfg.OUTPUT_DIR, self.max_iter) - - def train(self): - """ - Run training. - - Returns: - OrderedDict of results, if evaluation is enabled. Otherwise None. - """ - super().train(self.start_iter, self.max_iter) - if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): - assert hasattr( - self, "_last_eval_results" - ), "No evaluation results obtained during training!" - verify_results(self.cfg, self._last_eval_results) - return self._last_eval_results - - def run_step(self): - self._trainer.iter = self.iter - self._trainer.run_step() - - def state_dict(self): - ret = super().state_dict() - ret["_trainer"] = self._trainer.state_dict() - return ret - - def load_state_dict(self, state_dict): - super().load_state_dict(state_dict) - self._trainer.load_state_dict(state_dict["_trainer"]) - - @classmethod - def build_model(cls, cfg): - """ - Returns: - torch.nn.Module: - - It now calls :func:`detectron2.modeling.build_model`. - Overwrite it if you'd like a different model. - """ - model = build_model(cfg) - logger = logging.getLogger(__name__) - logger.info("Model:\n{}".format(model)) - return model - - @classmethod - def build_optimizer(cls, cfg, model): - """ - Returns: - torch.optim.Optimizer: - - It now calls :func:`detectron2.solver.build_optimizer`. - Overwrite it if you'd like a different optimizer. - """ - return build_optimizer(cfg, model) - - @classmethod - def build_lr_scheduler(cls, cfg, optimizer): - """ - It now calls :func:`detectron2.solver.build_lr_scheduler`. - Overwrite it if you'd like a different scheduler. - """ - return build_lr_scheduler(cfg, optimizer) - - @classmethod - def build_train_loader(cls, cfg): - """ - Returns: - iterable - - It now calls :func:`detectron2.data.build_detection_train_loader`. - Overwrite it if you'd like a different data loader. - """ - return build_detection_train_loader(cfg) - - @classmethod - def build_test_loader(cls, cfg, dataset_name): - """ - Returns: - iterable - - It now calls :func:`detectron2.data.build_detection_test_loader`. - Overwrite it if you'd like a different data loader. 
- """ - return build_detection_test_loader(cfg, dataset_name) - - @classmethod - def build_evaluator(cls, cfg, dataset_name): - """ - Returns: - DatasetEvaluator or None - - It is not implemented by default. - """ - raise NotImplementedError( - """ -If you want DefaultTrainer to automatically run evaluation, -please implement `build_evaluator()` in subclasses (see train_net.py for example). -Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). -""" - ) - - @classmethod - def test(cls, cfg, model, evaluators=None): - """ - Evaluate the given model. The given model is expected to already contain - weights to evaluate. - - Args: - cfg (CfgNode): - model (nn.Module): - evaluators (list[DatasetEvaluator] or None): if None, will call - :meth:`build_evaluator`. Otherwise, must have the same length as - ``cfg.DATASETS.TEST``. - - Returns: - dict: a dict of result metrics - """ - logger = logging.getLogger(__name__) - if isinstance(evaluators, DatasetEvaluator): - evaluators = [evaluators] - if evaluators is not None: - assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( - len(cfg.DATASETS.TEST), len(evaluators) - ) - - results = OrderedDict() - for idx, dataset_name in enumerate(cfg.DATASETS.TEST): - data_loader = cls.build_test_loader(cfg, dataset_name) - # When evaluators are passed in as arguments, - # implicitly assume that evaluators can be created before data_loader. - if evaluators is not None: - evaluator = evaluators[idx] - else: - try: - evaluator = cls.build_evaluator(cfg, dataset_name) - except NotImplementedError: - logger.warn( - "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " - "or implement its `build_evaluator` method." - ) - results[dataset_name] = {} - continue - results_i = inference_on_dataset(model, data_loader, evaluator) - results[dataset_name] = results_i - if comm.is_main_process(): - assert isinstance( - results_i, dict - ), "Evaluator must return a dict on the main process. Got {} instead.".format( - results_i - ) - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - - if len(results) == 1: - results = list(results.values())[0] - return results - - @staticmethod - def auto_scale_workers(cfg, num_workers: int): - """ - When the config is defined for certain number of workers (according to - ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of - workers currently in use, returns a new cfg where the total batch size - is scaled so that the per-GPU batch size stays the same as the - original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``. - - Other config options are also scaled accordingly: - * training steps and warmup steps are scaled inverse proportionally. - * learning rate are scaled proportionally, following :paper:`ImageNet in 1h`. - - For example, with the original config like the following: - - .. code-block:: yaml - - IMS_PER_BATCH: 16 - BASE_LR: 0.1 - REFERENCE_WORLD_SIZE: 8 - MAX_ITER: 5000 - STEPS: (4000,) - CHECKPOINT_PERIOD: 1000 - - When this config is used on 16 GPUs instead of the reference number 8, - calling this method will return a new config with: - - .. code-block:: yaml - - IMS_PER_BATCH: 32 - BASE_LR: 0.2 - REFERENCE_WORLD_SIZE: 16 - MAX_ITER: 2500 - STEPS: (2000,) - CHECKPOINT_PERIOD: 500 - - Note that both the original config and this new config can be trained on 16 GPUs. - It's up to user whether to enable this feature (by setting ``REFERENCE_WORLD_SIZE``). - - Returns: - CfgNode: a new config. 
Same as original if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``. - """ - old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE - if old_world_size == 0 or old_world_size == num_workers: - return cfg - cfg = cfg.clone() - frozen = cfg.is_frozen() - cfg.defrost() - - assert ( - cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0 - ), "Invalid REFERENCE_WORLD_SIZE in config!" - scale = num_workers / old_world_size - bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale)) - lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale - max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale)) - warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale)) - cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS) - cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale)) - cfg.SOLVER.CHECKPOINT_PERIOD = int(round(cfg.SOLVER.CHECKPOINT_PERIOD / scale)) - cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers # maintain invariant - logger = logging.getLogger(__name__) - logger.info( - f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, " - f"max_iter={max_iter}, warmup={warmup_iter}." - ) - - if frozen: - cfg.freeze() - return cfg - - -# Access basic attributes from the underlying trainer -for _attr in ["model", "data_loader", "optimizer"]: - setattr( - DefaultTrainer, - _attr, - property( - # getter - lambda self, x=_attr: getattr(self._trainer, x), - # setter - lambda self, value, x=_attr: setattr(self._trainer, x, value), - ), - ) diff --git a/detectron2/detectron2/engine/hooks.py b/detectron2/detectron2/engine/hooks.py deleted file mode 100644 index fc37af0fd3a276eb389f7667be113b41ca53f012..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/engine/hooks.py +++ /dev/null @@ -1,690 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import datetime -import itertools -import logging -import math -import operator -import os -import tempfile -import time -import warnings -from collections import Counter -import torch -from fvcore.common.checkpoint import Checkpointer -from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer -from fvcore.common.param_scheduler import ParamScheduler -from fvcore.common.timer import Timer -from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats - -import detectron2.utils.comm as comm -from detectron2.evaluation.testing import flatten_results_dict -from detectron2.solver import LRMultiplier -from detectron2.solver import LRScheduler as _LRScheduler -from detectron2.utils.events import EventStorage, EventWriter -from detectron2.utils.file_io import PathManager - -from .train_loop import HookBase - -__all__ = [ - "CallbackHook", - "IterationTimer", - "PeriodicWriter", - "PeriodicCheckpointer", - "BestCheckpointer", - "LRScheduler", - "AutogradProfiler", - "EvalHook", - "PreciseBN", - "TorchProfiler", - "TorchMemoryStats", -] - - -""" -Implement some common hooks. -""" - - -class CallbackHook(HookBase): - """ - Create a hook using callback functions provided by the user. - """ - - def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): - """ - Each argument is a function that takes one argument: the trainer. 
- """ - self._before_train = before_train - self._before_step = before_step - self._after_step = after_step - self._after_train = after_train - - def before_train(self): - if self._before_train: - self._before_train(self.trainer) - - def after_train(self): - if self._after_train: - self._after_train(self.trainer) - # The functions may be closures that hold reference to the trainer - # Therefore, delete them to avoid circular reference. - del self._before_train, self._after_train - del self._before_step, self._after_step - - def before_step(self): - if self._before_step: - self._before_step(self.trainer) - - def after_step(self): - if self._after_step: - self._after_step(self.trainer) - - -class IterationTimer(HookBase): - """ - Track the time spent for each iteration (each run_step call in the trainer). - Print a summary in the end of training. - - This hook uses the time between the call to its :meth:`before_step` - and :meth:`after_step` methods. - Under the convention that :meth:`before_step` of all hooks should only - take negligible amount of time, the :class:`IterationTimer` hook should be - placed at the beginning of the list of hooks to obtain accurate timing. - """ - - def __init__(self, warmup_iter=3): - """ - Args: - warmup_iter (int): the number of iterations at the beginning to exclude - from timing. - """ - self._warmup_iter = warmup_iter - self._step_timer = Timer() - self._start_time = time.perf_counter() - self._total_timer = Timer() - - def before_train(self): - self._start_time = time.perf_counter() - self._total_timer.reset() - self._total_timer.pause() - - def after_train(self): - logger = logging.getLogger(__name__) - total_time = time.perf_counter() - self._start_time - total_time_minus_hooks = self._total_timer.seconds() - hook_time = total_time - total_time_minus_hooks - - num_iter = self.trainer.storage.iter + 1 - self.trainer.start_iter - self._warmup_iter - - if num_iter > 0 and total_time_minus_hooks > 0: - # Speed is meaningful only after warmup - # NOTE this format is parsed by grep in some scripts - logger.info( - "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( - num_iter, - str(datetime.timedelta(seconds=int(total_time_minus_hooks))), - total_time_minus_hooks / num_iter, - ) - ) - - logger.info( - "Total training time: {} ({} on hooks)".format( - str(datetime.timedelta(seconds=int(total_time))), - str(datetime.timedelta(seconds=int(hook_time))), - ) - ) - - def before_step(self): - self._step_timer.reset() - self._total_timer.resume() - - def after_step(self): - # +1 because we're in after_step, the current step is done - # but not yet counted - iter_done = self.trainer.storage.iter - self.trainer.start_iter + 1 - if iter_done >= self._warmup_iter: - sec = self._step_timer.seconds() - self.trainer.storage.put_scalars(time=sec) - else: - self._start_time = time.perf_counter() - self._total_timer.reset() - - self._total_timer.pause() - - -class PeriodicWriter(HookBase): - """ - Write events to EventStorage (by calling ``writer.write()``) periodically. - - It is executed every ``period`` iterations and after the last iteration. - Note that ``period`` does not affect how data is smoothed by each writer. 
- """ - - def __init__(self, writers, period=20): - """ - Args: - writers (list[EventWriter]): a list of EventWriter objects - period (int): - """ - self._writers = writers - for w in writers: - assert isinstance(w, EventWriter), w - self._period = period - - def after_step(self): - if (self.trainer.iter + 1) % self._period == 0 or ( - self.trainer.iter == self.trainer.max_iter - 1 - ): - for writer in self._writers: - writer.write() - - def after_train(self): - for writer in self._writers: - # If any new data is found (e.g. produced by other after_train), - # write them before closing - writer.write() - writer.close() - - -class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): - """ - Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. - - Note that when used as a hook, - it is unable to save additional data other than what's defined - by the given `checkpointer`. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def before_train(self): - self.max_iter = self.trainer.max_iter - - def after_step(self): - # No way to use **kwargs - self.step(self.trainer.iter) - - -class BestCheckpointer(HookBase): - """ - Checkpoints best weights based off given metric. - - This hook should be used in conjunction to and executed after the hook - that produces the metric, e.g. `EvalHook`. - """ - - def __init__( - self, - eval_period: int, - checkpointer: Checkpointer, - val_metric: str, - mode: str = "max", - file_prefix: str = "model_best", - ) -> None: - """ - Args: - eval_period (int): the period `EvalHook` is set to run. - checkpointer: the checkpointer object used to save checkpoints. - val_metric (str): validation metric to track for best checkpoint, e.g. "bbox/AP50" - mode (str): one of {'max', 'min'}. controls whether the chosen val metric should be - maximized or minimized, e.g. for "bbox/AP50" it should be "max" - file_prefix (str): the prefix of checkpoint's filename, defaults to "model_best" - """ - self._logger = logging.getLogger(__name__) - self._period = eval_period - self._val_metric = val_metric - assert mode in [ - "max", - "min", - ], f'Mode "{mode}" to `BestCheckpointer` is unknown. It should be one of {"max", "min"}.' - if mode == "max": - self._compare = operator.gt - else: - self._compare = operator.lt - self._checkpointer = checkpointer - self._file_prefix = file_prefix - self.best_metric = None - self.best_iter = None - - def _update_best(self, val, iteration): - if math.isnan(val) or math.isinf(val): - return False - self.best_metric = val - self.best_iter = iteration - return True - - def _best_checking(self): - metric_tuple = self.trainer.storage.latest().get(self._val_metric) - if metric_tuple is None: - self._logger.warning( - f"Given val metric {self._val_metric} does not seem to be computed/stored." - "Will not be checkpointing based on it." 
- ) - return - else: - latest_metric, metric_iter = metric_tuple - - if self.best_metric is None: - if self._update_best(latest_metric, metric_iter): - additional_state = {"iteration": metric_iter} - self._checkpointer.save(f"{self._file_prefix}", **additional_state) - self._logger.info( - f"Saved first model at {self.best_metric:0.5f} @ {self.best_iter} steps" - ) - elif self._compare(latest_metric, self.best_metric): - additional_state = {"iteration": metric_iter} - self._checkpointer.save(f"{self._file_prefix}", **additional_state) - self._logger.info( - f"Saved best model as latest eval score for {self._val_metric} is " - f"{latest_metric:0.5f}, better than last best score " - f"{self.best_metric:0.5f} @ iteration {self.best_iter}." - ) - self._update_best(latest_metric, metric_iter) - else: - self._logger.info( - f"Not saving as latest eval score for {self._val_metric} is {latest_metric:0.5f}, " - f"not better than best score {self.best_metric:0.5f} @ iteration {self.best_iter}." - ) - - def after_step(self): - # same conditions as `EvalHook` - next_iter = self.trainer.iter + 1 - if ( - self._period > 0 - and next_iter % self._period == 0 - and next_iter != self.trainer.max_iter - ): - self._best_checking() - - def after_train(self): - # same conditions as `EvalHook` - if self.trainer.iter + 1 >= self.trainer.max_iter: - self._best_checking() - - -class LRScheduler(HookBase): - """ - A hook which executes a torch builtin LR scheduler and summarizes the LR. - It is executed after every iteration. - """ - - def __init__(self, optimizer=None, scheduler=None): - """ - Args: - optimizer (torch.optim.Optimizer): - scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler): - if a :class:`ParamScheduler` object, it defines the multiplier over the base LR - in the optimizer. - - If any argument is not given, will try to obtain it from the trainer. 
- """ - self._optimizer = optimizer - self._scheduler = scheduler - - def before_train(self): - self._optimizer = self._optimizer or self.trainer.optimizer - if isinstance(self.scheduler, ParamScheduler): - self._scheduler = LRMultiplier( - self._optimizer, - self.scheduler, - self.trainer.max_iter, - last_iter=self.trainer.iter - 1, - ) - self._best_param_group_id = LRScheduler.get_best_param_group_id(self._optimizer) - - @staticmethod - def get_best_param_group_id(optimizer): - # NOTE: some heuristics on what LR to summarize - # summarize the param group with most parameters - largest_group = max(len(g["params"]) for g in optimizer.param_groups) - - if largest_group == 1: - # If all groups have one parameter, - # then find the most common initial LR, and use it for summary - lr_count = Counter([g["lr"] for g in optimizer.param_groups]) - lr = lr_count.most_common()[0][0] - for i, g in enumerate(optimizer.param_groups): - if g["lr"] == lr: - return i - else: - for i, g in enumerate(optimizer.param_groups): - if len(g["params"]) == largest_group: - return i - - def after_step(self): - lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] - self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) - self.scheduler.step() - - @property - def scheduler(self): - return self._scheduler or self.trainer.scheduler - - def state_dict(self): - if isinstance(self.scheduler, _LRScheduler): - return self.scheduler.state_dict() - return {} - - def load_state_dict(self, state_dict): - if isinstance(self.scheduler, _LRScheduler): - logger = logging.getLogger(__name__) - logger.info("Loading scheduler from state_dict ...") - self.scheduler.load_state_dict(state_dict) - - -class TorchProfiler(HookBase): - """ - A hook which runs `torch.profiler.profile`. - - Examples: - :: - hooks.TorchProfiler( - lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR - ) - - The above example will run the profiler for iteration 10~20 and dump - results to ``OUTPUT_DIR``. We did not profile the first few iterations - because they are typically slower than the rest. - The result files can be loaded in the ``chrome://tracing`` page in chrome browser, - and the tensorboard visualizations can be visualized using - ``tensorboard --logdir OUTPUT_DIR/log`` - """ - - def __init__(self, enable_predicate, output_dir, *, activities=None, save_tensorboard=True): - """ - Args: - enable_predicate (callable[trainer -> bool]): a function which takes a trainer, - and returns whether to enable the profiler. - It will be called once every step, and can be used to select which steps to profile. - output_dir (str): the output directory to dump tracing files. - activities (iterable): same as in `torch.profiler.profile`. 
- save_tensorboard (bool): whether to save tensorboard visualizations at (output_dir)/log/ - """ - self._enable_predicate = enable_predicate - self._activities = activities - self._output_dir = output_dir - self._save_tensorboard = save_tensorboard - - def before_step(self): - if self._enable_predicate(self.trainer): - if self._save_tensorboard: - on_trace_ready = torch.profiler.tensorboard_trace_handler( - os.path.join( - self._output_dir, - "log", - "profiler-tensorboard-iter{}".format(self.trainer.iter), - ), - f"worker{comm.get_rank()}", - ) - else: - on_trace_ready = None - self._profiler = torch.profiler.profile( - activities=self._activities, - on_trace_ready=on_trace_ready, - record_shapes=True, - profile_memory=True, - with_stack=True, - with_flops=True, - ) - self._profiler.__enter__() - else: - self._profiler = None - - def after_step(self): - if self._profiler is None: - return - self._profiler.__exit__(None, None, None) - if not self._save_tensorboard: - PathManager.mkdirs(self._output_dir) - out_file = os.path.join( - self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) - ) - if "://" not in out_file: - self._profiler.export_chrome_trace(out_file) - else: - # Support non-posix filesystems - with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: - tmp_file = os.path.join(d, "tmp.json") - self._profiler.export_chrome_trace(tmp_file) - with open(tmp_file) as f: - content = f.read() - with PathManager.open(out_file, "w") as f: - f.write(content) - - -class AutogradProfiler(TorchProfiler): - """ - A hook which runs `torch.autograd.profiler.profile`. - - Examples: - :: - hooks.AutogradProfiler( - lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR - ) - - The above example will run the profiler for iteration 10~20 and dump - results to ``OUTPUT_DIR``. We did not profile the first few iterations - because they are typically slower than the rest. - The result files can be loaded in the ``chrome://tracing`` page in chrome browser. - - Note: - When used together with NCCL on older version of GPUs, - autograd profiler may cause deadlock because it unnecessarily allocates - memory on every device it sees. The memory management calls, if - interleaved with NCCL calls, lead to deadlock on GPUs that do not - support ``cudaLaunchCooperativeKernelMultiDevice``. - """ - - def __init__(self, enable_predicate, output_dir, *, use_cuda=True): - """ - Args: - enable_predicate (callable[trainer -> bool]): a function which takes a trainer, - and returns whether to enable the profiler. - It will be called once every step, and can be used to select which steps to profile. - output_dir (str): the output directory to dump tracing files. - use_cuda (bool): same as in `torch.autograd.profiler.profile`. - """ - warnings.warn("AutogradProfiler has been deprecated in favor of TorchProfiler.") - self._enable_predicate = enable_predicate - self._use_cuda = use_cuda - self._output_dir = output_dir - - def before_step(self): - if self._enable_predicate(self.trainer): - self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda) - self._profiler.__enter__() - else: - self._profiler = None - - -class EvalHook(HookBase): - """ - Run an evaluation function periodically, and at the end of training. - - It is executed every ``eval_period`` iterations and after the last iteration. - """ - - def __init__(self, eval_period, eval_function, eval_after_train=True): - """ - Args: - eval_period (int): the period to run `eval_function`. 
Set to 0 to - not evaluate periodically (but still evaluate after the last iteration - if `eval_after_train` is True). - eval_function (callable): a function which takes no arguments, and - returns a nested dict of evaluation metrics. - eval_after_train (bool): whether to evaluate after the last iteration - - Note: - This hook must be enabled in all or none workers. - If you would like only certain workers to perform evaluation, - give other workers a no-op function (`eval_function=lambda: None`). - """ - self._period = eval_period - self._func = eval_function - self._eval_after_train = eval_after_train - - def _do_eval(self): - results = self._func() - - if results: - assert isinstance( - results, dict - ), "Eval function must return a dict. Got {} instead.".format(results) - - flattened_results = flatten_results_dict(results) - for k, v in flattened_results.items(): - try: - v = float(v) - except Exception as e: - raise ValueError( - "[EvalHook] eval_function should return a nested dict of float. " - "Got '{}: {}' instead.".format(k, v) - ) from e - self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) - - # Evaluation may take different time among workers. - # A barrier make them start the next iteration together. - comm.synchronize() - - def after_step(self): - next_iter = self.trainer.iter + 1 - if self._period > 0 and next_iter % self._period == 0: - # do the last eval in after_train - if next_iter != self.trainer.max_iter: - self._do_eval() - - def after_train(self): - # This condition is to prevent the eval from running after a failed training - if self._eval_after_train and self.trainer.iter + 1 >= self.trainer.max_iter: - self._do_eval() - # func is likely a closure that holds reference to the trainer - # therefore we clean it to avoid circular reference in the end - del self._func - - -class PreciseBN(HookBase): - """ - The standard implementation of BatchNorm uses EMA in inference, which is - sometimes suboptimal. - This class computes the true average of statistics rather than the moving average, - and put true averages to every BN layer in the given model. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def __init__(self, period, model, data_loader, num_iter): - """ - Args: - period (int): the period this hook is run, or 0 to not run during training. - The hook will always run in the end of training. - model (nn.Module): a module whose all BN layers in training mode will be - updated by precise BN. - Note that user is responsible for ensuring the BN layers to be - updated are in training mode when this hook is triggered. - data_loader (iterable): it will produce data to be run by `model(data)`. - num_iter (int): number of iterations used to compute the precise - statistics. - """ - self._logger = logging.getLogger(__name__) - if len(get_bn_modules(model)) == 0: - self._logger.info( - "PreciseBN is disabled because model does not contain BN layers in training mode." - ) - self._disabled = True - return - - self._model = model - self._data_loader = data_loader - self._num_iter = num_iter - self._period = period - self._disabled = False - - self._data_iter = None - - def after_step(self): - next_iter = self.trainer.iter + 1 - is_final = next_iter == self.trainer.max_iter - if is_final or (self._period > 0 and next_iter % self._period == 0): - self.update_stats() - - def update_stats(self): - """ - Update the model with precise statistics. Users can manually call this method. 
- """ - if self._disabled: - return - - if self._data_iter is None: - self._data_iter = iter(self._data_loader) - - def data_loader(): - for num_iter in itertools.count(1): - if num_iter % 100 == 0: - self._logger.info( - "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter) - ) - # This way we can reuse the same iterator - yield next(self._data_iter) - - with EventStorage(): # capture events in a new storage to discard them - self._logger.info( - "Running precise-BN for {} iterations... ".format(self._num_iter) - + "Note that this could produce different statistics every time." - ) - update_bn_stats(self._model, data_loader(), self._num_iter) - - -class TorchMemoryStats(HookBase): - """ - Writes pytorch's cuda memory statistics periodically. - """ - - def __init__(self, period=20, max_runs=10): - """ - Args: - period (int): Output stats each 'period' iterations - max_runs (int): Stop the logging after 'max_runs' - """ - - self._logger = logging.getLogger(__name__) - self._period = period - self._max_runs = max_runs - self._runs = 0 - - def after_step(self): - if self._runs > self._max_runs: - return - - if (self.trainer.iter + 1) % self._period == 0 or ( - self.trainer.iter == self.trainer.max_iter - 1 - ): - if torch.cuda.is_available(): - max_reserved_mb = torch.cuda.max_memory_reserved() / 1024.0 / 1024.0 - reserved_mb = torch.cuda.memory_reserved() / 1024.0 / 1024.0 - max_allocated_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 - allocated_mb = torch.cuda.memory_allocated() / 1024.0 / 1024.0 - - self._logger.info( - ( - " iter: {} " - " max_reserved_mem: {:.0f}MB " - " reserved_mem: {:.0f}MB " - " max_allocated_mem: {:.0f}MB " - " allocated_mem: {:.0f}MB " - ).format( - self.trainer.iter, - max_reserved_mb, - reserved_mb, - max_allocated_mb, - allocated_mb, - ) - ) - - self._runs += 1 - if self._runs == self._max_runs: - mem_summary = torch.cuda.memory_summary() - self._logger.info("\n" + mem_summary) - - torch.cuda.reset_peak_memory_stats() diff --git a/detectron2/detectron2/engine/launch.py b/detectron2/detectron2/engine/launch.py deleted file mode 100644 index 7052c5040e4d9e6553a1b371518cb53fb056524e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/engine/launch.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -from datetime import timedelta -import torch -import torch.distributed as dist -import torch.multiprocessing as mp - -from detectron2.utils import comm - -__all__ = ["DEFAULT_TIMEOUT", "launch"] - -DEFAULT_TIMEOUT = timedelta(minutes=30) - - -def _find_free_port(): - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Binding to port 0 will cause the OS to find an available port for us - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - # NOTE: there is still a chance the port could be taken by other processes. - return port - - -def launch( - main_func, - # Should be num_processes_per_machine, but kept for compatibility. - num_gpus_per_machine, - num_machines=1, - machine_rank=0, - dist_url=None, - args=(), - timeout=DEFAULT_TIMEOUT, -): - """ - Launch multi-process or distributed training. - This function must be called on all machines involved in the training. - It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. - - Args: - main_func: a function that will be called by `main_func(*args)` - num_gpus_per_machine (int): number of processes per machine. 
When - using GPUs, this should be the number of GPUs. - num_machines (int): the total number of machines - machine_rank (int): the rank of this machine - dist_url (str): url to connect to for distributed jobs, including protocol - e.g. "tcp://127.0.0.1:8686". - Can be set to "auto" to automatically select a free port on localhost - timeout (timedelta): timeout of the distributed workers - args (tuple): arguments passed to main_func - """ - world_size = num_machines * num_gpus_per_machine - if world_size > 1: - # https://github.com/pytorch/pytorch/pull/14391 - # TODO prctl in spawned processes - - if dist_url == "auto": - assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." - port = _find_free_port() - dist_url = f"tcp://127.0.0.1:{port}" - if num_machines > 1 and dist_url.startswith("file://"): - logger = logging.getLogger(__name__) - logger.warning( - "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" - ) - - mp.start_processes( - _distributed_worker, - nprocs=num_gpus_per_machine, - args=( - main_func, - world_size, - num_gpus_per_machine, - machine_rank, - dist_url, - args, - timeout, - ), - daemon=False, - ) - else: - main_func(*args) - - -def _distributed_worker( - local_rank, - main_func, - world_size, - num_gpus_per_machine, - machine_rank, - dist_url, - args, - timeout=DEFAULT_TIMEOUT, -): - has_gpu = torch.cuda.is_available() - if has_gpu: - assert num_gpus_per_machine <= torch.cuda.device_count() - global_rank = machine_rank * num_gpus_per_machine + local_rank - try: - dist.init_process_group( - backend="NCCL" if has_gpu else "GLOO", - init_method=dist_url, - world_size=world_size, - rank=global_rank, - timeout=timeout, - ) - except Exception as e: - logger = logging.getLogger(__name__) - logger.error("Process group URL: {}".format(dist_url)) - raise e - - # Setup the local process group. - comm.create_local_process_group(num_gpus_per_machine) - if has_gpu: - torch.cuda.set_device(local_rank) - - # synchronize is needed here to prevent a possible timeout after calling init_process_group - # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 - comm.synchronize() - - main_func(*args) diff --git a/detectron2/detectron2/engine/train_loop.py b/detectron2/detectron2/engine/train_loop.py deleted file mode 100644 index 738a69de946ae7741e2e16d322592076b3d1014d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/engine/train_loop.py +++ /dev/null @@ -1,530 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. -import concurrent.futures -import logging -import numpy as np -import time -import weakref -from typing import List, Mapping, Optional -import torch -from torch.nn.parallel import DataParallel, DistributedDataParallel - -import detectron2.utils.comm as comm -from detectron2.utils.events import EventStorage, get_event_storage -from detectron2.utils.logger import _log_api_usage - -__all__ = ["HookBase", "TrainerBase", "SimpleTrainer", "AMPTrainer"] - - -class HookBase: - """ - Base class for hooks that can be registered with :class:`TrainerBase`. - - Each hook can implement 4 methods. The way they are called is demonstrated - in the following snippet: - :: - hook.before_train() - for iter in range(start_iter, max_iter): - hook.before_step() - trainer.run_step() - hook.after_step() - iter += 1 - hook.after_train() - - Notes: - 1. 
In the hook method, users can access ``self.trainer`` to access more - properties about the context (e.g., model, current iteration, or config - if using :class:`DefaultTrainer`). - - 2. A hook that does something in :meth:`before_step` can often be - implemented equivalently in :meth:`after_step`. - If the hook takes non-trivial time, it is strongly recommended to - implement the hook in :meth:`after_step` instead of :meth:`before_step`. - The convention is that :meth:`before_step` should only take negligible time. - - Following this convention will allow hooks that do care about the difference - between :meth:`before_step` and :meth:`after_step` (e.g., timer) to - function properly. - - """ - - trainer: "TrainerBase" = None - """ - A weak reference to the trainer object. Set by the trainer when the hook is registered. - """ - - def before_train(self): - """ - Called before the first iteration. - """ - pass - - def after_train(self): - """ - Called after the last iteration. - """ - pass - - def before_step(self): - """ - Called before each iteration. - """ - pass - - def after_backward(self): - """ - Called after the backward pass of each iteration. - """ - pass - - def after_step(self): - """ - Called after each iteration. - """ - pass - - def state_dict(self): - """ - Hooks are stateless by default, but can be made checkpointable by - implementing `state_dict` and `load_state_dict`. - """ - return {} - - -class TrainerBase: - """ - Base class for iterative trainer with hooks. - - The only assumption we made here is: the training runs in a loop. - A subclass can implement what the loop is. - We made no assumptions about the existence of dataloader, optimizer, model, etc. - - Attributes: - iter(int): the current iteration. - - start_iter(int): The iteration to start with. - By convention the minimum possible value is 0. - - max_iter(int): The iteration to end training. - - storage(EventStorage): An EventStorage that's opened during the course of training. - """ - - def __init__(self) -> None: - self._hooks: List[HookBase] = [] - self.iter: int = 0 - self.start_iter: int = 0 - self.max_iter: int - self.storage: EventStorage - _log_api_usage("trainer." + self.__class__.__name__) - - def register_hooks(self, hooks: List[Optional[HookBase]]) -> None: - """ - Register hooks to the trainer. The hooks are executed in the order - they are registered. - - Args: - hooks (list[Optional[HookBase]]): list of hooks - """ - hooks = [h for h in hooks if h is not None] - for h in hooks: - assert isinstance(h, HookBase) - # To avoid circular reference, hooks and trainer cannot own each other. - # This normally does not matter, but will cause memory leak if the - # involved objects contain __del__: - # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ - h.trainer = weakref.proxy(self) - self._hooks.extend(hooks) - - def train(self, start_iter: int, max_iter: int): - """ - Args: - start_iter, max_iter (int): See docs above - """ - logger = logging.getLogger(__name__) - logger.info("Starting training from iteration {}".format(start_iter)) - - self.iter = self.start_iter = start_iter - self.max_iter = max_iter - - with EventStorage(start_iter) as self.storage: - try: - self.before_train() - for self.iter in range(start_iter, max_iter): - self.before_step() - self.run_step() - self.after_step() - # self.iter == max_iter can be used by `after_train` to - # tell whether the training successfully finished or failed - # due to exceptions. 
- self.iter += 1 - except Exception: - logger.exception("Exception during training:") - raise - finally: - self.after_train() - - def before_train(self): - for h in self._hooks: - h.before_train() - - def after_train(self): - self.storage.iter = self.iter - for h in self._hooks: - h.after_train() - - def before_step(self): - # Maintain the invariant that storage.iter == trainer.iter - # for the entire execution of each step - self.storage.iter = self.iter - - for h in self._hooks: - h.before_step() - - def after_backward(self): - for h in self._hooks: - h.after_backward() - - def after_step(self): - for h in self._hooks: - h.after_step() - - def run_step(self): - raise NotImplementedError - - def state_dict(self): - ret = {"iteration": self.iter} - hooks_state = {} - for h in self._hooks: - sd = h.state_dict() - if sd: - name = type(h).__qualname__ - if name in hooks_state: - # TODO handle repetitive stateful hooks - continue - hooks_state[name] = sd - if hooks_state: - ret["hooks"] = hooks_state - return ret - - def load_state_dict(self, state_dict): - logger = logging.getLogger(__name__) - self.iter = state_dict["iteration"] - for key, value in state_dict.get("hooks", {}).items(): - for h in self._hooks: - try: - name = type(h).__qualname__ - except AttributeError: - continue - if name == key: - h.load_state_dict(value) - break - else: - logger.warning(f"Cannot find the hook '{key}', its state_dict is ignored.") - - -class SimpleTrainer(TrainerBase): - """ - A simple trainer for the most common type of task: - single-cost single-optimizer single-data-source iterative optimization, - optionally using data-parallelism. - It assumes that every step, you: - - 1. Compute the loss with a data from the data_loader. - 2. Compute the gradients with the above loss. - 3. Update the model with the optimizer. - - All other tasks during training (checkpointing, logging, evaluation, LR schedule) - are maintained by hooks, which can be registered by :meth:`TrainerBase.register_hooks`. - - If you want to do anything fancier than this, - either subclass TrainerBase and implement your own `run_step`, - or write your own training loop. - """ - - def __init__( - self, - model, - data_loader, - optimizer, - gather_metric_period=1, - zero_grad_before_forward=False, - async_write_metrics=False, - ): - """ - Args: - model: a torch Module. Takes a data from data_loader and returns a - dict of losses. - data_loader: an iterable. Contains data to be used to call model. - optimizer: a torch optimizer. - gather_metric_period: an int. Every gather_metric_period iterations - the metrics are gathered from all the ranks to rank 0 and logged. - zero_grad_before_forward: whether to zero the gradients before the forward. - async_write_metrics: bool. If True, then write metrics asynchronously to improve - training speed - """ - super().__init__() - - """ - We set the model to training mode in the trainer. - However it's valid to train a model that's in eval mode. - If you want your model (or a submodule of it) to behave - like evaluation during training, you can overwrite its train() method. 
- """ - model.train() - - self.model = model - self.data_loader = data_loader - # to access the data loader iterator, call `self._data_loader_iter` - self._data_loader_iter_obj = None - self.optimizer = optimizer - self.gather_metric_period = gather_metric_period - self.zero_grad_before_forward = zero_grad_before_forward - self.async_write_metrics = async_write_metrics - # create a thread pool that can execute non critical logic in run_step asynchronically - # use only 1 worker so tasks will be executred in order of submitting. - self.concurrent_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1) - - def run_step(self): - """ - Implement the standard training logic described above. - """ - assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" - start = time.perf_counter() - """ - If you want to do something with the data, you can wrap the dataloader. - """ - data = next(self._data_loader_iter) - data_time = time.perf_counter() - start - - if self.zero_grad_before_forward: - """ - If you need to accumulate gradients or do something similar, you can - wrap the optimizer with your custom `zero_grad()` method. - """ - self.optimizer.zero_grad() - - """ - If you want to do something with the losses, you can wrap the model. - """ - loss_dict = self.model(data) - if isinstance(loss_dict, torch.Tensor): - losses = loss_dict - loss_dict = {"total_loss": loss_dict} - else: - losses = sum(loss_dict.values()) - if not self.zero_grad_before_forward: - """ - If you need to accumulate gradients or do something similar, you can - wrap the optimizer with your custom `zero_grad()` method. - """ - self.optimizer.zero_grad() - losses.backward() - - self.after_backward() - - if self.async_write_metrics: - # write metrics asynchronically - self.concurrent_executor.submit( - self._write_metrics, loss_dict, data_time, iter=self.iter - ) - else: - self._write_metrics(loss_dict, data_time) - - """ - If you need gradient clipping/scaling or other processing, you can - wrap the optimizer with your custom `step()` method. But it is - suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4 - """ - self.optimizer.step() - - @property - def _data_loader_iter(self): - # only create the data loader iterator when it is used - if self._data_loader_iter_obj is None: - self._data_loader_iter_obj = iter(self.data_loader) - return self._data_loader_iter_obj - - def reset_data_loader(self, data_loader_builder): - """ - Delete and replace the current data loader with a new one, which will be created - by calling `data_loader_builder` (without argument). 
- """ - del self.data_loader - data_loader = data_loader_builder() - self.data_loader = data_loader - self._data_loader_iter_obj = None - - def _write_metrics( - self, - loss_dict: Mapping[str, torch.Tensor], - data_time: float, - prefix: str = "", - iter: Optional[int] = None, - ) -> None: - logger = logging.getLogger(__name__) - - iter = self.iter if iter is None else iter - if (iter + 1) % self.gather_metric_period == 0: - try: - SimpleTrainer.write_metrics(loss_dict, data_time, iter, prefix) - except Exception: - logger.exception("Exception in writing metrics: ") - raise - - @staticmethod - def write_metrics( - loss_dict: Mapping[str, torch.Tensor], - data_time: float, - cur_iter: int, - prefix: str = "", - ) -> None: - """ - Args: - loss_dict (dict): dict of scalar losses - data_time (float): time taken by the dataloader iteration - prefix (str): prefix for logging keys - """ - metrics_dict = {k: v.detach().cpu().item() for k, v in loss_dict.items()} - metrics_dict["data_time"] = data_time - - storage = get_event_storage() - # Keep track of data time per rank - storage.put_scalar("rank_data_time", data_time, cur_iter=cur_iter) - - # Gather metrics among all workers for logging - # This assumes we do DDP-style training, which is currently the only - # supported method in detectron2. - all_metrics_dict = comm.gather(metrics_dict) - - if comm.is_main_process(): - # data_time among workers can have high variance. The actual latency - # caused by data_time is the maximum among workers. - data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) - storage.put_scalar("data_time", data_time, cur_iter=cur_iter) - - # average the rest metrics - metrics_dict = { - k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() - } - total_losses_reduced = sum(metrics_dict.values()) - if not np.isfinite(total_losses_reduced): - raise FloatingPointError( - f"Loss became infinite or NaN at iteration={cur_iter}!\n" - f"loss_dict = {metrics_dict}" - ) - - storage.put_scalar( - "{}total_loss".format(prefix), total_losses_reduced, cur_iter=cur_iter - ) - if len(metrics_dict) > 1: - storage.put_scalars(cur_iter=cur_iter, **metrics_dict) - - def state_dict(self): - ret = super().state_dict() - ret["optimizer"] = self.optimizer.state_dict() - return ret - - def load_state_dict(self, state_dict): - super().load_state_dict(state_dict) - self.optimizer.load_state_dict(state_dict["optimizer"]) - - def after_train(self): - super().after_train() - self.concurrent_executor.shutdown(wait=True) - - -class AMPTrainer(SimpleTrainer): - """ - Like :class:`SimpleTrainer`, but uses PyTorch's native automatic mixed precision - in the training loop. - """ - - def __init__( - self, - model, - data_loader, - optimizer, - gather_metric_period=1, - zero_grad_before_forward=False, - grad_scaler=None, - precision: torch.dtype = torch.float16, - log_grad_scaler: bool = False, - async_write_metrics=False, - ): - """ - Args: - model, data_loader, optimizer, gather_metric_period, zero_grad_before_forward, - async_write_metrics: same as in :class:`SimpleTrainer`. - grad_scaler: torch GradScaler to automatically scale gradients. - precision: torch.dtype as the target precision to cast to in computations - """ - unsupported = "AMPTrainer does not support single-process multi-device training!" 
- if isinstance(model, DistributedDataParallel): - assert not (model.device_ids and len(model.device_ids) > 1), unsupported - assert not isinstance(model, DataParallel), unsupported - - super().__init__( - model, data_loader, optimizer, gather_metric_period, zero_grad_before_forward - ) - - if grad_scaler is None: - from torch.cuda.amp import GradScaler - - grad_scaler = GradScaler() - self.grad_scaler = grad_scaler - self.precision = precision - self.log_grad_scaler = log_grad_scaler - - def run_step(self): - """ - Implement the AMP training logic. - """ - assert self.model.training, "[AMPTrainer] model was changed to eval mode!" - assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!" - from torch.cuda.amp import autocast - - start = time.perf_counter() - data = next(self._data_loader_iter) - data_time = time.perf_counter() - start - - if self.zero_grad_before_forward: - self.optimizer.zero_grad() - with autocast(dtype=self.precision): - loss_dict = self.model(data) - if isinstance(loss_dict, torch.Tensor): - losses = loss_dict - loss_dict = {"total_loss": loss_dict} - else: - losses = sum(loss_dict.values()) - - if not self.zero_grad_before_forward: - self.optimizer.zero_grad() - - self.grad_scaler.scale(losses).backward() - - if self.log_grad_scaler: - storage = get_event_storage() - storage.put_scalar("[metric]grad_scaler", self.grad_scaler.get_scale()) - - self.after_backward() - - if self.async_write_metrics: - # write metrics asynchronically - self.concurrent_executor.submit( - self._write_metrics, loss_dict, data_time, iter=self.iter - ) - else: - self._write_metrics(loss_dict, data_time) - - self.grad_scaler.step(self.optimizer) - self.grad_scaler.update() - - def state_dict(self): - ret = super().state_dict() - ret["grad_scaler"] = self.grad_scaler.state_dict() - return ret - - def load_state_dict(self, state_dict): - super().load_state_dict(state_dict) - self.grad_scaler.load_state_dict(state_dict["grad_scaler"]) diff --git a/detectron2/detectron2/evaluation/__init__.py b/detectron2/detectron2/evaluation/__init__.py deleted file mode 100644 index d96609e8f2261a6800fe85fcf3e1eaeaa44455c6..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator -from .coco_evaluation import COCOEvaluator -from .rotated_coco_evaluation import RotatedCOCOEvaluator -from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset -from .lvis_evaluation import LVISEvaluator -from .panoptic_evaluation import COCOPanopticEvaluator -from .pascal_voc_evaluation import PascalVOCDetectionEvaluator -from .sem_seg_evaluation import SemSegEvaluator -from .testing import print_csv_format, verify_results - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/evaluation/cityscapes_evaluation.py b/detectron2/detectron2/evaluation/cityscapes_evaluation.py deleted file mode 100644 index 749abb260861b919cbefbe338b0863d52b0d4423..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/cityscapes_evaluation.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import glob -import logging -import numpy as np -import os -import tempfile -from collections import OrderedDict -import torch -from PIL import Image - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm -from detectron2.utils.file_io import PathManager - -from .evaluator import DatasetEvaluator - - -class CityscapesEvaluator(DatasetEvaluator): - """ - Base class for evaluation using cityscapes API. - """ - - def __init__(self, dataset_name): - """ - Args: - dataset_name (str): the name of the dataset. - It must have the following metadata associated with it: - "thing_classes", "gt_dir". - """ - self._metadata = MetadataCatalog.get(dataset_name) - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - def reset(self): - self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") - self._temp_dir = self._working_dir.name - # All workers will write to the same results directory - # TODO this does not work in distributed training - assert ( - comm.get_local_size() == comm.get_world_size() - ), "CityscapesEvaluator currently do not work with multiple machines." - self._temp_dir = comm.all_gather(self._temp_dir)[0] - if self._temp_dir != self._working_dir.name: - self._working_dir.cleanup() - self._logger.info( - "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) - ) - - -class CityscapesInstanceEvaluator(CityscapesEvaluator): - """ - Evaluate instance segmentation results on cityscapes dataset using cityscapes API. - - Note: - * It does not work in multi-machine distributed training. - * It contains a synchronization, therefore has to be used on all ranks. - * Only the main process runs evaluation. - """ - - def process(self, inputs, outputs): - from deeplearning.projects.cityscapesApi.cityscapesscripts.helpers.labels import name2label - - for input, output in zip(inputs, outputs): - file_name = input["file_name"] - basename = os.path.splitext(os.path.basename(file_name))[0] - pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") - - if "instances" in output: - output = output["instances"].to(self._cpu_device) - num_instances = len(output) - with open(pred_txt, "w") as fout: - for i in range(num_instances): - pred_class = output.pred_classes[i] - classes = self._metadata.thing_classes[pred_class] - class_id = name2label[classes].id - score = output.scores[i] - mask = output.pred_masks[i].numpy().astype("uint8") - png_filename = os.path.join( - self._temp_dir, basename + "_{}_{}.png".format(i, classes) - ) - - Image.fromarray(mask * 255).save(png_filename) - fout.write( - "{} {} {}\n".format(os.path.basename(png_filename), class_id, score) - ) - else: - # Cityscapes requires a prediction file for every ground truth image. - with open(pred_txt, "w") as fout: - pass - - def evaluate(self): - """ - Returns: - dict: has a key "segm", whose value is a dict of "AP" and "AP50". 
- """ - comm.synchronize() - if comm.get_rank() > 0: - return - import deeplearning.projects.cityscapesApi.cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval # noqa: E501 - - self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) - - # set some global states in cityscapes evaluation API, before evaluating - cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) - cityscapes_eval.args.predictionWalk = None - cityscapes_eval.args.JSONOutput = False - cityscapes_eval.args.colorized = False - cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") - - # These lines are adopted from - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa - gt_dir = PathManager.get_local_path(self._metadata.gt_dir) - groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) - assert len( - groundTruthImgList - ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( - cityscapes_eval.args.groundTruthSearch - ) - predictionImgList = [] - for gt in groundTruthImgList: - predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) - results = cityscapes_eval.evaluateImgLists( - predictionImgList, groundTruthImgList, cityscapes_eval.args - )["averages"] - - ret = OrderedDict() - ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} - self._working_dir.cleanup() - return ret - - -class CityscapesSemSegEvaluator(CityscapesEvaluator): - """ - Evaluate semantic segmentation results on cityscapes dataset using cityscapes API. - - Note: - * It does not work in multi-machine distributed training. - * It contains a synchronization, therefore has to be used on all ranks. - * Only the main process runs evaluation. - """ - - def process(self, inputs, outputs): - from deeplearning.projects.cityscapesApi.cityscapesscripts.helpers.labels import ( - trainId2label, - ) - - for input, output in zip(inputs, outputs): - file_name = input["file_name"] - basename = os.path.splitext(os.path.basename(file_name))[0] - pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") - - output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() - pred = 255 * np.ones(output.shape, dtype=np.uint8) - for train_id, label in trainId2label.items(): - if label.ignoreInEval: - continue - pred[output == train_id] = label.id - Image.fromarray(pred).save(pred_filename) - - def evaluate(self): - comm.synchronize() - if comm.get_rank() > 0: - return - # Load the Cityscapes eval script *after* setting the required env var, - # since the script reads CITYSCAPES_DATASET into global variables at load time. 
- import deeplearning.projects.cityscapesApi.cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval # noqa: E501 - - self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) - - # set some global states in cityscapes evaluation API, before evaluating - cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) - cityscapes_eval.args.predictionWalk = None - cityscapes_eval.args.JSONOutput = False - cityscapes_eval.args.colorized = False - - # These lines are adopted from - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa - gt_dir = PathManager.get_local_path(self._metadata.gt_dir) - groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) - assert len( - groundTruthImgList - ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( - cityscapes_eval.args.groundTruthSearch - ) - predictionImgList = [] - for gt in groundTruthImgList: - predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) - results = cityscapes_eval.evaluateImgLists( - predictionImgList, groundTruthImgList, cityscapes_eval.args - ) - ret = OrderedDict() - ret["sem_seg"] = { - "IoU": 100.0 * results["averageScoreClasses"], - "iIoU": 100.0 * results["averageScoreInstClasses"], - "IoU_sup": 100.0 * results["averageScoreCategories"], - "iIoU_sup": 100.0 * results["averageScoreInstCategories"], - } - self._working_dir.cleanup() - return ret diff --git a/detectron2/detectron2/evaluation/coco_evaluation.py b/detectron2/detectron2/evaluation/coco_evaluation.py deleted file mode 100644 index fe8142cda29613ce1cf78523e422bf598128f590..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/coco_evaluation.py +++ /dev/null @@ -1,722 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import contextlib -import copy -import io -import itertools -import json -import logging -import numpy as np -import os -import pickle -from collections import OrderedDict -import pycocotools.mask as mask_util -import torch -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from tabulate import tabulate - -import detectron2.utils.comm as comm -from detectron2.config import CfgNode -from detectron2.data import MetadataCatalog -from detectron2.data.datasets.coco import convert_to_coco_json -from detectron2.structures import Boxes, BoxMode, pairwise_iou -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import create_small_table - -from .evaluator import DatasetEvaluator - -try: - from detectron2.evaluation.fast_eval_api import COCOeval_opt -except ImportError: - COCOeval_opt = COCOeval - - -class COCOEvaluator(DatasetEvaluator): - """ - Evaluate AR for object proposals, AP for instance detection/segmentation, AP - for keypoint detection outputs using COCO's metrics. - See http://cocodataset.org/#detection-eval and - http://cocodataset.org/#keypoints-eval to understand its metrics. - The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means - the metric cannot be computed (e.g. due to no predictions made). - - In addition to COCO, this evaluator is able to support any bounding box detection, - instance segmentation, or keypoint detection dataset. 
- """ - - def __init__( - self, - dataset_name, - tasks=None, - distributed=True, - output_dir=None, - *, - max_dets_per_image=None, - use_fast_impl=True, - kpt_oks_sigmas=(), - allow_cached_coco=True, - ): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - It must have either the following corresponding metadata: - - "json_file": the path to the COCO format annotation - - Or it must be in detectron2's standard dataset format - so it can be converted to COCO format automatically. - tasks (tuple[str]): tasks that can be evaluated under the given - configuration. A task is one of "bbox", "segm", "keypoints". - By default, will infer this automatically from predictions. - distributed (True): if True, will collect results from all ranks and run evaluation - in the main process. - Otherwise, will only evaluate the results in the current process. - output_dir (str): optional, an output directory to dump all - results predicted on the dataset. The dump contains two files: - - 1. "instances_predictions.pth" a file that can be loaded with `torch.load` and - contains all the results in the format they are produced by the model. - 2. "coco_instances_results.json" a json file in COCO's result format. - max_dets_per_image (int): limit on the maximum number of detections per image. - By default in COCO, this limit is to 100, but this can be customized - to be greater, as is needed in evaluation metrics AP fixed and AP pool - (see https://arxiv.org/pdf/2102.01066.pdf) - This doesn't affect keypoint evaluation. - use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. - Although the results should be very close to the official implementation in COCO - API, it is still recommended to compute results with the official API for use in - papers. The faster implementation also uses more RAM. - kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. - See http://cocodataset.org/#keypoints-eval - When empty, it will use the defaults in COCO. - Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. - allow_cached_coco (bool): Whether to use cached coco json from previous validation - runs. You should set this to False if you need to use different validation data. - Defaults to True. - """ - self._logger = logging.getLogger(__name__) - self._distributed = distributed - self._output_dir = output_dir - - if use_fast_impl and (COCOeval_opt is COCOeval): - self._logger.info("Fast COCO eval is not built. Falling back to official COCO eval.") - use_fast_impl = False - self._use_fast_impl = use_fast_impl - - # COCOeval requires the limit on the number of detections per image (maxDets) to be a list - # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the - # 3rd element (100) is used as the limit on the number of detections per image when - # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval, - # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults. - if max_dets_per_image is None: - max_dets_per_image = [1, 10, 100] - else: - max_dets_per_image = [1, 10, max_dets_per_image] - self._max_dets_per_image = max_dets_per_image - - if tasks is not None and isinstance(tasks, CfgNode): - kpt_oks_sigmas = ( - tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas - ) - self._logger.warn( - "COCO Evaluator instantiated using config, this is deprecated behavior." - " Please pass in explicit arguments instead." 
- ) - self._tasks = None # Infering it from predictions should be better - else: - self._tasks = tasks - - self._cpu_device = torch.device("cpu") - - self._metadata = MetadataCatalog.get(dataset_name) - if not hasattr(self._metadata, "json_file"): - if output_dir is None: - raise ValueError( - "output_dir must be provided to COCOEvaluator " - "for datasets not in COCO format." - ) - self._logger.info(f"Trying to convert '{dataset_name}' to COCO format ...") - - cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") - self._metadata.json_file = cache_path - convert_to_coco_json(dataset_name, cache_path, allow_cached=allow_cached_coco) - - json_file = PathManager.get_local_path(self._metadata.json_file) - with contextlib.redirect_stdout(io.StringIO()): - self._coco_api = COCO(json_file) - - # Test set json files do not contain annotations (evaluation must be - # performed using the COCO evaluation server). - self._do_evaluation = "annotations" in self._coco_api.dataset - if self._do_evaluation: - self._kpt_oks_sigmas = kpt_oks_sigmas - - def reset(self): - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - if len(prediction) > 1: - self._predictions.append(prediction) - - def evaluate(self, img_ids=None): - """ - Args: - img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset - """ - if self._distributed: - comm.synchronize() - predictions = comm.gather(self._predictions, dst=0) - predictions = list(itertools.chain(*predictions)) - - if not comm.is_main_process(): - return {} - else: - predictions = self._predictions - - if len(predictions) == 0: - self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") - return {} - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "instances_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._results = OrderedDict() - if "proposals" in predictions[0]: - self._eval_box_proposals(predictions) - if "instances" in predictions[0]: - self._eval_predictions(predictions, img_ids=img_ids) - # Copy so the caller can do whatever with results - return copy.deepcopy(self._results) - - def _tasks_from_predictions(self, predictions): - """ - Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. - """ - tasks = {"bbox"} - for pred in predictions: - if "segmentation" in pred: - tasks.add("segm") - if "keypoints" in pred: - tasks.add("keypoints") - return sorted(tasks) - - def _eval_predictions(self, predictions, img_ids=None): - """ - Evaluate predictions. Fill self._results with the metrics of the tasks. 
- """ - self._logger.info("Preparing results for COCO format ...") - coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) - tasks = self._tasks or self._tasks_from_predictions(coco_results) - - # unmap the category ids for COCO - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id - all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) - num_classes = len(all_contiguous_ids) - assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 - - reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} - for result in coco_results: - category_id = result["category_id"] - assert category_id < num_classes, ( - f"A prediction has class={category_id}, " - f"but the dataset only has {num_classes} classes and " - f"predicted class id should be in [0, {num_classes - 1}]." - ) - result["category_id"] = reverse_id_mapping[category_id] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(coco_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info( - "Evaluating predictions with {} COCO API...".format( - "unofficial" if self._use_fast_impl else "official" - ) - ) - for task in sorted(tasks): - assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" - coco_eval = ( - _evaluate_predictions_on_coco( - self._coco_api, - coco_results, - task, - kpt_oks_sigmas=self._kpt_oks_sigmas, - cocoeval_fn=COCOeval_opt if self._use_fast_impl else COCOeval, - img_ids=img_ids, - max_dets_per_image=self._max_dets_per_image, - ) - if len(coco_results) > 0 - else None # cocoapi does not handle empty results very well - ) - - res = self._derive_coco_results( - coco_eval, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _eval_box_proposals(self, predictions): - """ - Evaluate the box proposals in predictions. - Fill self._results with the metrics for "box_proposals" task. - """ - if self._output_dir: - # Saving generated box proposals to file. - # Predicted box_proposals are in XYXY_ABS mode. 
-            bbox_mode = BoxMode.XYXY_ABS.value
-            ids, boxes, objectness_logits = [], [], []
-            for prediction in predictions:
-                ids.append(prediction["image_id"])
-                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
-                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())
-
-            proposal_data = {
-                "boxes": boxes,
-                "objectness_logits": objectness_logits,
-                "ids": ids,
-                "bbox_mode": bbox_mode,
-            }
-            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
-                pickle.dump(proposal_data, f)
-
-        if not self._do_evaluation:
-            self._logger.info("Annotations are not available for evaluation.")
-            return
-
-        self._logger.info("Evaluating bbox proposals ...")
-        res = {}
-        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
-        for limit in [100, 1000]:
-            for area, suffix in areas.items():
-                stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit)
-                key = "AR{}@{:d}".format(suffix, limit)
-                res[key] = float(stats["ar"].item() * 100)
-        self._logger.info("Proposal metrics: \n" + create_small_table(res))
-        self._results["box_proposals"] = res
-
-    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
-        """
-        Derive the desired score numbers from summarized COCOeval.
-
-        Args:
-            coco_eval (None or COCOEval): None represents no predictions from model.
-            iou_type (str):
-            class_names (None or list[str]): if provided, will use it to predict
-                per-category AP.
-
-        Returns:
-            a dict of {metric name: score}
-        """
-
-        metrics = {
-            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
-            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
-            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
-        }[iou_type]
-
-        if coco_eval is None:
-            self._logger.warn("No predictions from the model!")
-            return {metric: float("nan") for metric in metrics}
-
-        # the standard metrics
-        results = {
-            metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
-            for idx, metric in enumerate(metrics)
-        }
-        self._logger.info(
-            "Evaluation results for {}: \n".format(iou_type) + create_small_table(results)
-        )
-        if not np.isfinite(sum(results.values())):
-            self._logger.info("Some metrics cannot be computed and are shown as NaN.")
-
-        if class_names is None or len(class_names) <= 1:
-            return results
-        # Compute per-category AP
-        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
-        precisions = coco_eval.eval["precision"]
-        # precision has dims (iou, recall, cls, area range, max dets)
-        assert len(class_names) == precisions.shape[2]
-
-        results_per_category = []
-        for idx, name in enumerate(class_names):
-            # area range index 0: all area ranges
-            # max dets index -1: typically 100 per image
-            precision = precisions[:, :, idx, 0, -1]
-            precision = precision[precision > -1]
-            ap = np.mean(precision) if precision.size else float("nan")
-            results_per_category.append(("{}".format(name), float(ap * 100)))
-
-        # tabulate it
-        N_COLS = min(6, len(results_per_category) * 2)
-        results_flatten = list(itertools.chain(*results_per_category))
-        results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
-        table = tabulate(
-            results_2d,
-            tablefmt="pipe",
-            floatfmt=".3f",
-            headers=["category", "AP"] * (N_COLS // 2),
-            numalign="left",
-        )
-        self._logger.info("Per-category {} AP: \n".format(iou_type) + table)
-
-        results.update({"AP-" + name: ap for name, ap in results_per_category})
-        return
results - - -def instances_to_coco_json(instances, img_id): - """ - Dump an "Instances" object to a COCO-format json that's used for evaluation. - - Args: - instances (Instances): - img_id (int): the image id - - Returns: - list[dict]: list of json annotations in COCO format. - """ - num_instance = len(instances) - if num_instance == 0: - return [] - - boxes = instances.pred_boxes.tensor.numpy() - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - boxes = boxes.tolist() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - - has_mask = instances.has("pred_masks") - if has_mask: - # use RLE to encode the masks, because they are too large and takes memory - # since this evaluator stores outputs of the entire dataset - rles = [ - mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in instances.pred_masks - ] - for rle in rles: - # "counts" is an array encoded by mask_util as a byte-stream. Python3's - # json writer which always produces strings cannot serialize a bytestream - # unless you decode it. Thankfully, utf-8 works out (which is also what - # the pycocotools/_mask.pyx does). - rle["counts"] = rle["counts"].decode("utf-8") - - has_keypoints = instances.has("pred_keypoints") - if has_keypoints: - keypoints = instances.pred_keypoints - - results = [] - for k in range(num_instance): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": boxes[k], - "score": scores[k], - } - if has_mask: - result["segmentation"] = rles[k] - if has_keypoints: - # In COCO annotations, - # keypoints coordinates are pixel indices. - # However our predictions are floating point coordinates. - # Therefore we subtract 0.5 to be consistent with the annotation format. - # This is the inverse of data loading logic in `datasets/coco.py`. - keypoints[k][:, :2] -= 0.5 - result["keypoints"] = keypoints[k].flatten().tolist() - results.append(result) - return results - - -# inspired from Detectron: -# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa -def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): - """ - Evaluate detection proposal recall metrics. This function is a much - faster alternative to the official COCO API recall evaluation code. However, - it produces slightly different results. 
- """ - # Record max overlap value for each gt box - # Return vector of overlap values - areas = { - "all": 0, - "small": 1, - "medium": 2, - "large": 3, - "96-128": 4, - "128-256": 5, - "256-512": 6, - "512-inf": 7, - } - area_ranges = [ - [0**2, 1e5**2], # all - [0**2, 32**2], # small - [32**2, 96**2], # medium - [96**2, 1e5**2], # large - [96**2, 128**2], # 96-128 - [128**2, 256**2], # 128-256 - [256**2, 512**2], # 256-512 - [512**2, 1e5**2], - ] # 512-inf - assert area in areas, "Unknown area range: {}".format(area) - area_range = area_ranges[areas[area]] - gt_overlaps = [] - num_pos = 0 - - for prediction_dict in dataset_predictions: - predictions = prediction_dict["proposals"] - - # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation - inds = predictions.objectness_logits.sort(descending=True)[1] - predictions = predictions[inds] - - ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) - anno = coco_api.loadAnns(ann_ids) - gt_boxes = [ - BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - for obj in anno - if obj["iscrowd"] == 0 - ] - gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes - gt_boxes = Boxes(gt_boxes) - gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) - - if len(gt_boxes) == 0 or len(predictions) == 0: - continue - - valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) - gt_boxes = gt_boxes[valid_gt_inds] - - num_pos += len(gt_boxes) - - if len(gt_boxes) == 0: - continue - - if limit is not None and len(predictions) > limit: - predictions = predictions[:limit] - - overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) - - _gt_overlaps = torch.zeros(len(gt_boxes)) - for j in range(min(len(predictions), len(gt_boxes))): - # find which proposal box maximally covers each gt box - # and get the iou amount of coverage for each gt box - max_overlaps, argmax_overlaps = overlaps.max(dim=0) - - # find which gt box is 'best' covered (i.e. 'best' = most iou) - gt_ovr, gt_ind = max_overlaps.max(dim=0) - assert gt_ovr >= 0 - # find the proposal box that covers the best covered gt box - box_ind = argmax_overlaps[gt_ind] - # record the iou coverage of this gt box - _gt_overlaps[j] = overlaps[box_ind, gt_ind] - assert _gt_overlaps[j] == gt_ovr - # mark the proposal box and the gt box as used - overlaps[box_ind, :] = -1 - overlaps[:, gt_ind] = -1 - - # append recorded iou coverage level - gt_overlaps.append(_gt_overlaps) - gt_overlaps = ( - torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) - ) - gt_overlaps, _ = torch.sort(gt_overlaps) - - if thresholds is None: - step = 0.05 - thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) - recalls = torch.zeros_like(thresholds) - # compute recall for each iou threshold - for i, t in enumerate(thresholds): - recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) - # ar = 2 * np.trapz(recalls, thresholds) - ar = recalls.mean() - return { - "ar": ar, - "recalls": recalls, - "thresholds": thresholds, - "gt_overlaps": gt_overlaps, - "num_pos": num_pos, - } - - -def _evaluate_predictions_on_coco( - coco_gt, - coco_results, - iou_type, - kpt_oks_sigmas=None, - cocoeval_fn=COCOeval_opt, - img_ids=None, - max_dets_per_image=None, -): - """ - Evaluate the coco results using COCOEval API. 
- """ - assert len(coco_results) > 0 - - if iou_type == "segm": - coco_results = copy.deepcopy(coco_results) - # When evaluating mask AP, if the results contain bbox, cocoapi will - # use the box area as the area of the instance, instead of the mask area. - # This leads to a different definition of small/medium/large. - # We remove the bbox field to let mask AP use mask area. - for c in coco_results: - c.pop("bbox", None) - - coco_dt = coco_gt.loadRes(coco_results) - coco_eval = cocoeval_fn(coco_gt, coco_dt, iou_type) - # For COCO, the default max_dets_per_image is [1, 10, 100]. - if max_dets_per_image is None: - max_dets_per_image = [1, 10, 100] # Default from COCOEval - else: - assert ( - len(max_dets_per_image) >= 3 - ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3" - # In the case that user supplies a custom input for max_dets_per_image, - # apply COCOevalMaxDets to evaluate AP with the custom input. - if max_dets_per_image[2] != 100: - coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type) - if iou_type != "keypoints": - coco_eval.params.maxDets = max_dets_per_image - - if img_ids is not None: - coco_eval.params.imgIds = img_ids - - if iou_type == "keypoints": - # Use the COCO default keypoint OKS sigmas unless overrides are specified - if kpt_oks_sigmas: - assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" - coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) - # COCOAPI requires every detection and every gt to have keypoints, so - # we just take the first entry from both - num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 - num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 - num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) - assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( - f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " - f"Ground truth contains {num_keypoints_gt} keypoints. " - f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " - "They have to agree with each other. For meaning of OKS, please refer to " - "http://cocodataset.org/#keypoints-eval." 
- ) - - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - return coco_eval - - -class COCOevalMaxDets(COCOeval): - """ - Modified version of COCOeval for evaluating AP with a custom - maxDets (by default for COCO, maxDets is 100) - """ - - def summarize(self): - """ - Compute and display summary metrics for evaluation results given - a custom value for max_dets_per_image - """ - - def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): - p = self.params - iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" - titleStr = "Average Precision" if ap == 1 else "Average Recall" - typeStr = "(AP)" if ap == 1 else "(AR)" - iouStr = ( - "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) - if iouThr is None - else "{:0.2f}".format(iouThr) - ) - - aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] - mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] - if ap == 1: - # dimension of precision: [TxRxKxAxM] - s = self.eval["precision"] - # IoU - if iouThr is not None: - t = np.where(iouThr == p.iouThrs)[0] - s = s[t] - s = s[:, :, :, aind, mind] - else: - # dimension of recall: [TxKxAxM] - s = self.eval["recall"] - if iouThr is not None: - t = np.where(iouThr == p.iouThrs)[0] - s = s[t] - s = s[:, :, aind, mind] - if len(s[s > -1]) == 0: - mean_s = -1 - else: - mean_s = np.mean(s[s > -1]) - print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) - return mean_s - - def _summarizeDets(): - stats = np.zeros((12,)) - # Evaluate AP using the custom limit on maximum detections per image - stats[0] = _summarize(1, maxDets=self.params.maxDets[2]) - stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) - stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) - stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) - stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) - stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) - stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) - stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) - stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) - return stats - - def _summarizeKps(): - stats = np.zeros((10,)) - stats[0] = _summarize(1, maxDets=20) - stats[1] = _summarize(1, maxDets=20, iouThr=0.5) - stats[2] = _summarize(1, maxDets=20, iouThr=0.75) - stats[3] = _summarize(1, maxDets=20, areaRng="medium") - stats[4] = _summarize(1, maxDets=20, areaRng="large") - stats[5] = _summarize(0, maxDets=20) - stats[6] = _summarize(0, maxDets=20, iouThr=0.5) - stats[7] = _summarize(0, maxDets=20, iouThr=0.75) - stats[8] = _summarize(0, maxDets=20, areaRng="medium") - stats[9] = _summarize(0, maxDets=20, areaRng="large") - return stats - - if not self.eval: - raise Exception("Please run accumulate() first") - iouType = self.params.iouType - if iouType == "segm" or iouType == "bbox": - summarize = _summarizeDets - elif iouType == "keypoints": - summarize = _summarizeKps - self.stats = summarize() - - def __str__(self): - self.summarize() diff --git a/detectron2/detectron2/evaluation/evaluator.py b/detectron2/detectron2/evaluation/evaluator.py deleted file mode 100644 index 993c5ed4f478f724e170468dee266e7363b71f6a..0000000000000000000000000000000000000000 
--- a/detectron2/detectron2/evaluation/evaluator.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import datetime -import logging -import time -from collections import OrderedDict, abc -from contextlib import ExitStack, contextmanager -from typing import List, Union -import torch -from torch import nn - -from detectron2.utils.comm import get_world_size, is_main_process -from detectron2.utils.logger import log_every_n_seconds - - -class DatasetEvaluator: - """ - Base class for a dataset evaluator. - - The function :func:`inference_on_dataset` runs the model over - all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. - - This class will accumulate information of the inputs/outputs (by :meth:`process`), - and produce evaluation results in the end (by :meth:`evaluate`). - """ - - def reset(self): - """ - Preparation for a new round of evaluation. - Should be called before starting a round of evaluation. - """ - pass - - def process(self, inputs, outputs): - """ - Process the pair of inputs and outputs. - If they contain batches, the pairs can be consumed one-by-one using `zip`: - - .. code-block:: python - - for input_, output in zip(inputs, outputs): - # do evaluation on single input/output pair - ... - - Args: - inputs (list): the inputs that's used to call the model. - outputs (list): the return value of `model(inputs)` - """ - pass - - def evaluate(self): - """ - Evaluate/summarize the performance, after processing all input/output pairs. - - Returns: - dict: - A new evaluator class can return a dict of arbitrary format - as long as the user can process the results. - In our train_net.py, we expect the following format: - - * key: the name of the task (e.g., bbox) - * value: a dict of {metric name: score}, e.g.: {"AP50": 80} - """ - pass - - -class DatasetEvaluators(DatasetEvaluator): - """ - Wrapper class to combine multiple :class:`DatasetEvaluator` instances. - - This class dispatches every evaluation call to - all of its :class:`DatasetEvaluator`. - """ - - def __init__(self, evaluators): - """ - Args: - evaluators (list): the evaluators to combine. - """ - super().__init__() - self._evaluators = evaluators - - def reset(self): - for evaluator in self._evaluators: - evaluator.reset() - - def process(self, inputs, outputs): - for evaluator in self._evaluators: - evaluator.process(inputs, outputs) - - def evaluate(self): - results = OrderedDict() - for evaluator in self._evaluators: - result = evaluator.evaluate() - if is_main_process() and result is not None: - for k, v in result.items(): - assert ( - k not in results - ), "Different evaluators produce results with the same key {}".format(k) - results[k] = v - return results - - -def inference_on_dataset( - model, - data_loader, - evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None], - callbacks=None, -): - """ - Run model on the data_loader and evaluate the metrics with evaluator. - Also benchmark the inference speed of `model.__call__` accurately. - The model will be used in eval mode. - - Args: - model (callable): a callable which takes an object from - `data_loader` and returns some outputs. - - If it's an nn.Module, it will be temporarily set to `eval` mode. - If you wish to evaluate a model in `training` mode instead, you can - wrap the given model and override its behavior of `.eval()` and `.train()`. - data_loader: an iterable object with a length. - The elements it generates will be the inputs to the model. - evaluator: the evaluator(s) to run. 
Use `None` if you only want to benchmark, - but don't want to do any evaluation. - callbacks (dict of callables): a dictionary of callback functions which can be - called at each stage of inference. - - Returns: - The return value of `evaluator.evaluate()` - """ - num_devices = get_world_size() - logger = logging.getLogger(__name__) - logger.info("Start inference on {} batches".format(len(data_loader))) - - total = len(data_loader) # inference data loader must have a fixed length - if evaluator is None: - # create a no-op evaluator - evaluator = DatasetEvaluators([]) - if isinstance(evaluator, abc.MutableSequence): - evaluator = DatasetEvaluators(evaluator) - evaluator.reset() - - num_warmup = min(5, total - 1) - start_time = time.perf_counter() - total_data_time = 0 - total_compute_time = 0 - total_eval_time = 0 - with ExitStack() as stack: - if isinstance(model, nn.Module): - stack.enter_context(inference_context(model)) - stack.enter_context(torch.no_grad()) - - start_data_time = time.perf_counter() - dict.get(callbacks or {}, "on_start", lambda: None)() - for idx, inputs in enumerate(data_loader): - total_data_time += time.perf_counter() - start_data_time - if idx == num_warmup: - start_time = time.perf_counter() - total_data_time = 0 - total_compute_time = 0 - total_eval_time = 0 - - start_compute_time = time.perf_counter() - dict.get(callbacks or {}, "before_inference", lambda: None)() - outputs = model(inputs) - dict.get(callbacks or {}, "after_inference", lambda: None)() - if torch.cuda.is_available(): - torch.cuda.synchronize() - total_compute_time += time.perf_counter() - start_compute_time - - start_eval_time = time.perf_counter() - evaluator.process(inputs, outputs) - total_eval_time += time.perf_counter() - start_eval_time - - iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) - data_seconds_per_iter = total_data_time / iters_after_start - compute_seconds_per_iter = total_compute_time / iters_after_start - eval_seconds_per_iter = total_eval_time / iters_after_start - total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start - if idx >= num_warmup * 2 or compute_seconds_per_iter > 5: - eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1))) - log_every_n_seconds( - logging.INFO, - ( - f"Inference done {idx + 1}/{total}. " - f"Dataloading: {data_seconds_per_iter:.4f} s/iter. " - f"Inference: {compute_seconds_per_iter:.4f} s/iter. " - f"Eval: {eval_seconds_per_iter:.4f} s/iter. " - f"Total: {total_seconds_per_iter:.4f} s/iter. " - f"ETA={eta}" - ), - n=5, - ) - start_data_time = time.perf_counter() - dict.get(callbacks or {}, "on_end", lambda: None)() - - # Measure the time only for this worker (before the synchronization barrier) - total_time = time.perf_counter() - start_time - total_time_str = str(datetime.timedelta(seconds=total_time)) - # NOTE this format is parsed by grep - logger.info( - "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format( - total_time_str, total_time / (total - num_warmup), num_devices - ) - ) - total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) - logger.info( - "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format( - total_compute_time_str, - total_compute_time / (total - num_warmup), - num_devices, - ) - ) - - results = evaluator.evaluate() - # An evaluator may return None when not in main process. 
- # Replace it by an empty dict instead to make it easier for downstream code to handle - if results is None: - results = {} - return results - - -@contextmanager -def inference_context(model): - """ - A context where the model is temporarily changed to eval mode, - and restored to previous mode afterwards. - - Args: - model: a torch Module - """ - training_mode = model.training - model.eval() - yield - model.train(training_mode) diff --git a/detectron2/detectron2/evaluation/fast_eval_api.py b/detectron2/detectron2/evaluation/fast_eval_api.py deleted file mode 100644 index 2eb202bd5efa3ec3d366027b1debffc269ae8b17..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/fast_eval_api.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import logging -import numpy as np -import time -from pycocotools.cocoeval import COCOeval - -from detectron2 import _C - -logger = logging.getLogger(__name__) - - -class COCOeval_opt(COCOeval): - """ - This is a slightly modified version of the original COCO API, where the functions evaluateImg() - and accumulate() are implemented in C++ to speedup evaluation - """ - - def evaluate(self): - """ - Run per image evaluation on given images and store results in self.evalImgs_cpp, a - datastructure that isn't readable from Python but is used by a c++ implementation of - accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure - self.evalImgs because this datastructure is a computational bottleneck. - :return: None - """ - tic = time.time() - - p = self.params - # add backward compatibility if useSegm is specified in params - if p.useSegm is not None: - p.iouType = "segm" if p.useSegm == 1 else "bbox" - logger.info("Evaluate annotation type *{}*".format(p.iouType)) - p.imgIds = list(np.unique(p.imgIds)) - if p.useCats: - p.catIds = list(np.unique(p.catIds)) - p.maxDets = sorted(p.maxDets) - self.params = p - - self._prepare() # bottleneck - - # loop through images, area range, max detection number - catIds = p.catIds if p.useCats else [-1] - - if p.iouType == "segm" or p.iouType == "bbox": - computeIoU = self.computeIoU - elif p.iouType == "keypoints": - computeIoU = self.computeOks - self.ious = { - (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds - } # bottleneck - - maxDet = p.maxDets[-1] - - # <<<< Beginning of code differences with original COCO API - def convert_instances_to_cpp(instances, is_det=False): - # Convert annotations for a list of instances in an image to a format that's fast - # to access in C++ - instances_cpp = [] - for instance in instances: - instance_cpp = _C.InstanceAnnotation( - int(instance["id"]), - instance["score"] if is_det else instance.get("score", 0.0), - instance["area"], - bool(instance.get("iscrowd", 0)), - bool(instance.get("ignore", 0)), - ) - instances_cpp.append(instance_cpp) - return instances_cpp - - # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ - ground_truth_instances = [ - [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] - for imgId in p.imgIds - ] - detected_instances = [ - [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds] - for imgId in p.imgIds - ] - ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] - - if not p.useCats: - # For each image, flatten per-category lists into a single list - ground_truth_instances = [[[o for c in i for o in c]] for i in 
ground_truth_instances]
-            detected_instances = [[[o for c in i for o in c]] for i in detected_instances]
-
-        # Call C++ implementation of self.evaluateImgs()
-        self._evalImgs_cpp = _C.COCOevalEvaluateImages(
-            p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances
-        )
-        self._evalImgs = None
-
-        self._paramsEval = copy.deepcopy(self.params)
-        toc = time.time()
-        logger.info("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic))
-        # >>>> End of code differences with original COCO API
-
-    def accumulate(self):
-        """
-        Accumulate per image evaluation results and store the result in self.eval. Does not
-        support changing parameter settings from those used by self.evaluate()
-        """
-        logger.info("Accumulating evaluation results...")
-        tic = time.time()
-        assert hasattr(
-            self, "_evalImgs_cpp"
-        ), "evaluate() must be called before accumulate() is called."
-
-        self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
-
-        # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
-        self.eval["recall"] = np.array(self.eval["recall"]).reshape(
-            self.eval["counts"][:1] + self.eval["counts"][2:]
-        )
-
-        # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X
-        # num_area_ranges X num_max_detections
-        self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"])
-        self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"])
-        toc = time.time()
-        logger.info("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic))
diff --git a/detectron2/detectron2/evaluation/lvis_evaluation.py b/detectron2/detectron2/evaluation/lvis_evaluation.py
deleted file mode 100644
index 6cc854a157dc469be99a9be1bb7d570068adc891..0000000000000000000000000000000000000000
--- a/detectron2/detectron2/evaluation/lvis_evaluation.py
+++ /dev/null
@@ -1,380 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-import copy
-import itertools
-import json
-import logging
-import os
-import pickle
-from collections import OrderedDict
-import torch
-
-import detectron2.utils.comm as comm
-from detectron2.config import CfgNode
-from detectron2.data import MetadataCatalog
-from detectron2.structures import Boxes, BoxMode, pairwise_iou
-from detectron2.utils.file_io import PathManager
-from detectron2.utils.logger import create_small_table
-
-from .coco_evaluation import instances_to_coco_json
-from .evaluator import DatasetEvaluator
-
-
-class LVISEvaluator(DatasetEvaluator):
-    """
-    Evaluate object proposal and instance detection/segmentation outputs using
-    LVIS's metrics and evaluation API.
-    """
-
-    def __init__(
-        self,
-        dataset_name,
-        tasks=None,
-        distributed=True,
-        output_dir=None,
-        *,
-        max_dets_per_image=None,
-    ):
-        """
-        Args:
-            dataset_name (str): name of the dataset to be evaluated.
-                It must have the following corresponding metadata:
-                "json_file": the path to the LVIS format annotation
-            tasks (tuple[str]): tasks that can be evaluated under the given
-                configuration. A task is one of "bbox", "segm".
-                By default, will infer this automatically from predictions.
-            distributed (True): if True, will collect results from all ranks for evaluation.
-                Otherwise, will evaluate the results in the current process.
-            output_dir (str): optional, an output directory to dump results.
-            max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP.
-                This limit, by default of the LVIS dataset, is 300.
- """ - from lvis import LVIS - - self._logger = logging.getLogger(__name__) - - if tasks is not None and isinstance(tasks, CfgNode): - self._logger.warn( - "COCO Evaluator instantiated using config, this is deprecated behavior." - " Please pass in explicit arguments instead." - ) - self._tasks = None # Infering it from predictions should be better - else: - self._tasks = tasks - - self._distributed = distributed - self._output_dir = output_dir - self._max_dets_per_image = max_dets_per_image - - self._cpu_device = torch.device("cpu") - - self._metadata = MetadataCatalog.get(dataset_name) - json_file = PathManager.get_local_path(self._metadata.json_file) - self._lvis_api = LVIS(json_file) - # Test set json files do not contain annotations (evaluation must be - # performed using the LVIS evaluation server). - self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 - - def reset(self): - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a LVIS model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def evaluate(self): - if self._distributed: - comm.synchronize() - predictions = comm.gather(self._predictions, dst=0) - predictions = list(itertools.chain(*predictions)) - - if not comm.is_main_process(): - return - else: - predictions = self._predictions - - if len(predictions) == 0: - self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") - return {} - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "instances_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._results = OrderedDict() - if "proposals" in predictions[0]: - self._eval_box_proposals(predictions) - if "instances" in predictions[0]: - self._eval_predictions(predictions) - # Copy so the caller can do whatever with results - return copy.deepcopy(self._results) - - def _tasks_from_predictions(self, predictions): - for pred in predictions: - if "segmentation" in pred: - return ("bbox", "segm") - return ("bbox",) - - def _eval_predictions(self, predictions): - """ - Evaluate predictions. Fill self._results with the metrics of the tasks. - - Args: - predictions (list[dict]): list of outputs from the model - """ - self._logger.info("Preparing results in the LVIS format ...") - lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) - tasks = self._tasks or self._tasks_from_predictions(lvis_results) - - # LVIS evaluator can be used to evaluate results for COCO dataset categories. - # In this case `_metadata` variable will have a field with COCO-specific category mapping. 
- if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in lvis_results: - result["category_id"] = reverse_id_mapping[result["category_id"]] - else: - # unmap the category ids for LVIS (from 0-indexed to 1-indexed) - for result in lvis_results: - result["category_id"] += 1 - - if self._output_dir: - file_path = os.path.join(self._output_dir, "lvis_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(lvis_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - res = _evaluate_predictions_on_lvis( - self._lvis_api, - lvis_results, - task, - max_dets_per_image=self._max_dets_per_image, - class_names=self._metadata.get("thing_classes"), - ) - self._results[task] = res - - def _eval_box_proposals(self, predictions): - """ - Evaluate the box proposals in predictions. - Fill self._results with the metrics for "box_proposals" task. - """ - if self._output_dir: - # Saving generated box proposals to file. - # Predicted box_proposals are in XYXY_ABS mode. - bbox_mode = BoxMode.XYXY_ABS.value - ids, boxes, objectness_logits = [], [], [] - for prediction in predictions: - ids.append(prediction["image_id"]) - boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) - objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) - - proposal_data = { - "boxes": boxes, - "objectness_logits": objectness_logits, - "ids": ids, - "bbox_mode": bbox_mode, - } - with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: - pickle.dump(proposal_data, f) - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating bbox proposals ...") - res = {} - areas = {"all": "", "small": "s", "medium": "m", "large": "l"} - for limit in [100, 1000]: - for area, suffix in areas.items(): - stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) - key = "AR{}@{:d}".format(suffix, limit) - res[key] = float(stats["ar"].item() * 100) - self._logger.info("Proposal metrics: \n" + create_small_table(res)) - self._results["box_proposals"] = res - - -# inspired from Detectron: -# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa -def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): - """ - Evaluate detection proposal recall metrics. This function is a much - faster alternative to the official LVIS API recall evaluation code. However, - it produces slightly different results. 
- """ - # Record max overlap value for each gt box - # Return vector of overlap values - areas = { - "all": 0, - "small": 1, - "medium": 2, - "large": 3, - "96-128": 4, - "128-256": 5, - "256-512": 6, - "512-inf": 7, - } - area_ranges = [ - [0**2, 1e5**2], # all - [0**2, 32**2], # small - [32**2, 96**2], # medium - [96**2, 1e5**2], # large - [96**2, 128**2], # 96-128 - [128**2, 256**2], # 128-256 - [256**2, 512**2], # 256-512 - [512**2, 1e5**2], - ] # 512-inf - assert area in areas, "Unknown area range: {}".format(area) - area_range = area_ranges[areas[area]] - gt_overlaps = [] - num_pos = 0 - - for prediction_dict in dataset_predictions: - predictions = prediction_dict["proposals"] - - # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation - inds = predictions.objectness_logits.sort(descending=True)[1] - predictions = predictions[inds] - - ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) - anno = lvis_api.load_anns(ann_ids) - gt_boxes = [ - BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno - ] - gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes - gt_boxes = Boxes(gt_boxes) - gt_areas = torch.as_tensor([obj["area"] for obj in anno]) - - if len(gt_boxes) == 0 or len(predictions) == 0: - continue - - valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) - gt_boxes = gt_boxes[valid_gt_inds] - - num_pos += len(gt_boxes) - - if len(gt_boxes) == 0: - continue - - if limit is not None and len(predictions) > limit: - predictions = predictions[:limit] - - overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) - - _gt_overlaps = torch.zeros(len(gt_boxes)) - for j in range(min(len(predictions), len(gt_boxes))): - # find which proposal box maximally covers each gt box - # and get the iou amount of coverage for each gt box - max_overlaps, argmax_overlaps = overlaps.max(dim=0) - - # find which gt box is 'best' covered (i.e. 'best' = most iou) - gt_ovr, gt_ind = max_overlaps.max(dim=0) - assert gt_ovr >= 0 - # find the proposal box that covers the best covered gt box - box_ind = argmax_overlaps[gt_ind] - # record the iou coverage of this gt box - _gt_overlaps[j] = overlaps[box_ind, gt_ind] - assert _gt_overlaps[j] == gt_ovr - # mark the proposal box and the gt box as used - overlaps[box_ind, :] = -1 - overlaps[:, gt_ind] = -1 - - # append recorded iou coverage level - gt_overlaps.append(_gt_overlaps) - gt_overlaps = ( - torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) - ) - gt_overlaps, _ = torch.sort(gt_overlaps) - - if thresholds is None: - step = 0.05 - thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) - recalls = torch.zeros_like(thresholds) - # compute recall for each iou threshold - for i, t in enumerate(thresholds): - recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) - # ar = 2 * np.trapz(recalls, thresholds) - ar = recalls.mean() - return { - "ar": ar, - "recalls": recalls, - "thresholds": thresholds, - "gt_overlaps": gt_overlaps, - "num_pos": num_pos, - } - - -def _evaluate_predictions_on_lvis( - lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None -): - """ - Args: - iou_type (str): - max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP - This limit, by default of the LVIS dataset, is 300. - class_names (None or list[str]): if provided, will use it to predict - per-category AP. 
- - Returns: - a dict of {metric name: score} - """ - metrics = { - "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], - "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], - }[iou_type] - - logger = logging.getLogger(__name__) - - if len(lvis_results) == 0: # TODO: check if needed - logger.warn("No predictions from the model!") - return {metric: float("nan") for metric in metrics} - - if iou_type == "segm": - lvis_results = copy.deepcopy(lvis_results) - # When evaluating mask AP, if the results contain bbox, LVIS API will - # use the box area as the area of the instance, instead of the mask area. - # This leads to a different definition of small/medium/large. - # We remove the bbox field to let mask AP use mask area. - for c in lvis_results: - c.pop("bbox", None) - - if max_dets_per_image is None: - max_dets_per_image = 300 # Default for LVIS dataset - - from lvis import LVISEval, LVISResults - - logger.info(f"Evaluating with max detections per image = {max_dets_per_image}") - lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image) - lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) - lvis_eval.run() - lvis_eval.print_results() - - # Pull the standard metrics from the LVIS results - results = lvis_eval.get_results() - results = {metric: float(results[metric] * 100) for metric in metrics} - logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) - return results diff --git a/detectron2/detectron2/evaluation/panoptic_evaluation.py b/detectron2/detectron2/evaluation/panoptic_evaluation.py deleted file mode 100644 index 9fb3462b7f9abf6feaa499976bfed526ebd17e31..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/panoptic_evaluation.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import contextlib -import io -import itertools -import json -import logging -import numpy as np -import os -import tempfile -from collections import OrderedDict -from typing import Optional -from PIL import Image -from tabulate import tabulate - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm -from detectron2.utils.file_io import PathManager - -from .evaluator import DatasetEvaluator - -logger = logging.getLogger(__name__) - - -class COCOPanopticEvaluator(DatasetEvaluator): - """ - Evaluate Panoptic Quality metrics on COCO using PanopticAPI. - It saves panoptic segmentation prediction in `output_dir` - - It contains a synchronize call and has to be called from all workers. - """ - - def __init__(self, dataset_name: str, output_dir: Optional[str] = None): - """ - Args: - dataset_name: name of the dataset - output_dir: output directory to save results for evaluation. - """ - self._metadata = MetadataCatalog.get(dataset_name) - self._thing_contiguous_id_to_dataset_id = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - self._stuff_contiguous_id_to_dataset_id = { - v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() - } - - self._output_dir = output_dir - if self._output_dir is not None: - PathManager.mkdirs(self._output_dir) - - def reset(self): - self._predictions = [] - - def _convert_category_id(self, segment_info): - isthing = segment_info.pop("isthing", None) - if isthing is None: - # the model produces panoptic category id directly. 
No more conversion needed - return segment_info - if isthing is True: - segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ - segment_info["category_id"] - ] - else: - segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ - segment_info["category_id"] - ] - return segment_info - - def process(self, inputs, outputs): - from panopticapi.utils import id2rgb - - for input, output in zip(inputs, outputs): - panoptic_img, segments_info = output["panoptic_seg"] - panoptic_img = panoptic_img.cpu().numpy() - if segments_info is None: - # If "segments_info" is None, we assume "panoptic_img" is a - # H*W int32 image storing the panoptic_id in the format of - # category_id * label_divisor + instance_id. We reserve -1 for - # VOID label, and add 1 to panoptic_img since the official - # evaluation script uses 0 for VOID label. - label_divisor = self._metadata.label_divisor - segments_info = [] - for panoptic_label in np.unique(panoptic_img): - if panoptic_label == -1: - # VOID region. - continue - pred_class = panoptic_label // label_divisor - isthing = ( - pred_class in self._metadata.thing_dataset_id_to_contiguous_id.values() - ) - segments_info.append( - { - "id": int(panoptic_label) + 1, - "category_id": int(pred_class), - "isthing": bool(isthing), - } - ) - # Official evaluation script uses 0 for VOID label. - panoptic_img += 1 - - file_name = os.path.basename(input["file_name"]) - file_name_png = os.path.splitext(file_name)[0] + ".png" - with io.BytesIO() as out: - Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") - segments_info = [self._convert_category_id(x) for x in segments_info] - self._predictions.append( - { - "image_id": input["image_id"], - "file_name": file_name_png, - "png_string": out.getvalue(), - "segments_info": segments_info, - } - ) - - def evaluate(self): - comm.synchronize() - - self._predictions = comm.gather(self._predictions) - self._predictions = list(itertools.chain(*self._predictions)) - if not comm.is_main_process(): - return - - # PanopticApi requires local files - gt_json = PathManager.get_local_path(self._metadata.panoptic_json) - gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) - - with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: - logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) - for p in self._predictions: - with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: - f.write(p.pop("png_string")) - - with open(gt_json, "r") as f: - json_data = json.load(f) - json_data["annotations"] = self._predictions - - output_dir = self._output_dir or pred_dir - predictions_json = os.path.join(output_dir, "predictions.json") - with PathManager.open(predictions_json, "w") as f: - f.write(json.dumps(json_data)) - - from panopticapi.evaluation import pq_compute - - with contextlib.redirect_stdout(io.StringIO()): - pq_res = pq_compute( - gt_json, - PathManager.get_local_path(predictions_json), - gt_folder=gt_folder, - pred_folder=pred_dir, - ) - - res = {} - res["PQ"] = 100 * pq_res["All"]["pq"] - res["SQ"] = 100 * pq_res["All"]["sq"] - res["RQ"] = 100 * pq_res["All"]["rq"] - res["PQ_th"] = 100 * pq_res["Things"]["pq"] - res["SQ_th"] = 100 * pq_res["Things"]["sq"] - res["RQ_th"] = 100 * pq_res["Things"]["rq"] - res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] - res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] - res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] - - results = OrderedDict({"panoptic_seg": res}) - _print_panoptic_results(pq_res) - - return results - - -def 
_print_panoptic_results(pq_res): - headers = ["", "PQ", "SQ", "RQ", "#categories"] - data = [] - for name in ["All", "Things", "Stuff"]: - row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] - data.append(row) - table = tabulate( - data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" - ) - logger.info("Panoptic Evaluation Results:\n" + table) - - -if __name__ == "__main__": - from detectron2.utils.logger import setup_logger - - logger = setup_logger() - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--gt-json") - parser.add_argument("--gt-dir") - parser.add_argument("--pred-json") - parser.add_argument("--pred-dir") - args = parser.parse_args() - - from panopticapi.evaluation import pq_compute - - with contextlib.redirect_stdout(io.StringIO()): - pq_res = pq_compute( - args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir - ) - _print_panoptic_results(pq_res) diff --git a/detectron2/detectron2/evaluation/pascal_voc_evaluation.py b/detectron2/detectron2/evaluation/pascal_voc_evaluation.py deleted file mode 100644 index 88bb42e6f75f5f0faa4b774ddf16938477a37d2b..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/pascal_voc_evaluation.py +++ /dev/null @@ -1,300 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import logging -import numpy as np -import os -import tempfile -import xml.etree.ElementTree as ET -from collections import OrderedDict, defaultdict -from functools import lru_cache -import torch - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm -from detectron2.utils.file_io import PathManager - -from .evaluator import DatasetEvaluator - - -class PascalVOCDetectionEvaluator(DatasetEvaluator): - """ - Evaluate Pascal VOC style AP for Pascal VOC dataset. - It contains a synchronization, therefore has to be called from all ranks. - - Note that the concept of AP can be implemented in different ways and may not - produce identical results. This class mimics the implementation of the official - Pascal VOC Matlab API, and should produce similar but not identical results to the - official API. - """ - - def __init__(self, dataset_name): - """ - Args: - dataset_name (str): name of the dataset, e.g., "voc_2007_test" - """ - self._dataset_name = dataset_name - meta = MetadataCatalog.get(dataset_name) - - # Too many tiny files, download all to local for speed. 
- annotation_dir_local = PathManager.get_local_path( - os.path.join(meta.dirname, "Annotations/") - ) - self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml") - self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") - self._class_names = meta.thing_classes - assert meta.year in [2007, 2012], meta.year - self._is_2007 = meta.year == 2007 - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - def reset(self): - self._predictions = defaultdict(list) # class name -> list of prediction strings - - def process(self, inputs, outputs): - for input, output in zip(inputs, outputs): - image_id = input["image_id"] - instances = output["instances"].to(self._cpu_device) - boxes = instances.pred_boxes.tensor.numpy() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - for box, score, cls in zip(boxes, scores, classes): - xmin, ymin, xmax, ymax = box - # The inverse of data loading logic in `datasets/pascal_voc.py` - xmin += 1 - ymin += 1 - self._predictions[cls].append( - f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" - ) - - def evaluate(self): - """ - Returns: - dict: has a key "bbox", whose value is a dict of "AP", "AP50", and "AP75". - """ - all_predictions = comm.gather(self._predictions, dst=0) - if not comm.is_main_process(): - return - predictions = defaultdict(list) - for predictions_per_rank in all_predictions: - for clsid, lines in predictions_per_rank.items(): - predictions[clsid].extend(lines) - del all_predictions - - self._logger.info( - "Evaluating {} using {} metric. " - "Note that results do not use the official Matlab API.".format( - self._dataset_name, 2007 if self._is_2007 else 2012 - ) - ) - - with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: - res_file_template = os.path.join(dirname, "{}.txt") - - aps = defaultdict(list) # iou -> ap per class - for cls_id, cls_name in enumerate(self._class_names): - lines = predictions.get(cls_id, [""]) - - with open(res_file_template.format(cls_name), "w") as f: - f.write("\n".join(lines)) - - for thresh in range(50, 100, 5): - rec, prec, ap = voc_eval( - res_file_template, - self._anno_file_template, - self._image_set_path, - cls_name, - ovthresh=thresh / 100.0, - use_07_metric=self._is_2007, - ) - aps[thresh].append(ap * 100) - - ret = OrderedDict() - mAP = {iou: np.mean(x) for iou, x in aps.items()} - ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} - return ret - - -############################################################################## -# -# Below code is modified from -# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py -# -------------------------------------------------------- -# Fast/er R-CNN -# Licensed under The MIT License [see LICENSE for details] -# Written by Bharath Hariharan -# -------------------------------------------------------- - -"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" - - -@lru_cache(maxsize=None) -def parse_rec(filename): - """Parse a PASCAL VOC xml file.""" - with PathManager.open(filename) as f: - tree = ET.parse(f) - objects = [] - for obj in tree.findall("object"): - obj_struct = {} - obj_struct["name"] = obj.find("name").text - obj_struct["pose"] = obj.find("pose").text - obj_struct["truncated"] = int(obj.find("truncated").text) - obj_struct["difficult"] = int(obj.find("difficult").text) - bbox = obj.find("bndbox") - obj_struct["bbox"] = [ -
int(bbox.find("xmin").text), - int(bbox.find("ymin").text), - int(bbox.find("xmax").text), - int(bbox.find("ymax").text), - ] - objects.append(obj_struct) - - return objects - - -def voc_ap(rec, prec, use_07_metric=False): - """Compute VOC AP given precision and recall. If use_07_metric is true, uses - the VOC 07 11-point method (default:False). - """ - if use_07_metric: - # 11 point metric - ap = 0.0 - for t in np.arange(0.0, 1.1, 0.1): - if np.sum(rec >= t) == 0: - p = 0 - else: - p = np.max(prec[rec >= t]) - ap = ap + p / 11.0 - else: - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.0], rec, [1.0])) - mpre = np.concatenate(([0.0], prec, [0.0])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): - """rec, prec, ap = voc_eval(detpath, - annopath, - imagesetfile, - classname, - [ovthresh], - [use_07_metric]) - - Top level function that does the PASCAL VOC evaluation. - - detpath: Path to detections - detpath.format(classname) should produce the detection results file. - annopath: Path to annotations - annopath.format(imagename) should be the xml annotations file. - imagesetfile: Text file containing the list of images, one image per line. - classname: Category name (duh) - [ovthresh]: Overlap threshold (default = 0.5) - [use_07_metric]: Whether to use VOC07's 11 point AP computation - (default False) - """ - # assumes detections are in detpath.format(classname) - # assumes annotations are in annopath.format(imagename) - # assumes imagesetfile is a text file with each line an image name - - # first load gt - # read list of images - with PathManager.open(imagesetfile, "r") as f: - lines = f.readlines() - imagenames = [x.strip() for x in lines] - - # load annots - recs = {} - for imagename in imagenames: - recs[imagename] = parse_rec(annopath.format(imagename)) - - # extract gt objects for this class - class_recs = {} - npos = 0 - for imagename in imagenames: - R = [obj for obj in recs[imagename] if obj["name"] == classname] - bbox = np.array([x["bbox"] for x in R]) - difficult = np.array([x["difficult"] for x in R]).astype(bool) - # difficult = np.array([False for x in R]).astype(bool) # treat all "difficult" as GT - det = [False] * len(R) - npos = npos + sum(~difficult) - class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} - - # read dets - detfile = detpath.format(classname) - with open(detfile, "r") as f: - lines = f.readlines() - - splitlines = [x.strip().split(" ") for x in lines] - image_ids = [x[0] for x in splitlines] - confidence = np.array([float(x[1]) for x in splitlines]) - BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) - - # sort by confidence - sorted_ind = np.argsort(-confidence) - BB = BB[sorted_ind, :] - image_ids = [image_ids[x] for x in sorted_ind] - - # go down dets and mark TPs and FPs - nd = len(image_ids) - tp = np.zeros(nd) - fp = np.zeros(nd) - for d in range(nd): - R = class_recs[image_ids[d]] - bb = BB[d, :].astype(float) - ovmax = -np.inf - BBGT = R["bbox"].astype(float) - - if BBGT.size > 0: - # compute overlaps - # intersection - ixmin = np.maximum(BBGT[:, 
0], bb[0]) - iymin = np.maximum(BBGT[:, 1], bb[1]) - ixmax = np.minimum(BBGT[:, 2], bb[2]) - iymax = np.minimum(BBGT[:, 3], bb[3]) - iw = np.maximum(ixmax - ixmin + 1.0, 0.0) - ih = np.maximum(iymax - iymin + 1.0, 0.0) - inters = iw * ih - - # union - uni = ( - (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) - + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) - - inters - ) - - overlaps = inters / uni - ovmax = np.max(overlaps) - jmax = np.argmax(overlaps) - - if ovmax > ovthresh: - if not R["difficult"][jmax]: - if not R["det"][jmax]: - tp[d] = 1.0 - R["det"][jmax] = 1 - else: - fp[d] = 1.0 - else: - fp[d] = 1.0 - - # compute precision recall - fp = np.cumsum(fp) - tp = np.cumsum(tp) - rec = tp / float(npos) - # avoid divide by zero in case the first detection matches a difficult - # ground truth - prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) - ap = voc_ap(rec, prec, use_07_metric) - - return rec, prec, ap diff --git a/detectron2/detectron2/evaluation/rotated_coco_evaluation.py b/detectron2/detectron2/evaluation/rotated_coco_evaluation.py deleted file mode 100644 index a6c53def95f441243ae98ec17121a75d7002870e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/rotated_coco_evaluation.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import itertools -import json -import numpy as np -import os -import torch -from pycocotools.cocoeval import COCOeval, maskUtils - -from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.file_io import PathManager - -from .coco_evaluation import COCOEvaluator - - -class RotatedCOCOeval(COCOeval): - @staticmethod - def is_rotated(box_list): - if type(box_list) is np.ndarray: - return box_list.shape[1] == 5 - elif type(box_list) is list: - if box_list == []: # cannot decide the box_dim - return False - return np.all( - np.array( - [ - (len(obj) == 5) and ((type(obj) is list) or (type(obj) is np.ndarray)) - for obj in box_list - ] - ) - ) - return False - - @staticmethod - def boxlist_to_tensor(boxlist, output_box_dim): - if type(boxlist) is np.ndarray: - box_tensor = torch.from_numpy(boxlist) - elif type(boxlist) is list: - if boxlist == []: - return torch.zeros((0, output_box_dim), dtype=torch.float32) - else: - box_tensor = torch.FloatTensor(boxlist) - else: - raise Exception("Unrecognized boxlist type") - - input_box_dim = box_tensor.shape[1] - if input_box_dim != output_box_dim: - if input_box_dim == 4 and output_box_dim == 5: - box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) - else: - raise Exception( - "Unable to convert from {}-dim box to {}-dim box".format( - input_box_dim, output_box_dim - ) - ) - return box_tensor - - def compute_iou_dt_gt(self, dt, gt, is_crowd): - if self.is_rotated(dt) or self.is_rotated(gt): - # TODO: take is_crowd into consideration - assert all(c == 0 for c in is_crowd) - dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) - gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) - return pairwise_iou_rotated(dt, gt) - else: - # This is the same as the classical COCO evaluation - return maskUtils.iou(dt, gt, is_crowd) - - def computeIoU(self, imgId: int, catId: int): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - - if len(gt) == 0 or len(dt) == 0: - return [] - 
- inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - assert p.iouType == "bbox", "unsupported iouType for iou computation" - - g = [g["bbox"] for g in gt] - d = [d["bbox"] for d in dt] - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - - # Note: this function is copied from cocoeval.py in cocoapi - # and the major difference is here. - ious = self.compute_iou_dt_gt(d, g, iscrowd) - return ious - - -class RotatedCOCOEvaluator(COCOEvaluator): - """ - Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, - with rotated boxes support. - Note: this uses IOU only and does not consider angle differences. - """ - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - - prediction["instances"] = self.instances_to_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def instances_to_json(self, instances, img_id): - num_instance = len(instances) - if num_instance == 0: - return [] - - boxes = instances.pred_boxes.tensor.numpy() - if boxes.shape[1] == 4: - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - boxes = boxes.tolist() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - - results = [] - for k in range(num_instance): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": boxes[k], - "score": scores[k], - } - - results.append(result) - return results - - def _eval_predictions(self, predictions, img_ids=None): # img_ids: unused - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. 
- """ - self._logger.info("Preparing results for COCO format ...") - coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # unmap the category ids for COCO - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in coco_results: - result["category_id"] = reverse_id_mapping[result["category_id"]] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(coco_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - - assert self._tasks is None or set(self._tasks) == { - "bbox" - }, "[RotatedCOCOEvaluator] Only bbox evaluation is supported" - coco_eval = ( - self._evaluate_predictions_on_coco(self._coco_api, coco_results) - if len(coco_results) > 0 - else None # cocoapi does not handle empty results very well - ) - - task = "bbox" - res = self._derive_coco_results( - coco_eval, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _evaluate_predictions_on_coco(self, coco_gt, coco_results): - """ - Evaluate the coco results using COCOEval API. - """ - assert len(coco_results) > 0 - - coco_dt = coco_gt.loadRes(coco_results) - - # Only bbox is supported for now - coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") - - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - return coco_eval diff --git a/detectron2/detectron2/evaluation/sem_seg_evaluation.py b/detectron2/detectron2/evaluation/sem_seg_evaluation.py deleted file mode 100644 index 582ddcc3490da38db0169369816bcf776fa75845..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/sem_seg_evaluation.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import itertools -import json -import logging -import numpy as np -import os -from collections import OrderedDict -from typing import Optional, Union -import pycocotools.mask as mask_util -import torch -from PIL import Image - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.utils.comm import all_gather, is_main_process, synchronize -from detectron2.utils.file_io import PathManager - -from .evaluator import DatasetEvaluator - -_CV2_IMPORTED = True -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - _CV2_IMPORTED = False - - -def load_image_into_numpy_array( - filename: str, - dtype: Optional[Union[np.dtype, str]] = None, -) -> np.ndarray: - with PathManager.open(filename, "rb") as f: - array = np.asarray(Image.open(f), dtype=dtype) - return array - - -class SemSegEvaluator(DatasetEvaluator): - """ - Evaluate semantic segmentation metrics. - """ - - def __init__( - self, - dataset_name, - distributed=True, - output_dir=None, - *, - sem_seg_loading_fn=load_image_into_numpy_array, - num_classes=None, - ignore_label=None, - ): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - distributed (bool): if True, will collect results from all ranks for evaluation. - Otherwise, will evaluate the results in the current process. - output_dir (str): an output directory to dump results. 
- sem_seg_loading_fn: function to read sem seg file and load into numpy array. - Default provided, but projects can customize. - num_classes, ignore_label: deprecated argument - """ - self._logger = logging.getLogger(__name__) - if num_classes is not None: - self._logger.warn( - "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata." - ) - if ignore_label is not None: - self._logger.warn( - "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata." - ) - self._dataset_name = dataset_name - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - - self.input_file_to_gt_file = { - dataset_record["file_name"]: dataset_record["sem_seg_file_name"] - for dataset_record in DatasetCatalog.get(dataset_name) - } - - meta = MetadataCatalog.get(dataset_name) - # Dict that maps contiguous training ids to COCO category ids - try: - c2d = meta.stuff_dataset_id_to_contiguous_id - self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} - except AttributeError: - self._contiguous_id_to_dataset_id = None - self._class_names = meta.stuff_classes - self.sem_seg_loading_fn = sem_seg_loading_fn - self._num_classes = len(meta.stuff_classes) - if num_classes is not None: - assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}" - self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label - - # This is because cv2.erode did not work for int datatype. Only works for uint8. - self._compute_boundary_iou = True - if not _CV2_IMPORTED: - self._compute_boundary_iou = False - self._logger.warn( - """Boundary IoU calculation requires OpenCV. B-IoU metrics are - not going to be computed because OpenCV is not available to import.""" - ) - if self._num_classes >= np.iinfo(np.uint8).max: - self._compute_boundary_iou = False - self._logger.warn( - f"""SemSegEvaluator(num_classes) is more than supported value for Boundary IoU - calculation! B-IoU metrics are not going to be computed. Max allowed value - (exclusive) for num_classes for calculating Boundary IoU is. - {np.iinfo(np.uint8).max} The number of classes of dataset {self._dataset_name} is - {self._num_classes}""" - ) - - def reset(self): - self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64) - self._b_conf_matrix = np.zeros( - (self._num_classes + 1, self._num_classes + 1), dtype=np.int64 - ) - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a model. - It is a list of dicts. Each dict corresponds to an image and - contains keys like "height", "width", "file_name". - outputs: the outputs of a model. It is either list of semantic segmentation predictions - (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic - segmentation prediction in the same format. 
- """ - for input, output in zip(inputs, outputs): - output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) - pred = np.array(output, dtype=int) - gt_filename = self.input_file_to_gt_file[input["file_name"]] - gt = self.sem_seg_loading_fn(gt_filename, dtype=int) - - gt[gt == self._ignore_label] = self._num_classes - - self._conf_matrix += np.bincount( - (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1), - minlength=self._conf_matrix.size, - ).reshape(self._conf_matrix.shape) - - if self._compute_boundary_iou: - b_gt = self._mask_to_boundary(gt.astype(np.uint8)) - b_pred = self._mask_to_boundary(pred.astype(np.uint8)) - - self._b_conf_matrix += np.bincount( - (self._num_classes + 1) * b_pred.reshape(-1) + b_gt.reshape(-1), - minlength=self._conf_matrix.size, - ).reshape(self._conf_matrix.shape) - - self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) - - def evaluate(self): - """ - Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): - - * Mean intersection-over-union averaged across classes (mIoU) - * Frequency Weighted IoU (fwIoU) - * Mean pixel accuracy averaged across classes (mACC) - * Pixel Accuracy (pACC) - """ - if self._distributed: - synchronize() - conf_matrix_list = all_gather(self._conf_matrix) - b_conf_matrix_list = all_gather(self._b_conf_matrix) - self._predictions = all_gather(self._predictions) - self._predictions = list(itertools.chain(*self._predictions)) - if not is_main_process(): - return - - self._conf_matrix = np.zeros_like(self._conf_matrix) - for conf_matrix in conf_matrix_list: - self._conf_matrix += conf_matrix - - self._b_conf_matrix = np.zeros_like(self._b_conf_matrix) - for b_conf_matrix in b_conf_matrix_list: - self._b_conf_matrix += b_conf_matrix - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(self._predictions)) - - acc = np.full(self._num_classes, np.nan, dtype=float) - iou = np.full(self._num_classes, np.nan, dtype=float) - tp = self._conf_matrix.diagonal()[:-1].astype(float) - pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float) - class_weights = pos_gt / np.sum(pos_gt) - pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float) - acc_valid = pos_gt > 0 - acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] - union = pos_gt + pos_pred - tp - iou_valid = np.logical_and(acc_valid, union > 0) - iou[iou_valid] = tp[iou_valid] / union[iou_valid] - macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) - miou = np.sum(iou[iou_valid]) / np.sum(iou_valid) - fiou = np.sum(iou[iou_valid] * class_weights[iou_valid]) - pacc = np.sum(tp) / np.sum(pos_gt) - - if self._compute_boundary_iou: - b_iou = np.full(self._num_classes, np.nan, dtype=float) - b_tp = self._b_conf_matrix.diagonal()[:-1].astype(float) - b_pos_gt = np.sum(self._b_conf_matrix[:-1, :-1], axis=0).astype(float) - b_pos_pred = np.sum(self._b_conf_matrix[:-1, :-1], axis=1).astype(float) - b_union = b_pos_gt + b_pos_pred - b_tp - b_iou_valid = b_union > 0 - b_iou[b_iou_valid] = b_tp[b_iou_valid] / b_union[b_iou_valid] - - res = {} - res["mIoU"] = 100 * miou - res["fwIoU"] = 100 * fiou - for i, name in enumerate(self._class_names): - res[f"IoU-{name}"] = 100 * iou[i] - if self._compute_boundary_iou: - res[f"BoundaryIoU-{name}"] = 100 * b_iou[i] - res[f"min(IoU, B-Iou)-{name}"] = 100 * min(iou[i], b_iou[i]) - res["mACC"] = 100 * macc - res["pACC"] = 100 * pacc - 
for i, name in enumerate(self._class_names): - res[f"ACC-{name}"] = 100 * acc[i] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(res, f) - results = OrderedDict({"sem_seg": res}) - self._logger.info(results) - return results - - def encode_json_sem_seg(self, sem_seg, input_file_name): - """ - Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. - See http://cocodataset.org/#format-results - """ - json_list = [] - for label in np.unique(sem_seg): - if self._contiguous_id_to_dataset_id is not None: - assert ( - label in self._contiguous_id_to_dataset_id - ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) - dataset_id = self._contiguous_id_to_dataset_id[label] - else: - dataset_id = int(label) - mask = (sem_seg == label).astype(np.uint8) - mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] - mask_rle["counts"] = mask_rle["counts"].decode("utf-8") - json_list.append( - {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} - ) - return json_list - - def _mask_to_boundary(self, mask: np.ndarray, dilation_ratio=0.02): - assert mask.ndim == 2, "mask_to_boundary expects a 2-dimensional image" - h, w = mask.shape - diag_len = np.sqrt(h**2 + w**2) - dilation = max(1, int(round(dilation_ratio * diag_len))) - kernel = np.ones((3, 3), dtype=np.uint8) - - padded_mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0) - eroded_mask_with_padding = cv2.erode(padded_mask, kernel, iterations=dilation) - eroded_mask = eroded_mask_with_padding[1:-1, 1:-1] - boundary = mask - eroded_mask - return boundary diff --git a/detectron2/detectron2/evaluation/testing.py b/detectron2/detectron2/evaluation/testing.py deleted file mode 100644 index 9e5ae625bb0593fc20739dd3ea549157e4df4f3d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/evaluation/testing.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import numpy as np -import pprint -import sys -from collections.abc import Mapping - - -def print_csv_format(results): - """ - Print main metrics in a format similar to Detectron, - so that they are easy to copypaste into a spreadsheet. - - Args: - results (OrderedDict[dict]): task_name -> {metric -> score} - unordered dict can also be printed, but in arbitrary order - """ - assert isinstance(results, Mapping) or not len(results), results - logger = logging.getLogger(__name__) - for task, res in results.items(): - if isinstance(res, Mapping): - # Don't print "AP-category" metrics since they are usually not tracked. 
- important_res = [(k, v) for k, v in res.items() if "-" not in k] - logger.info("copypaste: Task: {}".format(task)) - logger.info("copypaste: " + ",".join([k[0] for k in important_res])) - logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) - else: - logger.info(f"copypaste: {task}={res}") - - -def verify_results(cfg, results): - """ - Args: - results (OrderedDict[dict]): task_name -> {metric -> score} - - Returns: - bool: whether the verification succeeds or not - """ - expected_results = cfg.TEST.EXPECTED_RESULTS - if not len(expected_results): - return True - - ok = True - for task, metric, expected, tolerance in expected_results: - actual = results[task].get(metric, None) - if actual is None: - ok = False - continue - if not np.isfinite(actual): - ok = False - continue - diff = abs(actual - expected) - if diff > tolerance: - ok = False - - logger = logging.getLogger(__name__) - if not ok: - logger.error("Result verification failed!") - logger.error("Expected Results: " + str(expected_results)) - logger.error("Actual Results: " + pprint.pformat(results)) - - sys.exit(1) - else: - logger.info("Results verification passed.") - return ok - - -def flatten_results_dict(results): - """ - Expand a hierarchical dict of scalars into a flat dict of scalars. - If results[k1][k2][k3] = v, the returned dict will have the entry - {"k1/k2/k3": v}. - - Args: - results (dict): - """ - r = {} - for k, v in results.items(): - if isinstance(v, Mapping): - v = flatten_results_dict(v) - for kk, vv in v.items(): - r[k + "/" + kk] = vv - else: - r[k] = v - return r diff --git a/detectron2/detectron2/export/README.md b/detectron2/detectron2/export/README.md deleted file mode 100644 index c86ff62516f4e8e4b1a6c1f33f11192933cf3861..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/README.md +++ /dev/null @@ -1,15 +0,0 @@ - -This directory contains code to prepare a detectron2 model for deployment. -Currently it supports exporting a detectron2 model to TorchScript, ONNX, or (deprecated) Caffe2 format. - -Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. - - -### Acknowledgements - -Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools. - -Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who -help export Detectron2 models to TorchScript. - -Thanks to ONNX Converter team at Microsoft who help export Detectron2 models to ONNX. 
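For reference, the TorchScript path mentioned in the README above usually goes through the TracingAdapter re-exported by this package. The following is a minimal sketch only, not taken from the repository; the config name, image path, and output path are placeholders, and it assumes detectron2 with pretrained model-zoo weights is available.

import torch
from detectron2 import model_zoo
from detectron2.data.detection_utils import read_image
from detectron2.export import TracingAdapter

# Load a builtin model with pretrained weights (any supported meta architecture).
model = model_zoo.get("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml", trained=True).eval()

# One sample input in detectron2's standard format: a list of dicts with a CHW float "image".
img = read_image("input.jpg", format="BGR")  # placeholder image path
inputs = [{"image": torch.as_tensor(img.astype("float32").transpose(2, 0, 1))}]

# TracingAdapter flattens dict/Instances inputs and outputs into tuples of tensors,
# which is the form torch.jit.trace can consume.
adapter = TracingAdapter(model, inputs)
with torch.no_grad():
    traced = torch.jit.trace(adapter, adapter.flattened_inputs)
traced.save("model.ts")  # placeholder output path

The traced module returns flattened tensors; the adapter's outputs_schema can be used to rebuild the structured outputs when the saved model is run elsewhere.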
diff --git a/detectron2/detectron2/export/__init__.py b/detectron2/detectron2/export/__init__.py deleted file mode 100644 index 5a58758f64aae6071fa688be4400622ce6036efa..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- - -import warnings - -from .flatten import TracingAdapter -from .torchscript import dump_torchscript_IR, scripting_with_instances - -try: - from caffe2.proto import caffe2_pb2 as _tmp - from caffe2.python import core - - # caffe2 is optional -except ImportError: - pass -else: - from .api import * - - -# TODO: Update ONNX Opset version and run tests when a newer PyTorch is supported -STABLE_ONNX_OPSET_VERSION = 11 - - -def add_export_config(cfg): - warnings.warn( - "add_export_config has been deprecated and behaves as no-op function.", DeprecationWarning - ) - return cfg - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/export/api.py b/detectron2/detectron2/export/api.py deleted file mode 100644 index 1a272fed929217f18e04f731365f4bf7472110fc..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/api.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import logging -import os -import torch -from caffe2.proto import caffe2_pb2 -from torch import nn - -from detectron2.config import CfgNode -from detectron2.utils.file_io import PathManager - -from .caffe2_inference import ProtobufDetectionModel -from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format -from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph - -__all__ = [ - "Caffe2Model", - "Caffe2Tracer", -] - - -class Caffe2Tracer: - """ - Make a detectron2 model traceable with Caffe2 operators. - This class creates a traceable version of a detectron2 model which: - - 1. Rewrites parts of the model using ops in Caffe2. Note that some ops do - not have a GPU implementation in Caffe2. - 2. Removes post-processing and only produces raw layer outputs. - - After making a traceable model, the class provides methods to export such a - model to different deployment formats. - The exported graph produced by this class takes two input tensors: - - 1. (1, C, H, W) float "data" which is an image (usually in [0, 255]). - (H, W) often has to be padded to a multiple of 32 (depending on the model - architecture). - 2. 1x3 float "im_info", each row of which is (height, width, 1.0). - Height and width are the true image shapes before padding. - - The class currently only supports models using builtin meta architectures. - Batch inference is not supported, and contributions are welcome. - """ - - def __init__(self, cfg: CfgNode, model: nn.Module, inputs): - """ - Args: - cfg (CfgNode): a detectron2 config used to construct a caffe2-compatible model. - model (nn.Module): An original pytorch model. Must be among a few official models - in detectron2 that can be converted to become caffe2-compatible automatically. - Weights have to be already loaded into this model. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. For most models, random inputs with - no detected objects will not work as they lead to wrong traces. 
- """ - assert isinstance(cfg, CfgNode), cfg - assert isinstance(model, torch.nn.Module), type(model) - - # TODO make it support custom models, by passing in c2 model directly - C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE] - self.traceable_model = C2MetaArch(cfg, copy.deepcopy(model)) - self.inputs = inputs - self.traceable_inputs = self.traceable_model.get_caffe2_inputs(inputs) - - def export_caffe2(self): - """ - Export the model to Caffe2's protobuf format. - The returned object can be saved with its :meth:`.save_protobuf()` method. - The result can be loaded and executed using Caffe2 runtime. - - Returns: - :class:`Caffe2Model` - """ - from .caffe2_export import export_caffe2_detection_model - - predict_net, init_net = export_caffe2_detection_model( - self.traceable_model, self.traceable_inputs - ) - return Caffe2Model(predict_net, init_net) - - def export_onnx(self): - """ - Export the model to ONNX format. - Note that the exported model contains custom ops only available in caffe2, therefore it - cannot be directly executed by other runtime (such as onnxruntime or TensorRT). - Post-processing or transformation passes may be applied on the model to accommodate - different runtimes, but we currently do not provide support for them. - - Returns: - onnx.ModelProto: an onnx model. - """ - from .caffe2_export import export_onnx_model as export_onnx_model_impl - - return export_onnx_model_impl(self.traceable_model, (self.traceable_inputs,)) - - def export_torchscript(self): - """ - Export the model to a ``torch.jit.TracedModule`` by tracing. - The returned object can be saved to a file by ``.save()``. - - Returns: - torch.jit.TracedModule: a torch TracedModule - """ - logger = logging.getLogger(__name__) - logger.info("Tracing the model with torch.jit.trace ...") - with torch.no_grad(): - return torch.jit.trace(self.traceable_model, (self.traceable_inputs,)) - - -class Caffe2Model(nn.Module): - """ - A wrapper around the traced model in Caffe2's protobuf format. - The exported graph has different inputs/outputs from the original Pytorch - model, as explained in :class:`Caffe2Tracer`. This class wraps around the - exported graph to simulate the same interface as the original Pytorch model. - It also provides functions to save/load models in Caffe2's format.' - - Examples: - :: - c2_model = Caffe2Tracer(cfg, torch_model, inputs).export_caffe2() - inputs = [{"image": img_tensor_CHW}] - outputs = c2_model(inputs) - orig_outputs = torch_model(inputs) - """ - - def __init__(self, predict_net, init_net): - super().__init__() - self.eval() # always in eval mode - self._predict_net = predict_net - self._init_net = init_net - self._predictor = None - - __init__.__HIDE_SPHINX_DOC__ = True - - @property - def predict_net(self): - """ - caffe2.core.Net: the underlying caffe2 predict net - """ - return self._predict_net - - @property - def init_net(self): - """ - caffe2.core.Net: the underlying caffe2 init net - """ - return self._init_net - - def save_protobuf(self, output_dir): - """ - Save the model as caffe2's protobuf format. - It saves the following files: - - * "model.pb": definition of the graph. Can be visualized with - tools like `netron `_. - * "model_init.pb": model parameters - * "model.pbtxt": human-readable definition of the graph. Not - needed for deployment. - - Args: - output_dir (str): the output directory to save protobuf files. 
- """ - logger = logging.getLogger(__name__) - logger.info("Saving model to {} ...".format(output_dir)) - if not PathManager.exists(output_dir): - PathManager.mkdirs(output_dir) - - with PathManager.open(os.path.join(output_dir, "model.pb"), "wb") as f: - f.write(self._predict_net.SerializeToString()) - with PathManager.open(os.path.join(output_dir, "model.pbtxt"), "w") as f: - f.write(str(self._predict_net)) - with PathManager.open(os.path.join(output_dir, "model_init.pb"), "wb") as f: - f.write(self._init_net.SerializeToString()) - - def save_graph(self, output_file, inputs=None): - """ - Save the graph as SVG format. - - Args: - output_file (str): a SVG file - inputs: optional inputs given to the model. - If given, the inputs will be used to run the graph to record - shape of every tensor. The shape information will be - saved together with the graph. - """ - from .caffe2_export import run_and_save_graph - - if inputs is None: - save_graph(self._predict_net, output_file, op_only=False) - else: - size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) - device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") - inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) - inputs = [x.cpu().numpy() for x in inputs] - run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) - - @staticmethod - def load_protobuf(dir): - """ - Args: - dir (str): a directory used to save Caffe2Model with - :meth:`save_protobuf`. - The files "model.pb" and "model_init.pb" are needed. - - Returns: - Caffe2Model: the caffe2 model loaded from this directory. - """ - predict_net = caffe2_pb2.NetDef() - with PathManager.open(os.path.join(dir, "model.pb"), "rb") as f: - predict_net.ParseFromString(f.read()) - - init_net = caffe2_pb2.NetDef() - with PathManager.open(os.path.join(dir, "model_init.pb"), "rb") as f: - init_net.ParseFromString(f.read()) - - return Caffe2Model(predict_net, init_net) - - def __call__(self, inputs): - """ - An interface that wraps around a Caffe2 model and mimics detectron2's models' - input/output format. See details about the format at :doc:`/tutorials/models`. - This is used to compare the outputs of caffe2 model with its original torch model. - - Due to the extra conversion between Pytorch/Caffe2, this method is not meant for - benchmark. Because of the conversion, this method also has dependency - on detectron2 in order to convert to detectron2's output format. - """ - if self._predictor is None: - self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) - return self._predictor(inputs) diff --git a/detectron2/detectron2/export/c10.py b/detectron2/detectron2/export/c10.py deleted file mode 100644 index 35380af01ab8bd53b870930d2b4d623295511090..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/c10.py +++ /dev/null @@ -1,576 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import math -from typing import Dict -import torch -import torch.nn.functional as F - -from detectron2.layers import ShapeSpec, cat -from detectron2.layers.roi_align_rotated import ROIAlignRotated -from detectron2.modeling import poolers -from detectron2.modeling.proposal_generator import rpn -from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference -from detectron2.structures import Boxes, ImageList, Instances, Keypoints, RotatedBoxes - -from .shared import alias, to_device - - -""" -This file contains caffe2-compatible implementation of several detectron2 components. -""" - - -class Caffe2Boxes(Boxes): - """ - Representing a list of detectron2.structures.Boxes from minibatch, each box - is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector - (batch index + 5 coordinates) for RotatedBoxes. - """ - - def __init__(self, tensor): - assert isinstance(tensor, torch.Tensor) - assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() - # TODO: make tensor immutable when dim is Nx5 for Boxes, - # and Nx6 for RotatedBoxes? - self.tensor = tensor - - -# TODO clean up this class, maybe just extend Instances -class InstancesList: - """ - Tensor representation of a list of Instances object for a batch of images. - - When dealing with a batch of images with Caffe2 ops, a list of bboxes - (instances) are usually represented by single Tensor with size - (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is - for providing common functions to convert between these two representations. - """ - - def __init__(self, im_info, indices, extra_fields=None): - # [N, 3] -> (H, W, Scale) - self.im_info = im_info - # [N,] -> indice of batch to which the instance belongs - self.indices = indices - # [N, ...] - self.batch_extra_fields = extra_fields or {} - - self.image_size = self.im_info - - def get_fields(self): - """like `get_fields` in the Instances object, - but return each field in tensor representations""" - ret = {} - for k, v in self.batch_extra_fields.items(): - # if isinstance(v, torch.Tensor): - # tensor_rep = v - # elif isinstance(v, (Boxes, Keypoints)): - # tensor_rep = v.tensor - # else: - # raise ValueError("Can't find tensor representation for: {}".format()) - ret[k] = v - return ret - - def has(self, name): - return name in self.batch_extra_fields - - def set(self, name, value): - # len(tensor) is a bad practice that generates ONNX constants during tracing. - # Although not a problem for the `assert` statement below, torch ONNX exporter - # still raises a misleading warning as it does not this call comes from `assert` - if isinstance(value, Boxes): - data_len = value.tensor.shape[0] - elif isinstance(value, torch.Tensor): - data_len = value.shape[0] - else: - data_len = len(value) - if len(self.batch_extra_fields): - # If we are tracing with Dynamo, the check here is needed since len(self) - # represents the number of bounding boxes detected in the image and thus is - # an unbounded SymInt. 
- if torch._utils.is_compiling(): - torch._check(len(self) == data_len) - assert ( - len(self) == data_len - ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) - self.batch_extra_fields[name] = value - - def __getattr__(self, name): - if name not in self.batch_extra_fields: - raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) - return self.batch_extra_fields[name] - - def __len__(self): - return len(self.indices) - - def flatten(self): - ret = [] - for _, v in self.batch_extra_fields.items(): - if isinstance(v, (Boxes, Keypoints)): - ret.append(v.tensor) - else: - ret.append(v) - return ret - - @staticmethod - def to_d2_instances_list(instances_list): - """ - Convert InstancesList to List[Instances]. The input `instances_list` can - also be a List[Instances], in this case this method is a non-op. - """ - if not isinstance(instances_list, InstancesList): - assert all(isinstance(x, Instances) for x in instances_list) - return instances_list - - ret = [] - for i, info in enumerate(instances_list.im_info): - instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) - - ids = instances_list.indices == i - for k, v in instances_list.batch_extra_fields.items(): - if isinstance(v, torch.Tensor): - instances.set(k, v[ids]) - continue - elif isinstance(v, Boxes): - instances.set(k, v[ids, -4:]) - continue - - target_type, tensor_source = v - assert isinstance(tensor_source, torch.Tensor) - assert tensor_source.shape[0] == instances_list.indices.shape[0] - tensor_source = tensor_source[ids] - - if issubclass(target_type, Boxes): - instances.set(k, Boxes(tensor_source[:, -4:])) - elif issubclass(target_type, Keypoints): - instances.set(k, Keypoints(tensor_source)) - elif issubclass(target_type, torch.Tensor): - instances.set(k, tensor_source) - else: - raise ValueError("Can't handle targe type: {}".format(target_type)) - - ret.append(instances) - return ret - - -class Caffe2Compatible: - """ - A model can inherit this class to indicate that it can be traced and deployed with caffe2. - """ - - def _get_tensor_mode(self): - return self._tensor_mode - - def _set_tensor_mode(self, v): - self._tensor_mode = v - - tensor_mode = property(_get_tensor_mode, _set_tensor_mode) - """ - If true, the model expects C2-style tensor only inputs/outputs format. 
- """ - - -class Caffe2RPN(Caffe2Compatible, rpn.RPN): - @classmethod - def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): - ret = super(Caffe2Compatible, cls).from_config(cfg, input_shape) - assert tuple(cfg.MODEL.RPN.BBOX_REG_WEIGHTS) == (1.0, 1.0, 1.0, 1.0) or tuple( - cfg.MODEL.RPN.BBOX_REG_WEIGHTS - ) == (1.0, 1.0, 1.0, 1.0, 1.0) - return ret - - def _generate_proposals( - self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None - ): - assert isinstance(images, ImageList) - if self.tensor_mode: - im_info = images.image_sizes - else: - im_info = torch.tensor([[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]).to( - images.tensor.device - ) - assert isinstance(im_info, torch.Tensor) - - rpn_rois_list = [] - rpn_roi_probs_list = [] - for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( - objectness_logits_pred, - anchor_deltas_pred, - [b for (n, b) in self.anchor_generator.cell_anchors.named_buffers()], - self.anchor_generator.strides, - ): - scores = scores.detach() - bbox_deltas = bbox_deltas.detach() - - rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( - scores, - bbox_deltas, - im_info, - cell_anchors_tensor, - spatial_scale=1.0 / feat_stride, - pre_nms_topN=self.pre_nms_topk[self.training], - post_nms_topN=self.post_nms_topk[self.training], - nms_thresh=self.nms_thresh, - min_size=self.min_box_size, - # correct_transform_coords=True, # deprecated argument - angle_bound_on=True, # Default - angle_bound_lo=-180, - angle_bound_hi=180, - clip_angle_thresh=1.0, # Default - legacy_plus_one=False, - ) - rpn_rois_list.append(rpn_rois) - rpn_roi_probs_list.append(rpn_roi_probs) - - # For FPN in D2, in RPN all proposals from different levels are concated - # together, ranked and picked by top post_nms_topk. Then in ROIPooler - # it calculates level_assignments and calls the RoIAlign from - # the corresponding level. - - if len(objectness_logits_pred) == 1: - rpn_rois = rpn_rois_list[0] - rpn_roi_probs = rpn_roi_probs_list[0] - else: - assert len(rpn_rois_list) == len(rpn_roi_probs_list) - rpn_post_nms_topN = self.post_nms_topk[self.training] - - device = rpn_rois_list[0].device - input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] - - # TODO remove this after confirming rpn_max_level/rpn_min_level - # is not needed in CollectRpnProposals. - feature_strides = list(self.anchor_generator.strides) - rpn_min_level = int(math.log2(feature_strides[0])) - rpn_max_level = int(math.log2(feature_strides[-1])) - assert (rpn_max_level - rpn_min_level + 1) == len( - rpn_rois_list - ), "CollectRpnProposals requires continuous levels" - - rpn_rois = torch.ops._caffe2.CollectRpnProposals( - input_list, - # NOTE: in current implementation, rpn_max_level and rpn_min_level - # are not needed, only the subtraction of two matters and it - # can be infer from the number of inputs. Keep them now for - # consistency. 
- rpn_max_level=2 + len(rpn_rois_list) - 1, - rpn_min_level=2, - rpn_post_nms_topN=rpn_post_nms_topN, - ) - rpn_rois = to_device(rpn_rois, device) - rpn_roi_probs = [] - - proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) - return proposals, {} - - def forward(self, images, features, gt_instances=None): - assert not self.training - features = [features[f] for f in self.in_features] - objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) - return self._generate_proposals( - images, - objectness_logits_pred, - anchor_deltas_pred, - gt_instances, - ) - - @staticmethod - def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): - proposals = InstancesList( - im_info=im_info, - indices=rpn_rois[:, 0], - extra_fields={ - "proposal_boxes": Caffe2Boxes(rpn_rois), - "objectness_logits": (torch.Tensor, rpn_roi_probs), - }, - ) - if not tensor_mode: - proposals = InstancesList.to_d2_instances_list(proposals) - else: - proposals = [proposals] - return proposals - - -class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): - @staticmethod - def c2_preprocess(box_lists): - assert all(isinstance(x, Boxes) for x in box_lists) - if all(isinstance(x, Caffe2Boxes) for x in box_lists): - # input is pure-tensor based - assert len(box_lists) == 1 - pooler_fmt_boxes = box_lists[0].tensor - else: - pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) - return pooler_fmt_boxes - - def forward(self, x, box_lists): - assert not self.training - - pooler_fmt_boxes = self.c2_preprocess(box_lists) - num_level_assignments = len(self.level_poolers) - - if num_level_assignments == 1: - if isinstance(self.level_poolers[0], ROIAlignRotated): - c2_roi_align = torch.ops._caffe2.RoIAlignRotated - aligned = True - else: - c2_roi_align = torch.ops._caffe2.RoIAlign - aligned = self.level_poolers[0].aligned - - x0 = x[0] - if x0.is_quantized: - x0 = x0.dequantize() - - out = c2_roi_align( - x0, - pooler_fmt_boxes, - order="NCHW", - spatial_scale=float(self.level_poolers[0].spatial_scale), - pooled_h=int(self.output_size[0]), - pooled_w=int(self.output_size[1]), - sampling_ratio=int(self.level_poolers[0].sampling_ratio), - aligned=aligned, - ) - return out - - device = pooler_fmt_boxes.device - assert ( - self.max_level - self.min_level + 1 == 4 - ), "Currently DistributeFpnProposals only support 4 levels" - fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( - to_device(pooler_fmt_boxes, "cpu"), - roi_canonical_scale=self.canonical_box_size, - roi_canonical_level=self.canonical_level, - roi_max_level=self.max_level, - roi_min_level=self.min_level, - legacy_plus_one=False, - ) - fpn_outputs = [to_device(x, device) for x in fpn_outputs] - - rois_fpn_list = fpn_outputs[:-1] - rois_idx_restore_int32 = fpn_outputs[-1] - - roi_feat_fpn_list = [] - for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): - if isinstance(pooler, ROIAlignRotated): - c2_roi_align = torch.ops._caffe2.RoIAlignRotated - aligned = True - else: - c2_roi_align = torch.ops._caffe2.RoIAlign - aligned = bool(pooler.aligned) - - if x_level.is_quantized: - x_level = x_level.dequantize() - - roi_feat_fpn = c2_roi_align( - x_level, - roi_fpn, - order="NCHW", - spatial_scale=float(pooler.spatial_scale), - pooled_h=int(self.output_size[0]), - pooled_w=int(self.output_size[1]), - sampling_ratio=int(pooler.sampling_ratio), - aligned=aligned, - ) - roi_feat_fpn_list.append(roi_feat_fpn) - - roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) - assert roi_feat_shuffled.numel() > 0 
and rois_idx_restore_int32.numel() > 0, ( - "Caffe2 export requires tracing with a model checkpoint + input that can produce valid" - " detections. But no detections were obtained with the given checkpoint and input!" - ) - roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) - return roi_feat - - -def caffe2_fast_rcnn_outputs_inference(tensor_mode, box_predictor, predictions, proposals): - """equivalent to FastRCNNOutputLayers.inference""" - num_classes = box_predictor.num_classes - score_thresh = box_predictor.test_score_thresh - nms_thresh = box_predictor.test_nms_thresh - topk_per_image = box_predictor.test_topk_per_image - is_rotated = len(box_predictor.box2box_transform.weights) == 5 - - if is_rotated: - box_dim = 5 - assert box_predictor.box2box_transform.weights[4] == 1, ( - "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," - + " thus enforcing the angle weight to be 1 for now" - ) - box2box_transform_weights = box_predictor.box2box_transform.weights[:4] - else: - box_dim = 4 - box2box_transform_weights = box_predictor.box2box_transform.weights - - class_logits, box_regression = predictions - if num_classes + 1 == class_logits.shape[1]: - class_prob = F.softmax(class_logits, -1) - else: - assert num_classes == class_logits.shape[1] - class_prob = F.sigmoid(class_logits) - # BoxWithNMSLimit will infer num_classes from the shape of the class_prob - # So append a zero column as placeholder for the background class - class_prob = torch.cat((class_prob, torch.zeros(class_prob.shape[0], 1)), dim=1) - - assert box_regression.shape[1] % box_dim == 0 - cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 - - input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 - - proposal_boxes = proposals[0].proposal_boxes - if isinstance(proposal_boxes, Caffe2Boxes): - rois = Caffe2Boxes.cat([p.proposal_boxes for p in proposals]) - elif isinstance(proposal_boxes, RotatedBoxes): - rois = RotatedBoxes.cat([p.proposal_boxes for p in proposals]) - elif isinstance(proposal_boxes, Boxes): - rois = Boxes.cat([p.proposal_boxes for p in proposals]) - else: - raise NotImplementedError( - 'Expected proposals[0].proposal_boxes to be type "Boxes", ' - f"instead got {type(proposal_boxes)}" - ) - - device, dtype = rois.tensor.device, rois.tensor.dtype - if input_tensor_mode: - im_info = proposals[0].image_size - rois = rois.tensor - else: - im_info = torch.tensor([[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]]) - batch_ids = cat( - [ - torch.full((b, 1), i, dtype=dtype, device=device) - for i, b in enumerate(len(p) for p in proposals) - ], - dim=0, - ) - rois = torch.cat([batch_ids, rois.tensor], dim=1) - - roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( - to_device(rois, "cpu"), - to_device(box_regression, "cpu"), - to_device(im_info, "cpu"), - weights=box2box_transform_weights, - apply_scale=True, - rotated=is_rotated, - angle_bound_on=True, - angle_bound_lo=-180, - angle_bound_hi=180, - clip_angle_thresh=1.0, - legacy_plus_one=False, - ) - roi_pred_bbox = to_device(roi_pred_bbox, device) - roi_batch_splits = to_device(roi_batch_splits, device) - - nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( - to_device(class_prob, "cpu"), - to_device(roi_pred_bbox, "cpu"), - to_device(roi_batch_splits, "cpu"), - score_thresh=float(score_thresh), - nms=float(nms_thresh), - detections_per_im=int(topk_per_image), - soft_nms_enabled=False, - soft_nms_method="linear", - soft_nms_sigma=0.5, - 
soft_nms_min_score_thres=0.001, - rotated=is_rotated, - cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, - input_boxes_include_bg_cls=False, - output_classes_include_bg_cls=False, - legacy_plus_one=False, - ) - roi_score_nms = to_device(nms_outputs[0], device) - roi_bbox_nms = to_device(nms_outputs[1], device) - roi_class_nms = to_device(nms_outputs[2], device) - roi_batch_splits_nms = to_device(nms_outputs[3], device) - roi_keeps_nms = to_device(nms_outputs[4], device) - roi_keeps_size_nms = to_device(nms_outputs[5], device) - if not tensor_mode: - roi_class_nms = roi_class_nms.to(torch.int64) - - roi_batch_ids = cat( - [ - torch.full((b, 1), i, dtype=dtype, device=device) - for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) - ], - dim=0, - ) - - roi_class_nms = alias(roi_class_nms, "class_nms") - roi_score_nms = alias(roi_score_nms, "score_nms") - roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") - roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") - roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") - roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") - - results = InstancesList( - im_info=im_info, - indices=roi_batch_ids[:, 0], - extra_fields={ - "pred_boxes": Caffe2Boxes(roi_bbox_nms), - "scores": roi_score_nms, - "pred_classes": roi_class_nms, - }, - ) - - if not tensor_mode: - results = InstancesList.to_d2_instances_list(results) - batch_splits = roi_batch_splits_nms.int().tolist() - kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) - else: - results = [results] - kept_indices = [roi_keeps_nms] - - return results, kept_indices - - -class Caffe2FastRCNNOutputsInference: - def __init__(self, tensor_mode): - self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode - - def __call__(self, box_predictor, predictions, proposals): - return caffe2_fast_rcnn_outputs_inference( - self.tensor_mode, box_predictor, predictions, proposals - ) - - -def caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances): - """equivalent to mask_head.mask_rcnn_inference""" - if all(isinstance(x, InstancesList) for x in pred_instances): - assert len(pred_instances) == 1 - mask_probs_pred = pred_mask_logits.sigmoid() - mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") - pred_instances[0].set("pred_masks", mask_probs_pred) - else: - mask_rcnn_inference(pred_mask_logits, pred_instances) - - -class Caffe2MaskRCNNInference: - def __call__(self, pred_mask_logits, pred_instances): - return caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances) - - -def caffe2_keypoint_rcnn_inference(use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances): - # just return the keypoint heatmap for now, - # there will be option to call HeatmapMaxKeypointOp - output = alias(pred_keypoint_logits, "kps_score") - if all(isinstance(x, InstancesList) for x in pred_instances): - assert len(pred_instances) == 1 - if use_heatmap_max_keypoint: - device = output.device - output = torch.ops._caffe2.HeatmapMaxKeypoint( - to_device(output, "cpu"), - pred_instances[0].pred_boxes.tensor, - should_output_softmax=True, # worth make it configerable? 
- ) - output = to_device(output, device) - output = alias(output, "keypoints_out") - pred_instances[0].set("pred_keypoints", output) - return pred_keypoint_logits - - -class Caffe2KeypointRCNNInference: - def __init__(self, use_heatmap_max_keypoint): - self.use_heatmap_max_keypoint = use_heatmap_max_keypoint - - def __call__(self, pred_keypoint_logits, pred_instances): - return caffe2_keypoint_rcnn_inference( - self.use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances - ) diff --git a/detectron2/detectron2/export/caffe2_export.py b/detectron2/detectron2/export/caffe2_export.py deleted file mode 100644 index d609c27c7deb396352967dbcbc79b1e00f2a2de1..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/caffe2_export.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import copy -import io -import logging -import numpy as np -from typing import List -import onnx -import onnx.optimizer -import torch -from caffe2.proto import caffe2_pb2 -from caffe2.python import core -from caffe2.python.onnx.backend import Caffe2Backend -from tabulate import tabulate -from termcolor import colored -from torch.onnx import OperatorExportTypes - -from .shared import ( - ScopedWS, - construct_init_net_from_params, - fuse_alias_placeholder, - fuse_copy_between_cpu_and_gpu, - get_params_from_init_net, - group_norm_replace_aten_with_caffe2, - infer_device_type, - remove_dead_end_ops, - remove_reshape_for_fc, - save_graph, -) - -logger = logging.getLogger(__name__) - - -def export_onnx_model(model, inputs): - """ - Trace and export a model to onnx format. - - Args: - model (nn.Module): - inputs (tuple[args]): the model will be called by `model(*inputs)` - - Returns: - an onnx model - """ - assert isinstance(model, torch.nn.Module) - - # make sure all modules are in eval mode, onnx may change the training state - # of the module if the states are not consistent - def _check_eval(module): - assert not module.training - - model.apply(_check_eval) - - # Export the model to ONNX - with torch.no_grad(): - with io.BytesIO() as f: - torch.onnx.export( - model, - inputs, - f, - operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, - # verbose=True, # NOTE: uncomment this for debugging - # export_params=True, - ) - onnx_model = onnx.load_from_string(f.getvalue()) - - return onnx_model - - -def _op_stats(net_def): - type_count = {} - for t in [op.type for op in net_def.op]: - type_count[t] = type_count.get(t, 0) + 1 - type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet - type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count - return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) - - -def _assign_device_option( - predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] -): - """ - ONNX exported network doesn't have concept of device, assign necessary - device option for each op in order to make it runable on GPU runtime. 
- """ - - def _get_device_type(torch_tensor): - assert torch_tensor.device.type in ["cpu", "cuda"] - assert torch_tensor.device.index == 0 - return torch_tensor.device.type - - def _assign_op_device_option(net_proto, net_ssa, blob_device_types): - for op, ssa_i in zip(net_proto.op, net_ssa): - if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: - op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) - else: - devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] - assert all(d == devices[0] for d in devices) - if devices[0] == "cuda": - op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) - - # update ops in predict_net - predict_net_input_device_types = { - (name, 0): _get_device_type(tensor) - for name, tensor in zip(predict_net.external_input, tensor_inputs) - } - predict_net_device_types = infer_device_type( - predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" - ) - predict_net_ssa, _ = core.get_ssa(predict_net) - _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) - - # update ops in init_net - init_net_ssa, versions = core.get_ssa(init_net) - init_net_output_device_types = { - (name, versions[name]): predict_net_device_types[(name, 0)] - for name in init_net.external_output - } - init_net_device_types = infer_device_type( - init_net, known_status=init_net_output_device_types, device_name_style="pytorch" - ) - _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) - - -def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): - """ - Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. - - Arg: - model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py - tensor_inputs: a list of tensors that caffe2 model takes as input. - """ - model = copy.deepcopy(model) - assert isinstance(model, torch.nn.Module) - assert hasattr(model, "encode_additional_info") - - # Export via ONNX - logger.info( - "Exporting a {} model via ONNX ...".format(type(model).__name__) - + " Some warnings from ONNX are expected and are usually not to worry about." - ) - onnx_model = export_onnx_model(model, (tensor_inputs,)) - # Convert ONNX model to Caffe2 protobuf - init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) - ops_table = [[op.type, op.input, op.output] for op in predict_net.op] - table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") - logger.info( - "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") - ) - - # Apply protobuf optimization - fuse_alias_placeholder(predict_net, init_net) - if any(t.device.type != "cpu" for t in tensor_inputs): - fuse_copy_between_cpu_and_gpu(predict_net) - remove_dead_end_ops(init_net) - _assign_device_option(predict_net, init_net, tensor_inputs) - params, device_options = get_params_from_init_net(init_net) - predict_net, params = remove_reshape_for_fc(predict_net, params) - init_net = construct_init_net_from_params(params, device_options) - group_norm_replace_aten_with_caffe2(predict_net) - - # Record necessary information for running the pb model in Detectron2 system. 
- model.encode_additional_info(predict_net, init_net) - - logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) - logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) - - return predict_net, init_net - - -def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): - """ - Run the caffe2 model on given inputs, recording the shape and draw the graph. - - predict_net/init_net: caffe2 model. - tensor_inputs: a list of tensors that caffe2 model takes as input. - graph_save_path: path for saving graph of exported model. - """ - - logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) - save_graph(predict_net, graph_save_path, op_only=False) - - # Run the exported Caffe2 net - logger.info("Running ONNX exported model ...") - with ScopedWS("__ws_tmp__", True) as ws: - ws.RunNetOnce(init_net) - initialized_blobs = set(ws.Blobs()) - uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] - for name, blob in zip(uninitialized, tensor_inputs): - ws.FeedBlob(name, blob) - - try: - ws.RunNetOnce(predict_net) - except RuntimeError as e: - logger.warning("Encountered RuntimeError: \n{}".format(str(e))) - - ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} - blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} - - logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) - save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) - - return ws_blobs diff --git a/detectron2/detectron2/export/caffe2_inference.py b/detectron2/detectron2/export/caffe2_inference.py deleted file mode 100644 index deb886c0417285ed1d5ad85eb941fa1ac757cdab..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/caffe2_inference.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import logging -import numpy as np -from itertools import count -import torch -from caffe2.proto import caffe2_pb2 -from caffe2.python import core - -from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format -from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type - -logger = logging.getLogger(__name__) - - -# ===== ref: mobile-vision predictor's 'Caffe2Wrapper' class ====== -class ProtobufModel(torch.nn.Module): - """ - Wrapper of a caffe2's protobuf model. - It works just like nn.Module, but running caffe2 under the hood. - Input/Output are tuple[tensor] that match the caffe2 net's external_input/output. 
- """ - - _ids = count(0) - - def __init__(self, predict_net, init_net): - logger.info(f"Initializing ProtobufModel for: {predict_net.name} ...") - super().__init__() - assert isinstance(predict_net, caffe2_pb2.NetDef) - assert isinstance(init_net, caffe2_pb2.NetDef) - # create unique temporary workspace for each instance - self.ws_name = "__tmp_ProtobufModel_{}__".format(next(self._ids)) - self.net = core.Net(predict_net) - - logger.info("Running init_net once to fill the parameters ...") - with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: - ws.RunNetOnce(init_net) - uninitialized_external_input = [] - for blob in self.net.Proto().external_input: - if blob not in ws.Blobs(): - uninitialized_external_input.append(blob) - ws.CreateBlob(blob) - ws.CreateNet(self.net) - - self._error_msgs = set() - self._input_blobs = uninitialized_external_input - - def _infer_output_devices(self, inputs): - """ - Returns: - list[str]: list of device for each external output - """ - - def _get_device_type(torch_tensor): - assert torch_tensor.device.type in ["cpu", "cuda"] - assert torch_tensor.device.index == 0 - return torch_tensor.device.type - - predict_net = self.net.Proto() - input_device_types = { - (name, 0): _get_device_type(tensor) for name, tensor in zip(self._input_blobs, inputs) - } - device_type_map = infer_device_type( - predict_net, known_status=input_device_types, device_name_style="pytorch" - ) - ssa, versions = core.get_ssa(predict_net) - versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] - output_devices = [device_type_map[outp] for outp in versioned_outputs] - return output_devices - - def forward(self, inputs): - """ - Args: - inputs (tuple[torch.Tensor]) - - Returns: - tuple[torch.Tensor] - """ - assert len(inputs) == len(self._input_blobs), ( - f"Length of inputs ({len(inputs)}) " - f"doesn't match the required input blobs: {self._input_blobs}" - ) - - with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: - for b, tensor in zip(self._input_blobs, inputs): - ws.FeedBlob(b, tensor) - - try: - ws.RunNet(self.net.Proto().name) - except RuntimeError as e: - if not str(e) in self._error_msgs: - self._error_msgs.add(str(e)) - logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) - logger.warning("Catch the error and use partial results.") - - c2_outputs = [ws.FetchBlob(b) for b in self.net.Proto().external_output] - # Remove outputs of current run, this is necessary in order to - # prevent fetching the result from previous run if the model fails - # in the middle. - for b in self.net.Proto().external_output: - # Needs to create uninitialized blob to make the net runable. - # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), - # but there'no such API. 
- ws.FeedBlob(b, f"{b}, a C++ native class of type nullptr (uninitialized).") - - # Cast output to torch.Tensor on the desired device - output_devices = ( - self._infer_output_devices(inputs) - if any(t.device.type != "cpu" for t in inputs) - else ["cpu" for _ in self.net.Proto().external_output] - ) - - outputs = [] - for name, c2_output, device in zip( - self.net.Proto().external_output, c2_outputs, output_devices - ): - if not isinstance(c2_output, np.ndarray): - raise RuntimeError( - "Invalid output for blob {}, received: {}".format(name, c2_output) - ) - outputs.append(torch.tensor(c2_output).to(device=device)) - return tuple(outputs) - - -class ProtobufDetectionModel(torch.nn.Module): - """ - A class works just like a pytorch meta arch in terms of inference, but running - caffe2 model under the hood. - """ - - def __init__(self, predict_net, init_net, *, convert_outputs=None): - """ - Args: - predict_net, init_net (core.Net): caffe2 nets - convert_outptus (callable): a function that converts caffe2 - outputs to the same format of the original pytorch model. - By default, use the one defined in the caffe2 meta_arch. - """ - super().__init__() - self.protobuf_model = ProtobufModel(predict_net, init_net) - self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) - self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") - - if convert_outputs is None: - meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") - meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] - self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) - else: - self._convert_outputs = convert_outputs - - def _convert_inputs(self, batched_inputs): - # currently all models convert inputs in the same way - return convert_batched_inputs_to_c2_format( - batched_inputs, self.size_divisibility, self.device - ) - - def forward(self, batched_inputs): - c2_inputs = self._convert_inputs(batched_inputs) - c2_results = self.protobuf_model(c2_inputs) - c2_results = dict(zip(self.protobuf_model.net.Proto().external_output, c2_results)) - return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/detectron2/detectron2/export/caffe2_modeling.py b/detectron2/detectron2/export/caffe2_modeling.py deleted file mode 100644 index 3e675c45d62f7b363a298099cd520c417832d58c..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/caffe2_modeling.py +++ /dev/null @@ -1,420 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import functools -import io -import struct -import types -import torch - -from detectron2.modeling import meta_arch -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.roi_heads import keypoint_head -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes - -from .c10 import Caffe2Compatible -from .caffe2_patch import ROIHeadsPatcher, patch_generalized_rcnn -from .shared import ( - alias, - check_set_pb_arg, - get_pb_arg_floats, - get_pb_arg_valf, - get_pb_arg_vali, - get_pb_arg_vals, - mock_torch_nn_functional_interpolate, -) - - -def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): - """ - A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) - to detectron2's format (i.e. list of Instances instance). - This only works when the model follows the Caffe2 detectron's naming convention. - - Args: - image_sizes (List[List[int, int]]): [H, W] of every image. 
- tensor_outputs (Dict[str, Tensor]): external_output to its tensor. - - force_mask_on (Bool): if true, the it make sure there'll be pred_masks even - if the mask is not found from tensor_outputs (usually due to model crash) - """ - - results = [Instances(image_size) for image_size in image_sizes] - - batch_splits = tensor_outputs.get("batch_splits", None) - if batch_splits: - raise NotImplementedError() - assert len(image_sizes) == 1 - result = results[0] - - bbox_nms = tensor_outputs["bbox_nms"] - score_nms = tensor_outputs["score_nms"] - class_nms = tensor_outputs["class_nms"] - # Detection will always success because Conv support 0-batch - assert bbox_nms is not None - assert score_nms is not None - assert class_nms is not None - if bbox_nms.shape[1] == 5: - result.pred_boxes = RotatedBoxes(bbox_nms) - else: - result.pred_boxes = Boxes(bbox_nms) - result.scores = score_nms - result.pred_classes = class_nms.to(torch.int64) - - mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) - if mask_fcn_probs is not None: - # finish the mask pred - mask_probs_pred = mask_fcn_probs - num_masks = mask_probs_pred.shape[0] - class_pred = result.pred_classes - indices = torch.arange(num_masks, device=class_pred.device) - mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] - result.pred_masks = mask_probs_pred - elif force_mask_on: - # NOTE: there's no way to know the height/width of mask here, it won't be - # used anyway when batch size is 0, so just set them to 0. - result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) - - keypoints_out = tensor_outputs.get("keypoints_out", None) - kps_score = tensor_outputs.get("kps_score", None) - if keypoints_out is not None: - # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) - keypoints_tensor = keypoints_out - # NOTE: it's possible that prob is not calculated if "should_output_softmax" - # is set to False in HeatmapMaxKeypoint, so just using raw score, seems - # it doesn't affect mAP. TODO: check more carefully. - keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] - result.pred_keypoints = keypoint_xyp - elif kps_score is not None: - # keypoint heatmap to sparse data structure - pred_keypoint_logits = kps_score - keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) - - return results - - -def _cast_to_f32(f64): - return struct.unpack("f", struct.pack("f", f64))[0] - - -def set_caffe2_compatible_tensor_mode(model, enable=True): - def _fn(m): - if isinstance(m, Caffe2Compatible): - m.tensor_mode = enable - - model.apply(_fn) - - -def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): - """ - See get_caffe2_inputs() below. - """ - assert all(isinstance(x, dict) for x in batched_inputs) - assert all(x["image"].dim() == 3 for x in batched_inputs) - - images = [x["image"] for x in batched_inputs] - images = ImageList.from_tensors(images, size_divisibility) - - im_info = [] - for input_per_image, image_size in zip(batched_inputs, images.image_sizes): - target_height = input_per_image.get("height", image_size[0]) - target_width = input_per_image.get("width", image_size[1]) # noqa - # NOTE: The scale inside im_info is kept as convention and for providing - # post-processing information if further processing is needed. For - # current Caffe2 model definitions that don't include post-processing inside - # the model, this number is not used. 
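        # Illustrative example (hypothetical numbers): if the unpadded input inside the
        # batch has image_size (480, 640) and the requested output "height" is 800, the
        # resulting im_info row is [480, 640, 800 / 480 ~= 1.67].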
- # NOTE: There can be a slight difference between width and height - # scales, using a single number can results in numerical difference - # compared with D2's post-processing. - scale = target_height / image_size[0] - im_info.append([image_size[0], image_size[1], scale]) - im_info = torch.Tensor(im_info) - - return images.tensor.to(device), im_info.to(device) - - -class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): - """ - Base class for caffe2-compatible implementation of a meta architecture. - The forward is traceable and its traced graph can be converted to caffe2 - graph through ONNX. - """ - - def __init__(self, cfg, torch_model, enable_tensor_mode=True): - """ - Args: - cfg (CfgNode): - torch_model (nn.Module): the detectron2 model (meta_arch) to be - converted. - """ - super().__init__() - self._wrapped_model = torch_model - self.eval() - set_caffe2_compatible_tensor_mode(self, enable_tensor_mode) - - def get_caffe2_inputs(self, batched_inputs): - """ - Convert pytorch-style structured inputs to caffe2-style inputs that - are tuples of tensors. - - Args: - batched_inputs (list[dict]): inputs to a detectron2 model - in its standard format. Each dict has "image" (CHW tensor), and optionally - "height" and "width". - - Returns: - tuple[Tensor]: - tuple of tensors that will be the inputs to the - :meth:`forward` method. For existing models, the first - is an NCHW tensor (padded and batched); the second is - a im_info Nx3 tensor, where the rows are - (height, width, unused legacy parameter) - """ - return convert_batched_inputs_to_c2_format( - batched_inputs, - self._wrapped_model.backbone.size_divisibility, - self._wrapped_model.device, - ) - - def encode_additional_info(self, predict_net, init_net): - """ - Save extra metadata that will be used by inference in the output protobuf. - """ - pass - - def forward(self, inputs): - """ - Run the forward in caffe2-style. It has to use caffe2-compatible ops - and the method will be used for tracing. - - Args: - inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. - They will be the inputs of the converted caffe2 graph. - - Returns: - tuple[Tensor]: output tensors. They will be the outputs of the - converted caffe2 graph. - """ - raise NotImplementedError - - def _caffe2_preprocess_image(self, inputs): - """ - Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. - It normalizes the input images, and the final caffe2 graph assumes the - inputs have been batched already. - """ - data, im_info = inputs - data = alias(data, "data") - im_info = alias(im_info, "im_info") - mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std - normalized_data = (data - mean) / std - normalized_data = alias(normalized_data, "normalized_data") - - # Pack (data, im_info) into ImageList which is recognized by self.inference. - images = ImageList(tensor=normalized_data, image_sizes=im_info) - return images - - @staticmethod - def get_outputs_converter(predict_net, init_net): - """ - Creates a function that converts outputs of the caffe2 model to - detectron2's standard format. - The function uses information in `predict_net` and `init_net` that are - available at inferene time. Therefore the function logic can be used in inference. 
- - The returned function has the following signature: - - def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs - - Where - - * batched_inputs (list[dict]): the original input format of the meta arch - * c2_inputs (tuple[Tensor]): the caffe2 inputs. - * c2_results (dict[str, Tensor]): the caffe2 output format, - corresponding to the outputs of the :meth:`forward` function. - * detectron2_outputs: the original output format of the meta arch. - - This function can be used to compare the outputs of the original meta arch and - the converted caffe2 graph. - - Returns: - callable: a callable of the above signature. - """ - raise NotImplementedError - - -class Caffe2GeneralizedRCNN(Caffe2MetaArch): - def __init__(self, cfg, torch_model, enable_tensor_mode=True): - assert isinstance(torch_model, meta_arch.GeneralizedRCNN) - torch_model = patch_generalized_rcnn(torch_model) - super().__init__(cfg, torch_model, enable_tensor_mode) - - try: - use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT - except AttributeError: - use_heatmap_max_keypoint = False - self.roi_heads_patcher = ROIHeadsPatcher( - self._wrapped_model.roi_heads, use_heatmap_max_keypoint - ) - if self.tensor_mode: - self.roi_heads_patcher.patch_roi_heads() - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - if not self.tensor_mode: - return self._wrapped_model.inference(inputs) - images = self._caffe2_preprocess_image(inputs) - features = self._wrapped_model.backbone(images.tensor) - proposals, _ = self._wrapped_model.proposal_generator(images, features) - detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) - return tuple(detector_results[0].flatten()) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - def f(batched_inputs, c2_inputs, c2_results): - _, im_info = c2_inputs - image_sizes = [[int(im[0]), int(im[1])] for im in im_info] - results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) - return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) - - return f - - -class Caffe2RetinaNet(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.RetinaNet) - super().__init__(cfg, torch_model) - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - assert self.tensor_mode - images = self._caffe2_preprocess_image(inputs) - - # explicitly return the images sizes to avoid removing "im_info" by ONNX - # since it's not used in the forward path - return_tensors = [images.image_sizes] - - features = self._wrapped_model.backbone(images.tensor) - features = [features[f] for f in self._wrapped_model.head_in_features] - for i, feature_i in enumerate(features): - features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) - return_tensors.append(features[i]) - - pred_logits, pred_anchor_deltas = self._wrapped_model.head(features) - for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)): - return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) - return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) - 
- return tuple(return_tensors) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") - - # Inference parameters: - check_set_pb_arg( - predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.test_score_thresh) - ) - check_set_pb_arg( - predict_net, "topk_candidates", "i", self._wrapped_model.test_topk_candidates - ) - check_set_pb_arg( - predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.test_nms_thresh) - ) - check_set_pb_arg( - predict_net, - "max_detections_per_image", - "i", - self._wrapped_model.max_detections_per_image, - ) - - check_set_pb_arg( - predict_net, - "bbox_reg_weights", - "floats", - [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], - ) - self._encode_anchor_generator_cfg(predict_net) - - def _encode_anchor_generator_cfg(self, predict_net): - # serialize anchor_generator for future use - serialized_anchor_generator = io.BytesIO() - torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) - # Ideally we can put anchor generating inside the model, then we don't - # need to store this information. - bytes = serialized_anchor_generator.getvalue() - check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - self = types.SimpleNamespace() - serialized_anchor_generator = io.BytesIO( - get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) - ) - self.anchor_generator = torch.load(serialized_anchor_generator) - bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) - self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) - self.test_score_thresh = get_pb_arg_valf(predict_net, "score_threshold", None) - self.test_topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) - self.test_nms_thresh = get_pb_arg_valf(predict_net, "nms_threshold", None) - self.max_detections_per_image = get_pb_arg_vali( - predict_net, "max_detections_per_image", None - ) - - # hack to reuse inference code from RetinaNet - for meth in [ - "forward_inference", - "inference_single_image", - "_transpose_dense_predictions", - "_decode_multi_level_predictions", - "_decode_per_level_predictions", - ]: - setattr(self, meth, functools.partial(getattr(meta_arch.RetinaNet, meth), self)) - - def f(batched_inputs, c2_inputs, c2_results): - _, im_info = c2_inputs - image_sizes = [[int(im[0]), int(im[1])] for im in im_info] - dummy_images = ImageList( - torch.randn( - ( - len(im_info), - 3, - ) - + tuple(image_sizes[0]) - ), - image_sizes, - ) - - num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) - pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] - pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] - - # For each feature level, feature should have the same batch size and - # spatial dimension as the box_cls and box_delta. 
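            # Note on the line below: the slice [:, 0:0, :, :] keeps the batch and spatial
            # dimensions of each box_cls tensor but selects zero channels, so the dummy
            # "features" are empty placeholders with the right N/H/W and carry no real data.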
- dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits] - # self.num_classess can be inferred - self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4) - - results = self.forward_inference( - dummy_images, dummy_features, [pred_logits, pred_anchor_deltas] - ) - return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) - - return f - - -META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { - "GeneralizedRCNN": Caffe2GeneralizedRCNN, - "RetinaNet": Caffe2RetinaNet, -} diff --git a/detectron2/detectron2/export/caffe2_patch.py b/detectron2/detectron2/export/caffe2_patch.py deleted file mode 100644 index 4ddc2c1c6c5cff3e70df9b6001fcf43aae1d732d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/caffe2_patch.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import contextlib -from unittest import mock -import torch - -from detectron2.modeling import poolers -from detectron2.modeling.proposal_generator import rpn -from detectron2.modeling.roi_heads import keypoint_head, mask_head -from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers - -from .c10 import ( - Caffe2Compatible, - Caffe2FastRCNNOutputsInference, - Caffe2KeypointRCNNInference, - Caffe2MaskRCNNInference, - Caffe2ROIPooler, - Caffe2RPN, - caffe2_fast_rcnn_outputs_inference, - caffe2_keypoint_rcnn_inference, - caffe2_mask_rcnn_inference, -) - - -class GenericMixin: - pass - - -class Caffe2CompatibleConverter: - """ - A GenericUpdater which implements the `create_from` interface, by modifying - module object and assign it with another class replaceCls. - """ - - def __init__(self, replaceCls): - self.replaceCls = replaceCls - - def create_from(self, module): - # update module's class to the new class - assert isinstance(module, torch.nn.Module) - if issubclass(self.replaceCls, GenericMixin): - # replaceCls should act as mixin, create a new class on-the-fly - new_class = type( - "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), - (self.replaceCls, module.__class__), - {}, # {"new_method": lambda self: ...}, - ) - module.__class__ = new_class - else: - # replaceCls is complete class, this allow arbitrary class swap - module.__class__ = self.replaceCls - - # initialize Caffe2Compatible - if isinstance(module, Caffe2Compatible): - module.tensor_mode = False - - return module - - -def patch(model, target, updater, *args, **kwargs): - """ - recursively (post-order) update all modules with the target type and its - subclasses, make a initialization/composition/inheritance/... via the - updater.create_from. 
- """ - for name, module in model.named_children(): - model._modules[name] = patch(module, target, updater, *args, **kwargs) - if isinstance(model, target): - return updater.create_from(model, *args, **kwargs) - return model - - -def patch_generalized_rcnn(model): - ccc = Caffe2CompatibleConverter - model = patch(model, rpn.RPN, ccc(Caffe2RPN)) - model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) - - return model - - -@contextlib.contextmanager -def mock_fastrcnn_outputs_inference( - tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers -): - with mock.patch.object( - box_predictor_type, - "inference", - autospec=True, - side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -@contextlib.contextmanager -def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): - with mock.patch( - "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -@contextlib.contextmanager -def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): - with mock.patch( - "{}.keypoint_rcnn_inference".format(patched_module), - side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -class ROIHeadsPatcher: - def __init__(self, heads, use_heatmap_max_keypoint): - self.heads = heads - self.use_heatmap_max_keypoint = use_heatmap_max_keypoint - self.previous_patched = {} - - @contextlib.contextmanager - def mock_roi_heads(self, tensor_mode=True): - """ - Patching several inference functions inside ROIHeads and its subclasses - - Args: - tensor_mode (bool): whether the inputs/outputs are caffe2's tensor - format or not. Default to True. - """ - # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` - # are called inside the same file as BaseXxxHead due to using mock.patch. 
- kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ - mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ - - mock_ctx_managers = [ - mock_fastrcnn_outputs_inference( - tensor_mode=tensor_mode, - check=True, - box_predictor_type=type(self.heads.box_predictor), - ) - ] - if getattr(self.heads, "keypoint_on", False): - mock_ctx_managers += [ - mock_keypoint_rcnn_inference( - tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint - ) - ] - if getattr(self.heads, "mask_on", False): - mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] - - with contextlib.ExitStack() as stack: # python 3.3+ - for mgr in mock_ctx_managers: - stack.enter_context(mgr) - yield - - def patch_roi_heads(self, tensor_mode=True): - self.previous_patched["box_predictor"] = self.heads.box_predictor.inference - self.previous_patched["keypoint_rcnn"] = keypoint_head.keypoint_rcnn_inference - self.previous_patched["mask_rcnn"] = mask_head.mask_rcnn_inference - - def patched_fastrcnn_outputs_inference(predictions, proposal): - return caffe2_fast_rcnn_outputs_inference( - True, self.heads.box_predictor, predictions, proposal - ) - - self.heads.box_predictor.inference = patched_fastrcnn_outputs_inference - - if getattr(self.heads, "keypoint_on", False): - - def patched_keypoint_rcnn_inference(pred_keypoint_logits, pred_instances): - return caffe2_keypoint_rcnn_inference( - self.use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances - ) - - keypoint_head.keypoint_rcnn_inference = patched_keypoint_rcnn_inference - - if getattr(self.heads, "mask_on", False): - - def patched_mask_rcnn_inference(pred_mask_logits, pred_instances): - return caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances) - - mask_head.mask_rcnn_inference = patched_mask_rcnn_inference - - def unpatch_roi_heads(self): - self.heads.box_predictor.inference = self.previous_patched["box_predictor"] - keypoint_head.keypoint_rcnn_inference = self.previous_patched["keypoint_rcnn"] - mask_head.mask_rcnn_inference = self.previous_patched["mask_rcnn"] diff --git a/detectron2/detectron2/export/flatten.py b/detectron2/detectron2/export/flatten.py deleted file mode 100644 index f5ba4297567d650f147eebeed361e9d62fab899d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/flatten.py +++ /dev/null @@ -1,330 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import collections -from dataclasses import dataclass -from typing import Callable, List, Optional, Tuple -import torch -from torch import nn - -from detectron2.structures import Boxes, Instances, ROIMasks -from detectron2.utils.registry import _convert_target_to_string, locate - -from .torchscript_patch import patch_builtin_len - - -@dataclass -class Schema: - """ - A Schema defines how to flatten a possibly hierarchical object into tuple of - primitive objects, so it can be used as inputs/outputs of PyTorch's tracing. - - PyTorch does not support tracing a function that produces rich output - structures (e.g. dict, Instances, Boxes). To trace such a function, we - flatten the rich object into tuple of tensors, and return this tuple of tensors - instead. Meanwhile, we also need to know how to "rebuild" the original object - from the flattened results, so we can evaluate the flattened results. - A Schema defines how to flatten an object, and while flattening it, it records - necessary schemas so that the object can be rebuilt using the flattened outputs. 
- - The flattened object and the schema object is returned by ``.flatten`` classmethod. - Then the original object can be rebuilt with the ``__call__`` method of schema. - - A Schema is a dataclass that can be serialized easily. - """ - - # inspired by FetchMapper in tensorflow/python/client/session.py - - @classmethod - def flatten(cls, obj): - raise NotImplementedError - - def __call__(self, values): - raise NotImplementedError - - @staticmethod - def _concat(values): - ret = () - sizes = [] - for v in values: - assert isinstance(v, tuple), "Flattened results must be a tuple" - ret = ret + v - sizes.append(len(v)) - return ret, sizes - - @staticmethod - def _split(values, sizes): - if len(sizes): - expected_len = sum(sizes) - assert ( - len(values) == expected_len - ), f"Values has length {len(values)} but expect length {expected_len}." - ret = [] - for k in range(len(sizes)): - begin, end = sum(sizes[:k]), sum(sizes[: k + 1]) - ret.append(values[begin:end]) - return ret - - -@dataclass -class ListSchema(Schema): - schemas: List[Schema] # the schemas that define how to flatten each element in the list - sizes: List[int] # the flattened length of each element - - def __call__(self, values): - values = self._split(values, self.sizes) - if len(values) != len(self.schemas): - raise ValueError( - f"Values has length {len(values)} but schemas " f"has length {len(self.schemas)}!" - ) - values = [m(v) for m, v in zip(self.schemas, values)] - return list(values) - - @classmethod - def flatten(cls, obj): - res = [flatten_to_tuple(k) for k in obj] - values, sizes = cls._concat([k[0] for k in res]) - return values, cls([k[1] for k in res], sizes) - - -@dataclass -class TupleSchema(ListSchema): - def __call__(self, values): - return tuple(super().__call__(values)) - - -@dataclass -class IdentitySchema(Schema): - def __call__(self, values): - return values[0] - - @classmethod - def flatten(cls, obj): - return (obj,), cls() - - -@dataclass -class DictSchema(ListSchema): - keys: List[str] - - def __call__(self, values): - values = super().__call__(values) - return dict(zip(self.keys, values)) - - @classmethod - def flatten(cls, obj): - for k in obj.keys(): - if not isinstance(k, str): - raise KeyError("Only support flattening dictionaries if keys are str.") - keys = sorted(obj.keys()) - values = [obj[k] for k in keys] - ret, schema = ListSchema.flatten(values) - return ret, cls(schema.schemas, schema.sizes, keys) - - -@dataclass -class InstancesSchema(DictSchema): - def __call__(self, values): - image_size, fields = values[-1], values[:-1] - fields = super().__call__(fields) - return Instances(image_size, **fields) - - @classmethod - def flatten(cls, obj): - ret, schema = super().flatten(obj.get_fields()) - size = obj.image_size - if not isinstance(size, torch.Tensor): - size = torch.tensor(size) - return ret + (size,), schema - - -@dataclass -class TensorWrapSchema(Schema): - """ - For classes that are simple wrapper of tensors, e.g. - Boxes, RotatedBoxes, BitMasks - """ - - class_name: str - - def __call__(self, values): - return locate(self.class_name)(values[0]) - - @classmethod - def flatten(cls, obj): - return (obj.tensor,), cls(_convert_target_to_string(type(obj))) - - -# if more custom structures needed in the future, can allow -# passing in extra schemas for custom types -def flatten_to_tuple(obj): - """ - Flatten an object so it can be used for PyTorch tracing. - Also returns how to rebuild the original object from the flattened outputs. 
- - Returns: - res (tuple): the flattened results that can be used as tracing outputs - schema: an object with a ``__call__`` method such that ``schema(res) == obj``. - It is a pure dataclass that can be serialized. - """ - schemas = [ - ((str, bytes), IdentitySchema), - (list, ListSchema), - (tuple, TupleSchema), - (collections.abc.Mapping, DictSchema), - (Instances, InstancesSchema), - ((Boxes, ROIMasks), TensorWrapSchema), - ] - for klass, schema in schemas: - if isinstance(obj, klass): - F = schema - break - else: - F = IdentitySchema - - return F.flatten(obj) - - -class TracingAdapter(nn.Module): - """ - A model may take rich input/output format (e.g. dict or custom classes), - but `torch.jit.trace` requires tuple of tensors as input/output. - This adapter flattens input/output format of a model so it becomes traceable. - - It also records the necessary schema to rebuild model's inputs/outputs from flattened - inputs/outputs. - - Example: - :: - outputs = model(inputs) # inputs/outputs may be rich structure - adapter = TracingAdapter(model, inputs) - - # can now trace the model, with adapter.flattened_inputs, or another - # tuple of tensors with the same length and meaning - traced = torch.jit.trace(adapter, adapter.flattened_inputs) - - # traced model can only produce flattened outputs (tuple of tensors) - flattened_outputs = traced(*adapter.flattened_inputs) - # adapter knows the schema to convert it back (new_outputs == outputs) - new_outputs = adapter.outputs_schema(flattened_outputs) - """ - - flattened_inputs: Tuple[torch.Tensor] = None - """ - Flattened version of inputs given to this class's constructor. - """ - - inputs_schema: Schema = None - """ - Schema of the inputs given to this class's constructor. - """ - - outputs_schema: Schema = None - """ - Schema of the output produced by calling the given model with inputs. - """ - - def __init__( - self, - model: nn.Module, - inputs, - inference_func: Optional[Callable] = None, - allow_non_tensor: bool = False, - ): - """ - Args: - model: an nn.Module - inputs: An input argument or a tuple of input arguments used to call model. - After flattening, it has to only consist of tensors. - inference_func: a callable that takes (model, *inputs), calls the - model with inputs, and return outputs. By default it - is ``lambda model, *inputs: model(*inputs)``. Can be override - if you need to call the model differently. - allow_non_tensor: allow inputs/outputs to contain non-tensor objects. - This option will filter out non-tensor objects to make the - model traceable, but ``inputs_schema``/``outputs_schema`` cannot be - used anymore because inputs/outputs cannot be rebuilt from pure tensors. - This is useful when you're only interested in the single trace of - execution (e.g. for flop count), but not interested in - generalizing the traced graph to new inputs. 
- """ - super().__init__() - if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): - model = model.module - self.model = model - if not isinstance(inputs, tuple): - inputs = (inputs,) - self.inputs = inputs - self.allow_non_tensor = allow_non_tensor - - if inference_func is None: - inference_func = lambda model, *inputs: model(*inputs) # noqa - self.inference_func = inference_func - - self.flattened_inputs, self.inputs_schema = flatten_to_tuple(inputs) - - if all(isinstance(x, torch.Tensor) for x in self.flattened_inputs): - return - if self.allow_non_tensor: - self.flattened_inputs = tuple( - [x for x in self.flattened_inputs if isinstance(x, torch.Tensor)] - ) - self.inputs_schema = None - else: - for input in self.flattened_inputs: - if not isinstance(input, torch.Tensor): - raise ValueError( - "Inputs for tracing must only contain tensors. " - f"Got a {type(input)} instead." - ) - - def forward(self, *args: torch.Tensor): - with torch.no_grad(), patch_builtin_len(): - if self.inputs_schema is not None: - inputs_orig_format = self.inputs_schema(args) - else: - if len(args) != len(self.flattened_inputs) or any( - x is not y for x, y in zip(args, self.flattened_inputs) - ): - raise ValueError( - "TracingAdapter does not contain valid inputs_schema." - " So it cannot generalize to other inputs and must be" - " traced with `.flattened_inputs`." - ) - inputs_orig_format = self.inputs - - outputs = self.inference_func(self.model, *inputs_orig_format) - flattened_outputs, schema = flatten_to_tuple(outputs) - - flattened_output_tensors = tuple( - [x for x in flattened_outputs if isinstance(x, torch.Tensor)] - ) - if len(flattened_output_tensors) < len(flattened_outputs): - if self.allow_non_tensor: - flattened_outputs = flattened_output_tensors - self.outputs_schema = None - else: - raise ValueError( - "Model cannot be traced because some model outputs " - "cannot flatten to tensors." - ) - else: # schema is valid - if self.outputs_schema is None: - self.outputs_schema = schema - else: - assert self.outputs_schema == schema, ( - "Model should always return outputs with the same " - "structure so it can be traced!" - ) - return flattened_outputs - - def _create_wrapper(self, traced_model): - """ - Return a function that has an input/output interface the same as the - original model, but it calls the given traced model under the hood. - """ - - def forward(*args): - flattened_inputs, _ = flatten_to_tuple(args) - flattened_outputs = traced_model(*flattened_inputs) - return self.outputs_schema(flattened_outputs) - - return forward diff --git a/detectron2/detectron2/export/shared.py b/detectron2/detectron2/export/shared.py deleted file mode 100644 index 5d8a3b551a5dc698f5c4c09d2dc7a02052a61ca2..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/shared.py +++ /dev/null @@ -1,1040 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import collections -import copy -import functools -import logging -import numpy as np -import os -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from unittest import mock -import caffe2.python.utils as putils -import torch -import torch.nn.functional as F -from caffe2.proto import caffe2_pb2 -from caffe2.python import core, net_drawer, workspace -from torch.nn.functional import interpolate as interp - -logger = logging.getLogger(__name__) - - -# ==== torch/utils_toffee/cast.py ======================================= - - -def to_device(t, device_str): - """ - This function is a replacement of .to(another_device) such that it allows the - casting to be traced properly by explicitly calling the underlying copy ops. - It also avoids introducing unncessary op when casting to the same device. - """ - src = t.device - dst = torch.device(device_str) - - if src == dst: - return t - elif src.type == "cuda" and dst.type == "cpu": - return torch.ops._caffe2.CopyGPUToCPU(t) - elif src.type == "cpu" and dst.type == "cuda": - return torch.ops._caffe2.CopyCPUToGPU(t) - else: - raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) - - -# ==== torch/utils_toffee/interpolate.py ======================================= - - -# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py -def BilinearInterpolation(tensor_in, up_scale): - assert up_scale % 2 == 0, "Scale should be even" - - def upsample_filt(size): - factor = (size + 1) // 2 - if size % 2 == 1: - center = factor - 1 - else: - center = factor - 0.5 - - og = np.ogrid[:size, :size] - return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) - - kernel_size = int(up_scale) * 2 - bil_filt = upsample_filt(kernel_size) - - dim = int(tensor_in.shape[1]) - kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) - kernel[range(dim), range(dim), :, :] = bil_filt - - tensor_out = F.conv_transpose2d( - tensor_in, - weight=to_device(torch.Tensor(kernel), tensor_in.device), - bias=None, - stride=int(up_scale), - padding=int(up_scale / 2), - ) - - return tensor_out - - -# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if -# using dynamic `scale_factor` rather than static `size`. (T43166860) -# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. -def onnx_compatibale_interpolate( - input, size=None, scale_factor=None, mode="nearest", align_corners=None -): - # NOTE: The input dimensions are interpreted in the form: - # `mini-batch x channels x [optional depth] x [optional height] x width`. - if size is None and scale_factor is not None: - if input.dim() == 4: - if isinstance(scale_factor, (int, float)): - height_scale, width_scale = (scale_factor, scale_factor) - else: - assert isinstance(scale_factor, (tuple, list)) - assert len(scale_factor) == 2 - height_scale, width_scale = scale_factor - - assert not align_corners, "No matching C2 op for align_corners == True" - if mode == "nearest": - return torch.ops._caffe2.ResizeNearest( - input, order="NCHW", width_scale=width_scale, height_scale=height_scale - ) - elif mode == "bilinear": - logger.warning( - "Use F.conv_transpose2d for bilinear interpolate" - " because there's no such C2 op, this may cause significant" - " slowdown and the boundary pixels won't be as same as" - " using F.interpolate due to padding." 
- ) - assert height_scale == width_scale - return BilinearInterpolation(input, up_scale=height_scale) - logger.warning("Output size is not static, it might cause ONNX conversion issue") - - return interp(input, size, scale_factor, mode, align_corners) - - -def mock_torch_nn_functional_interpolate(): - def decorator(func): - @functools.wraps(func) - def _mock_torch_nn_functional_interpolate(*args, **kwargs): - if torch.onnx.is_in_onnx_export(): - with mock.patch( - "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate - ): - return func(*args, **kwargs) - else: - return func(*args, **kwargs) - - return _mock_torch_nn_functional_interpolate - - return decorator - - -# ==== torch/utils_caffe2/ws_utils.py ========================================== - - -class ScopedWS: - def __init__(self, ws_name, is_reset, is_cleanup=False): - self.ws_name = ws_name - self.is_reset = is_reset - self.is_cleanup = is_cleanup - self.org_ws = "" - - def __enter__(self): - self.org_ws = workspace.CurrentWorkspace() - if self.ws_name is not None: - workspace.SwitchWorkspace(self.ws_name, True) - if self.is_reset: - workspace.ResetWorkspace() - - return workspace - - def __exit__(self, *args): - if self.is_cleanup: - workspace.ResetWorkspace() - if self.ws_name is not None: - workspace.SwitchWorkspace(self.org_ws) - - -def fetch_any_blob(name): - bb = None - try: - bb = workspace.FetchBlob(name) - except TypeError: - bb = workspace.FetchInt8Blob(name) - except Exception as e: - logger.error("Get blob {} error: {}".format(name, e)) - - return bb - - -# ==== torch/utils_caffe2/protobuf.py ========================================== - - -def get_pb_arg(pb, arg_name): - for x in pb.arg: - if x.name == arg_name: - return x - return None - - -def get_pb_arg_valf(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.f if arg is not None else default_val - - -def get_pb_arg_floats(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(map(float, arg.floats)) if arg is not None else default_val - - -def get_pb_arg_ints(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(map(int, arg.ints)) if arg is not None else default_val - - -def get_pb_arg_vali(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.i if arg is not None else default_val - - -def get_pb_arg_vals(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.s if arg is not None else default_val - - -def get_pb_arg_valstrings(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(arg.strings) if arg is not None else default_val - - -def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False): - arg = get_pb_arg(pb, arg_name) - if arg is None: - arg = putils.MakeArgument(arg_name, arg_value) - assert hasattr(arg, arg_attr) - pb.arg.extend([arg]) - if allow_override and getattr(arg, arg_attr) != arg_value: - logger.warning( - "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) - ) - setattr(arg, arg_attr, arg_value) - else: - assert arg is not None - assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( - getattr(arg, arg_attr), arg_value - ) - - -def _create_const_fill_op_from_numpy(name, tensor, device_option=None): - assert type(tensor) is np.ndarray - kTypeNameMapper = { - np.dtype("float32"): "GivenTensorFill", - np.dtype("int32"): "GivenTensorIntFill", - np.dtype("int64"): "GivenTensorInt64Fill", - np.dtype("uint8"): 
"GivenTensorStringFill", - } - - args_dict = {} - if tensor.dtype == np.dtype("uint8"): - args_dict.update({"values": [str(tensor.data)], "shape": [1]}) - else: - args_dict.update({"values": tensor, "shape": tensor.shape}) - - if device_option is not None: - args_dict["device_option"] = device_option - - return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) - - -def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): - assert type(int8_tensor) is workspace.Int8Tensor - kTypeNameMapper = { - np.dtype("int32"): "Int8GivenIntTensorFill", - np.dtype("uint8"): "Int8GivenTensorFill", - } - - tensor = int8_tensor.data - assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] - values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor - - return core.CreateOperator( - kTypeNameMapper[tensor.dtype], - [], - [name], - values=values, - shape=tensor.shape, - Y_scale=int8_tensor.scale, - Y_zero_point=int8_tensor.zero_point, - ) - - -def create_const_fill_op( - name: str, - blob: Union[np.ndarray, workspace.Int8Tensor], - device_option: Optional[caffe2_pb2.DeviceOption] = None, -) -> caffe2_pb2.OperatorDef: - """ - Given a blob object, return the Caffe2 operator that creates this blob - as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. - """ - - tensor_type = type(blob) - assert tensor_type in [ - np.ndarray, - workspace.Int8Tensor, - ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( - name, type(blob) - ) - - if tensor_type == np.ndarray: - return _create_const_fill_op_from_numpy(name, blob, device_option) - elif tensor_type == workspace.Int8Tensor: - assert device_option is None - return _create_const_fill_op_from_c2_int8_tensor(name, blob) - - -def construct_init_net_from_params( - params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None -) -> caffe2_pb2.NetDef: - """ - Construct the init_net from params dictionary - """ - init_net = caffe2_pb2.NetDef() - device_options = device_options or {} - for name, blob in params.items(): - if isinstance(blob, str): - logger.warning( - ( - "Blob {} with type {} is not supported in generating init net," - " skipped.".format(name, type(blob)) - ) - ) - continue - init_net.op.extend( - [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] - ) - init_net.external_output.append(name) - return init_net - - -def get_producer_map(ssa): - """ - Return dict from versioned blob to (i, j), - where i is index of producer op, j is the index of output of that op. - """ - producer_map = {} - for i in range(len(ssa)): - outputs = ssa[i][1] - for j, outp in enumerate(outputs): - producer_map[outp] = (i, j) - return producer_map - - -def get_consumer_map(ssa): - """ - Return dict from versioned blob to list of (i, j), - where i is index of consumer op, j is the index of input of that op. - """ - consumer_map = collections.defaultdict(list) - for i in range(len(ssa)): - inputs = ssa[i][0] - for j, inp in enumerate(inputs): - consumer_map[inp].append((i, j)) - return consumer_map - - -def get_params_from_init_net( - init_net: caffe2_pb2.NetDef, -) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: - """ - Take the output blobs from init_net by running it. 
- Outputs: - params: dict from blob name to numpy array - device_options: dict from blob name to the device option of its creating op - """ - - # NOTE: this assumes that the params is determined by producer op with the - # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. - def _get_device_option(producer_op): - if producer_op.type == "CopyGPUToCPU": - return caffe2_pb2.DeviceOption() - else: - return producer_op.device_option - - with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: - ws.RunNetOnce(init_net) - params = {b: fetch_any_blob(b) for b in init_net.external_output} - ssa, versions = core.get_ssa(init_net) - producer_map = get_producer_map(ssa) - device_options = { - b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) - for b in init_net.external_output - } - return params, device_options - - -def _updater_raise(op, input_types, output_types): - raise RuntimeError( - "Failed to apply updater for op {} given input_types {} and" - " output_types {}".format(op, input_types, output_types) - ) - - -def _generic_status_identifier( - predict_net: caffe2_pb2.NetDef, - status_updater: Callable, - known_status: Dict[Tuple[str, int], Any], -) -> Dict[Tuple[str, int], Any]: - """ - Statically infer the status of each blob, the status can be such as device type - (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here - is versioned blob (Tuple[str, int]) in the format compatible with ssa. - Inputs: - predict_net: the caffe2 network - status_updater: a callable, given an op and the status of its input/output, - it returns the updated status of input/output. `None` is used for - representing unknown status. - known_status: a dict containing known status, used as initialization. - Outputs: - A dict mapping from versioned blob to its status - """ - ssa, versions = core.get_ssa(predict_net) - versioned_ext_input = [(b, 0) for b in predict_net.external_input] - versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] - all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) - - allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) - assert all(k in allowed_vbs for k in known_status) - assert all(v is not None for v in known_status.values()) - _known_status = copy.deepcopy(known_status) - - def _check_and_update(key, value): - assert value is not None - if key in _known_status: - if not _known_status[key] == value: - raise RuntimeError( - "Confilict status for {}, existing status {}, new status {}".format( - key, _known_status[key], value - ) - ) - _known_status[key] = value - - def _update_i(op, ssa_i): - versioned_inputs = ssa_i[0] - versioned_outputs = ssa_i[1] - - inputs_status = [_known_status.get(b, None) for b in versioned_inputs] - outputs_status = [_known_status.get(b, None) for b in versioned_outputs] - - new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) - - for versioned_blob, status in zip( - versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status - ): - if status is not None: - _check_and_update(versioned_blob, status) - - for op, ssa_i in zip(predict_net.op, ssa): - _update_i(op, ssa_i) - for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): - _update_i(op, ssa_i) - - # NOTE: This strictly checks all the blob from predict_net must be assgined - # a known status. However sometimes it's impossible (eg. 
having deadend op), - # we may relax this constraint if - for k in all_versioned_blobs: - if k not in _known_status: - raise NotImplementedError( - "Can not infer the status for {}. Currently only support the case where" - " a single forward and backward pass can identify status for all blobs.".format(k) - ) - - return _known_status - - -def infer_device_type( - predict_net: caffe2_pb2.NetDef, - known_status: Dict[Tuple[str, int], Any], - device_name_style: str = "caffe2", -) -> Dict[Tuple[str, int], str]: - """Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob""" - - assert device_name_style in ["caffe2", "pytorch"] - _CPU_STR = "cpu" - _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" - - def _copy_cpu_to_gpu_updater(op, input_types, output_types): - if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: - _updater_raise(op, input_types, output_types) - return ([_CPU_STR], [_GPU_STR]) - - def _copy_gpu_to_cpu_updater(op, input_types, output_types): - if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: - _updater_raise(op, input_types, output_types) - return ([_GPU_STR], [_CPU_STR]) - - def _other_ops_updater(op, input_types, output_types): - non_none_types = [x for x in input_types + output_types if x is not None] - if len(non_none_types) > 0: - the_type = non_none_types[0] - if not all(x == the_type for x in non_none_types): - _updater_raise(op, input_types, output_types) - else: - the_type = None - return ([the_type for _ in op.input], [the_type for _ in op.output]) - - def _device_updater(op, *args, **kwargs): - return { - "CopyCPUToGPU": _copy_cpu_to_gpu_updater, - "CopyGPUToCPU": _copy_gpu_to_cpu_updater, - }.get(op.type, _other_ops_updater)(op, *args, **kwargs) - - return _generic_status_identifier(predict_net, _device_updater, known_status) - - -# ==== torch/utils_caffe2/vis.py =============================================== - - -def _modify_blob_names(ops, blob_rename_f): - ret = [] - - def _replace_list(blob_list, replaced_list): - del blob_list[:] - blob_list.extend(replaced_list) - - for x in ops: - cur = copy.deepcopy(x) - _replace_list(cur.input, list(map(blob_rename_f, cur.input))) - _replace_list(cur.output, list(map(blob_rename_f, cur.output))) - ret.append(cur) - - return ret - - -def _rename_blob(name, blob_sizes, blob_ranges): - def _list_to_str(bsize): - ret = ", ".join([str(x) for x in bsize]) - ret = "[" + ret + "]" - return ret - - ret = name - if blob_sizes is not None and name in blob_sizes: - ret += "\n" + _list_to_str(blob_sizes[name]) - if blob_ranges is not None and name in blob_ranges: - ret += "\n" + _list_to_str(blob_ranges[name]) - - return ret - - -# graph_name could not contain word 'graph' -def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): - blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) - return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) - - -def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): - graph = None - ops = net.op - if blob_rename_func is not None: - ops = _modify_blob_names(ops, blob_rename_func) - if not op_only: - graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") - else: - graph = net_drawer.GetPydotGraphMinimal( - ops, graph_name, rankdir="TB", minimal_dependency=True - ) - - try: - par_dir = os.path.dirname(file_name) - if not os.path.exists(par_dir): - os.makedirs(par_dir) - - format = 
os.path.splitext(os.path.basename(file_name))[-1] - if format == ".png": - graph.write_png(file_name) - elif format == ".pdf": - graph.write_pdf(file_name) - elif format == ".svg": - graph.write_svg(file_name) - else: - print("Incorrect format {}".format(format)) - except Exception as e: - print("Error when writing graph to image {}".format(e)) - - return graph - - -# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== - - -def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): - """ - For ONNX exported model, GroupNorm will be represented as ATen op, - this can be a drop in replacement from ATen to GroupNorm - """ - count = 0 - for op in predict_net.op: - if op.type == "ATen": - op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 - if op_name and op_name.decode() == "group_norm": - op.arg.remove(get_pb_arg(op, "operator")) - - if get_pb_arg_vali(op, "cudnn_enabled", None): - op.arg.remove(get_pb_arg(op, "cudnn_enabled")) - - num_groups = get_pb_arg_vali(op, "num_groups", None) - if num_groups is not None: - op.arg.remove(get_pb_arg(op, "num_groups")) - check_set_pb_arg(op, "group", "i", num_groups) - - op.type = "GroupNorm" - count += 1 - if count > 1: - logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) - - -# ==== torch/utils_toffee/alias.py ============================================= - - -def alias(x, name, is_backward=False): - if not torch.onnx.is_in_onnx_export(): - return x - assert isinstance(x, torch.Tensor) - return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) - - -def fuse_alias_placeholder(predict_net, init_net): - """Remove AliasWithName placeholder and rename the input/output of it""" - # First we finish all the re-naming - for i, op in enumerate(predict_net.op): - if op.type == "AliasWithName": - assert len(op.input) == 1 - assert len(op.output) == 1 - name = get_pb_arg_vals(op, "name", None).decode() - is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) - rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) - rename_op_output(predict_net, i, 0, name) - - # Remove AliasWithName, should be very safe since it's a non-op - new_ops = [] - for op in predict_net.op: - if op.type != "AliasWithName": - new_ops.append(op) - else: - # safety check - assert op.input == op.output - assert op.input[0] == op.arg[0].s.decode() - del predict_net.op[:] - predict_net.op.extend(new_ops) - - -# ==== torch/utils_caffe2/graph_transform.py =================================== - - -class IllegalGraphTransformError(ValueError): - """When a graph transform function call can't be executed.""" - - -def _rename_versioned_blob_in_proto( - proto: caffe2_pb2.NetDef, - old_name: str, - new_name: str, - version: int, - ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], - start_versions: Dict[str, int], - end_versions: Dict[str, int], -): - """In given proto, rename all blobs with matched version""" - # Operater list - for op, i_th_ssa in zip(proto.op, ssa): - versioned_inputs, versioned_outputs = i_th_ssa - for i in range(len(op.input)): - if versioned_inputs[i] == (old_name, version): - op.input[i] = new_name - for i in range(len(op.output)): - if versioned_outputs[i] == (old_name, version): - op.output[i] = new_name - # external_input - if start_versions.get(old_name, 0) == version: - for i in range(len(proto.external_input)): - if proto.external_input[i] == old_name: - proto.external_input[i] = new_name - # external_output - if end_versions.get(old_name, 0) == 
version: - for i in range(len(proto.external_output)): - if proto.external_output[i] == old_name: - proto.external_output[i] = new_name - - -def rename_op_input( - predict_net: caffe2_pb2.NetDef, - init_net: caffe2_pb2.NetDef, - op_id: int, - input_id: int, - new_name: str, - from_producer: bool = False, -): - """ - Rename the op_id-th operator in predict_net, change it's input_id-th input's - name to the new_name. It also does automatic re-route and change - external_input and init_net if necessary. - - It requires the input is only consumed by this op. - - This function modifies predict_net and init_net in-place. - - When from_producer is enable, this also updates other operators that consumes - the same input. Be cautious because may trigger unintended behavior. - """ - assert isinstance(predict_net, caffe2_pb2.NetDef) - assert isinstance(init_net, caffe2_pb2.NetDef) - - init_net_ssa, init_net_versions = core.get_ssa(init_net) - predict_net_ssa, predict_net_versions = core.get_ssa( - predict_net, copy.deepcopy(init_net_versions) - ) - - versioned_inputs, versioned_outputs = predict_net_ssa[op_id] - old_name, version = versioned_inputs[input_id] - - if from_producer: - producer_map = get_producer_map(predict_net_ssa) - if not (old_name, version) in producer_map: - raise NotImplementedError( - "Can't find producer, the input {} is probably from" - " init_net, this is not supported yet.".format(old_name) - ) - producer = producer_map[(old_name, version)] - rename_op_output(predict_net, producer[0], producer[1], new_name) - return - - def contain_targets(op_ssa): - return (old_name, version) in op_ssa[0] - - is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] - if sum(is_consumer) > 1: - raise IllegalGraphTransformError( - ( - "Input '{}' of operator(#{}) are consumed by other ops, please use" - + " rename_op_output on the producer instead. Offending op: \n{}" - ).format(old_name, op_id, predict_net.op[op_id]) - ) - - # update init_net - _rename_versioned_blob_in_proto( - init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions - ) - # update predict_net - _rename_versioned_blob_in_proto( - predict_net, - old_name, - new_name, - version, - predict_net_ssa, - init_net_versions, - predict_net_versions, - ) - - -def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): - """ - Rename the op_id-th operator in predict_net, change it's output_id-th input's - name to the new_name. It also does automatic re-route and change - external_output and if necessary. - - It allows multiple consumers of its output. - - This function modifies predict_net in-place, doesn't need init_net. - """ - assert isinstance(predict_net, caffe2_pb2.NetDef) - - ssa, blob_versions = core.get_ssa(predict_net) - - versioned_inputs, versioned_outputs = ssa[op_id] - old_name, version = versioned_outputs[output_id] - - # update predict_net - _rename_versioned_blob_in_proto( - predict_net, old_name, new_name, version, ssa, {}, blob_versions - ) - - -def get_sub_graph_external_input_output( - predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] -) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: - """ - Return the list of external input/output of sub-graph, - each element is tuple of the name and corresponding version in predict_net. - - external input/output is defined the same way as caffe2 NetDef. 
- """ - ssa, versions = core.get_ssa(predict_net) - - all_inputs = [] - all_outputs = [] - for op_id in sub_graph_op_indices: - all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] - all_outputs += list(ssa[op_id][1]) # ssa output won't repeat - - # for versioned blobs, external inputs are just those blob in all_inputs - # but not in all_outputs - ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] - - # external outputs are essentially outputs of this subgraph that are used - # outside of this sub-graph (including predict_net.external_output) - all_other_inputs = sum( - (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), - [(outp, versions[outp]) for outp in predict_net.external_output], - ) - ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] - - return ext_inputs, ext_outputs - - -class DiGraph: - """A DAG representation of caffe2 graph, each vertice is a versioned blob.""" - - def __init__(self): - self.vertices = set() - self.graph = collections.defaultdict(list) - - def add_edge(self, u, v): - self.graph[u].append(v) - self.vertices.add(u) - self.vertices.add(v) - - # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ - def get_all_paths(self, s, d): - visited = {k: False for k in self.vertices} - path = [] - all_paths = [] - - def _get_all_paths_util(graph, u, d, visited, path): - visited[u] = True - path.append(u) - if u == d: - all_paths.append(copy.deepcopy(path)) - else: - for i in graph[u]: - if not visited[i]: - _get_all_paths_util(graph, i, d, visited, path) - path.pop() - visited[u] = False - - _get_all_paths_util(self.graph, s, d, visited, path) - return all_paths - - @staticmethod - def from_ssa(ssa): - graph = DiGraph() - for op_id in range(len(ssa)): - for inp in ssa[op_id][0]: - for outp in ssa[op_id][1]: - graph.add_edge(inp, outp) - return graph - - -def _get_dependency_chain(ssa, versioned_target, versioned_source): - """ - Return the index list of relevant operator to produce target blob from source blob, - if there's no dependency, return empty list. - """ - - # finding all paths between nodes can be O(N!), thus we can only search - # in the subgraph using the op starting from the first consumer of source blob - # to the producer of the target blob. - consumer_map = get_consumer_map(ssa) - producer_map = get_producer_map(ssa) - start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 - end_op = ( - producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op - ) - sub_graph_ssa = ssa[start_op : end_op + 1] - if len(sub_graph_ssa) > 30: - logger.warning( - "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" - " might take non-trival time to find all paths between them.".format( - versioned_source, versioned_target, start_op, end_op - ) - ) - - dag = DiGraph.from_ssa(sub_graph_ssa) - paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends - ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] - return sorted(set().union(*[set(ops) for ops in ops_in_paths])) - - -def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: - """ - Idenfity the reshape sub-graph in a protobuf. - The reshape sub-graph is defined as matching the following pattern: - - (input_blob) -> Op_1 -> ... 
-> Op_N -> (new_shape) -─┐ - β””-------------------------------------------> Reshape -> (output_blob) - - Return: - List of sub-graphs, each sub-graph is represented as a list of indices - of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] - """ - - ssa, _ = core.get_ssa(predict_net) - - ret = [] - for i, op in enumerate(predict_net.op): - if op.type == "Reshape": - assert len(op.input) == 2 - input_ssa = ssa[i][0] - data_source = input_ssa[0] - shape_source = input_ssa[1] - op_indices = _get_dependency_chain(ssa, shape_source, data_source) - ret.append(op_indices + [i]) - return ret - - -def remove_reshape_for_fc(predict_net, params): - """ - In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape - a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping - doesn't work well with ONNX and Int8 tools, and cause using extra - ops (eg. ExpandDims) that might not be available on mobile. - Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape - after exporting ONNX model. - """ - from caffe2.python import core - - # find all reshape sub-graph that can be removed, which is now all Reshape - # sub-graph whose output is only consumed by FC. - # TODO: to make it safer, we may need the actually value to better determine - # if a Reshape before FC is removable. - reshape_sub_graphs = identify_reshape_sub_graph(predict_net) - sub_graphs_to_remove = [] - for reshape_sub_graph in reshape_sub_graphs: - reshape_op_id = reshape_sub_graph[-1] - assert predict_net.op[reshape_op_id].type == "Reshape" - ssa, _ = core.get_ssa(predict_net) - reshape_output = ssa[reshape_op_id][1][0] - consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] - if all(predict_net.op[consumer].type == "FC" for consumer in consumers): - # safety check if the sub-graph is isolated, for this reshape sub-graph, - # it means it has one non-param external input and one external output. - ext_inputs, ext_outputs = get_sub_graph_external_input_output( - predict_net, reshape_sub_graph - ) - non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] - if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: - sub_graphs_to_remove.append(reshape_sub_graph) - - # perform removing subgraph by: - # 1: rename the Reshape's output to its input, then the graph can be - # seen as in-place itentify, meaning whose external input/output are the same. - # 2: simply remove those ops. 
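The sub-graphs removed here come from the common eager-mode pattern of flattening a 4D feature map before `nn.Linear`, as described in the docstring of `remove_reshape_for_fc` above. A minimal sketch of that pattern, using a hypothetical head module that is not part of the original file:

    import torch
    from torch import nn

    class Head(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = nn.Linear(256 * 7 * 7, 1024)

        def forward(self, x):             # x: (N, 256, 7, 7)
            x = x.view(x.size(0), -1)     # dynamic flatten; exports as a Shape/.../Reshape chain
            return self.fc(x)             # exports as FC

Since Caffe2's FC op accepts the 4D tensor directly, the exported Reshape sub-graph feeding it is redundant, which is what the code below removes.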
- remove_op_ids = [] - params_to_remove = [] - for sub_graph in sub_graphs_to_remove: - logger.info( - "Remove Reshape sub-graph:\n{}".format( - "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) - ) - ) - reshape_op_id = sub_graph[-1] - new_reshap_output = predict_net.op[reshape_op_id].input[0] - rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) - ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) - non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] - params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] - assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 - assert ext_outputs[0][0] == non_params_ext_inputs[0][0] - assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 - remove_op_ids.extend(sub_graph) - params_to_remove.extend(params_ext_inputs) - - predict_net = copy.deepcopy(predict_net) - new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] - del predict_net.op[:] - predict_net.op.extend(new_ops) - for versioned_params in params_to_remove: - name = versioned_params[0] - logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) - del params[name] - predict_net.external_input.remove(name) - - return predict_net, params - - -def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): - """ - In-place fuse extra copy ops between cpu/gpu for the following case: - a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 - -CopyBToA> c2 -NextOp2-> d2 - The fused network will look like: - a -NextOp1-> d1 - -NextOp2-> d2 - """ - - _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] - - def _fuse_once(predict_net): - ssa, blob_versions = core.get_ssa(predict_net) - consumer_map = get_consumer_map(ssa) - versioned_external_output = [ - (name, blob_versions[name]) for name in predict_net.external_output - ] - - for op_id, op in enumerate(predict_net.op): - if op.type in _COPY_OPS: - fw_copy_versioned_output = ssa[op_id][1][0] - consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] - reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] - - is_fusable = ( - len(consumer_ids) > 0 - and fw_copy_versioned_output not in versioned_external_output - and all( - predict_net.op[_op_id].type == reverse_op_type - and ssa[_op_id][1][0] not in versioned_external_output - for _op_id in consumer_ids - ) - ) - - if is_fusable: - for rv_copy_op_id in consumer_ids: - # making each NextOp uses "a" directly and removing Copy ops - rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] - next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] - predict_net.op[next_op_id].input[inp_id] = op.input[0] - # remove CopyOps - new_ops = [ - op - for i, op in enumerate(predict_net.op) - if i != op_id and i not in consumer_ids - ] - del predict_net.op[:] - predict_net.op.extend(new_ops) - return True - - return False - - # _fuse_once returns False is nothing can be fused - while _fuse_once(predict_net): - pass - - -def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): - """remove ops if its output is not used or not in external_output""" - ssa, versions = core.get_ssa(net_def) - versioned_external_output = [(name, versions[name]) for name in net_def.external_output] - consumer_map = get_consumer_map(ssa) - removed_op_ids = set() - - def _is_dead_end(versioned_blob): - return not ( - versioned_blob in versioned_external_output - or ( - len(consumer_map[versioned_blob]) > 0 - and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) - ) - ) - - for i, ssa_i in reversed(list(enumerate(ssa))): - versioned_outputs = ssa_i[1] - if all(_is_dead_end(outp) for outp in versioned_outputs): - removed_op_ids.add(i) - - # simply removing those deadend ops should have no effect to external_output - new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] - del net_def.op[:] - net_def.op.extend(new_ops) diff --git a/detectron2/detectron2/export/torchscript.py b/detectron2/detectron2/export/torchscript.py deleted file mode 100644 index 24fe59bda44225324928542df3f2ef1745375dfd..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/torchscript.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import os -import torch - -from detectron2.utils.file_io import PathManager - -from .torchscript_patch import freeze_training_mode, patch_instances - -__all__ = ["scripting_with_instances", "dump_torchscript_IR"] - - -def scripting_with_instances(model, fields): - """ - Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since - attributes of :class:`Instances` are "dynamically" added in eager mode,it is difficult - for scripting to support it out of the box. This function is made to support scripting - a model that uses :class:`Instances`. It does the following: - - 1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``, - but with all attributes been "static". - The attributes need to be statically declared in the ``fields`` argument. - 2. Register ``new_Instances``, and force scripting compiler to - use it when trying to compile ``Instances``. - - After this function, the process will be reverted. User should be able to script another model - using different fields. - - Example: - Assume that ``Instances`` in the model consist of two attributes named - ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and - :class:`Tensor` respectively during inference. You can call this function like: - :: - fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} - torchscipt_model = scripting_with_instances(model, fields) - - Note: - It only support models in evaluation mode. - - Args: - model (nn.Module): The input model to be exported by scripting. - fields (Dict[str, type]): Attribute names and corresponding type that - ``Instances`` will use in the model. Note that all attributes used in ``Instances`` - need to be added, regardless of whether they are inputs/outputs of the model. - Data type not defined in detectron2 is not supported for now. - - Returns: - torch.jit.ScriptModule: the model in torchscript format - """ - assert ( - not model.training - ), "Currently we only support exporting models in evaluation mode to torchscript" - - with freeze_training_mode(model), patch_instances(fields): - scripted_model = torch.jit.script(model) - return scripted_model - - -# alias for old name -export_torchscript_with_instances = scripting_with_instances - - -def dump_torchscript_IR(model, dir): - """ - Dump IR of a TracedModule/ScriptModule/Function in various format (code, graph, - inlined graph). Useful for debugging. - - Args: - model (TracedModule/ScriptModule/ScriptFUnction): traced or scripted module - dir (str): output directory to dump files. 
- """ - dir = os.path.expanduser(dir) - PathManager.mkdirs(dir) - - def _get_script_mod(mod): - if isinstance(mod, torch.jit.TracedModule): - return mod._actual_script_module - return mod - - # Dump pretty-printed code: https://pytorch.org/docs/stable/jit.html#inspecting-code - with PathManager.open(os.path.join(dir, "model_ts_code.txt"), "w") as f: - - def get_code(mod): - # Try a few ways to get code using private attributes. - try: - # This contains more information than just `mod.code` - return _get_script_mod(mod)._c.code - except AttributeError: - pass - try: - return mod.code - except AttributeError: - return None - - def dump_code(prefix, mod): - code = get_code(mod) - name = prefix or "root model" - if code is None: - f.write(f"Could not found code for {name} (type={mod.original_name})\n") - f.write("\n") - else: - f.write(f"\nCode for {name}, type={mod.original_name}:\n") - f.write(code) - f.write("\n") - f.write("-" * 80) - - for name, m in mod.named_children(): - dump_code(prefix + "." + name, m) - - if isinstance(model, torch.jit.ScriptFunction): - f.write(get_code(model)) - else: - dump_code("", model) - - def _get_graph(model): - try: - # Recursively dump IR of all modules - return _get_script_mod(model)._c.dump_to_str(True, False, False) - except AttributeError: - return model.graph.str() - - with PathManager.open(os.path.join(dir, "model_ts_IR.txt"), "w") as f: - f.write(_get_graph(model)) - - # Dump IR of the entire graph (all submodules inlined) - with PathManager.open(os.path.join(dir, "model_ts_IR_inlined.txt"), "w") as f: - f.write(str(model.inlined_graph)) - - if not isinstance(model, torch.jit.ScriptFunction): - # Dump the model structure in pytorch style - with PathManager.open(os.path.join(dir, "model.txt"), "w") as f: - f.write(str(model)) diff --git a/detectron2/detectron2/export/torchscript_patch.py b/detectron2/detectron2/export/torchscript_patch.py deleted file mode 100644 index da9b324f1582e31d1a16d2fe462ac2989bea56ea..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/export/torchscript_patch.py +++ /dev/null @@ -1,406 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import os -import sys -import tempfile -from contextlib import ExitStack, contextmanager -from copy import deepcopy -from unittest import mock -import torch -from torch import nn - -# need some explicit imports due to https://github.com/pytorch/pytorch/issues/38964 -import detectron2 # noqa F401 -from detectron2.structures import Boxes, Instances -from detectron2.utils.env import _import_file - -_counter = 0 - - -def _clear_jit_cache(): - from torch.jit._recursive import concrete_type_store - from torch.jit._state import _jit_caching_layer - - concrete_type_store.type_store.clear() # for modules - _jit_caching_layer.clear() # for free functions - - -def _add_instances_conversion_methods(newInstances): - """ - Add from_instances methods to the scripted Instances class. 
- """ - cls_name = newInstances.__name__ - - @torch.jit.unused - def from_instances(instances: Instances): - """ - Create scripted Instances from original Instances - """ - fields = instances.get_fields() - image_size = instances.image_size - ret = newInstances(image_size) - for name, val in fields.items(): - assert hasattr(ret, f"_{name}"), f"No attribute named {name} in {cls_name}" - setattr(ret, name, deepcopy(val)) - return ret - - newInstances.from_instances = from_instances - - -@contextmanager -def patch_instances(fields): - """ - A contextmanager, under which the Instances class in detectron2 is replaced - by a statically-typed scriptable class, defined by `fields`. - See more in `scripting_with_instances`. - """ - - with tempfile.TemporaryDirectory(prefix="detectron2") as dir, tempfile.NamedTemporaryFile( - mode="w", encoding="utf-8", suffix=".py", dir=dir, delete=False - ) as f: - try: - # Objects that use Instances should not reuse previously-compiled - # results in cache, because `Instances` could be a new class each time. - _clear_jit_cache() - - cls_name, s = _gen_instance_module(fields) - f.write(s) - f.flush() - f.close() - - module = _import(f.name) - new_instances = getattr(module, cls_name) - _ = torch.jit.script(new_instances) - # let torchscript think Instances was scripted already - Instances.__torch_script_class__ = True - # let torchscript find new_instances when looking for the jit type of Instances - Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances) - - _add_instances_conversion_methods(new_instances) - yield new_instances - finally: - try: - del Instances.__torch_script_class__ - del Instances._jit_override_qualname - except AttributeError: - pass - sys.modules.pop(module.__name__) - - -def _gen_instance_class(fields): - """ - Args: - fields (dict[name: type]) - """ - - class _FieldType: - def __init__(self, name, type_): - assert isinstance(name, str), f"Field name must be str, got {name}" - self.name = name - self.type_ = type_ - self.annotation = f"{type_.__module__}.{type_.__name__}" - - fields = [_FieldType(k, v) for k, v in fields.items()] - - def indent(level, s): - return " " * 4 * level + s - - lines = [] - - global _counter - _counter += 1 - - cls_name = "ScriptedInstances{}".format(_counter) - - field_names = tuple(x.name for x in fields) - extra_args = ", ".join([f"{f.name}: Optional[{f.annotation}] = None" for f in fields]) - lines.append( - f""" -class {cls_name}: - def __init__(self, image_size: Tuple[int, int], {extra_args}): - self.image_size = image_size - self._field_names = {field_names} -""" - ) - - for f in fields: - lines.append( - indent(2, f"self._{f.name} = torch.jit.annotate(Optional[{f.annotation}], {f.name})") - ) - - for f in fields: - lines.append( - f""" - @property - def {f.name}(self) -> {f.annotation}: - # has to use a local for type refinement - # https://pytorch.org/docs/stable/jit_language_reference.html#optional-type-refinement - t = self._{f.name} - assert t is not None, "{f.name} is None and cannot be accessed!" 
- return t - - @{f.name}.setter - def {f.name}(self, value: {f.annotation}) -> None: - self._{f.name} = value -""" - ) - - # support method `__len__` - lines.append( - """ - def __len__(self) -> int: -""" - ) - for f in fields: - lines.append( - f""" - t = self._{f.name} - if t is not None: - return len(t) -""" - ) - lines.append( - """ - raise NotImplementedError("Empty Instances does not support __len__!") -""" - ) - - # support method `has` - lines.append( - """ - def has(self, name: str) -> bool: -""" - ) - for f in fields: - lines.append( - f""" - if name == "{f.name}": - return self._{f.name} is not None -""" - ) - lines.append( - """ - return False -""" - ) - - # support method `to` - none_args = ", None" * len(fields) - lines.append( - f""" - def to(self, device: torch.device) -> "{cls_name}": - ret = {cls_name}(self.image_size{none_args}) -""" - ) - for f in fields: - if hasattr(f.type_, "to"): - lines.append( - f""" - t = self._{f.name} - if t is not None: - ret._{f.name} = t.to(device) -""" - ) - else: - # For now, ignore fields that cannot be moved to devices. - # Maybe can support other tensor-like classes (e.g. __torch_function__) - pass - lines.append( - """ - return ret -""" - ) - - # support method `getitem` - none_args = ", None" * len(fields) - lines.append( - f""" - def __getitem__(self, item) -> "{cls_name}": - ret = {cls_name}(self.image_size{none_args}) -""" - ) - for f in fields: - lines.append( - f""" - t = self._{f.name} - if t is not None: - ret._{f.name} = t[item] -""" - ) - lines.append( - """ - return ret -""" - ) - - # support method `cat` - # this version does not contain checks that all instances have same size and fields - none_args = ", None" * len(fields) - lines.append( - f""" - def cat(self, instances: List["{cls_name}"]) -> "{cls_name}": - ret = {cls_name}(self.image_size{none_args}) -""" - ) - for f in fields: - lines.append( - f""" - t = self._{f.name} - if t is not None: - values: List[{f.annotation}] = [x.{f.name} for x in instances] - if torch.jit.isinstance(t, torch.Tensor): - ret._{f.name} = torch.cat(values, dim=0) - else: - ret._{f.name} = t.cat(values) -""" - ) - lines.append( - """ - return ret""" - ) - - # support method `get_fields()` - lines.append( - """ - def get_fields(self) -> Dict[str, Tensor]: - ret = {} - """ - ) - for f in fields: - if f.type_ == Boxes: - stmt = "t.tensor" - elif f.type_ == torch.Tensor: - stmt = "t" - else: - stmt = f'assert False, "unsupported type {str(f.type_)}"' - lines.append( - f""" - t = self._{f.name} - if t is not None: - ret["{f.name}"] = {stmt} - """ - ) - lines.append( - """ - return ret""" - ) - return cls_name, os.linesep.join(lines) - - -def _gen_instance_module(fields): - # TODO: find a more automatic way to enable import of other classes - s = """ -from copy import deepcopy -import torch -from torch import Tensor -import typing -from typing import * - -import detectron2 -from detectron2.structures import Boxes, Instances - -""" - - cls_name, cls_def = _gen_instance_class(fields) - s += cls_def - return cls_name, s - - -def _import(path): - return _import_file( - "{}{}".format(sys.modules[__name__].__name__, _counter), path, make_importable=True - ) - - -@contextmanager -def patch_builtin_len(modules=()): - """ - Patch the builtin len() function of a few detectron2 modules - to use __len__ instead, because __len__ does not convert values to - integers and therefore is friendly to tracing. 
- - Args: - modules (list[stsr]): names of extra modules to patch len(), in - addition to those in detectron2. - """ - - def _new_len(obj): - return obj.__len__() - - with ExitStack() as stack: - MODULES = [ - "detectron2.modeling.roi_heads.fast_rcnn", - "detectron2.modeling.roi_heads.mask_head", - "detectron2.modeling.roi_heads.keypoint_head", - ] + list(modules) - ctxs = [stack.enter_context(mock.patch(mod + ".len")) for mod in MODULES] - for m in ctxs: - m.side_effect = _new_len - yield - - -def patch_nonscriptable_classes(): - """ - Apply patches on a few nonscriptable detectron2 classes. - Should not have side-effects on eager usage. - """ - # __prepare_scriptable__ can also be added to models for easier maintenance. - # But it complicates the clean model code. - - from detectron2.modeling.backbone import ResNet, FPN - - # Due to https://github.com/pytorch/pytorch/issues/36061, - # we change backbone to use ModuleList for scripting. - # (note: this changes param names in state_dict) - - def prepare_resnet(self): - ret = deepcopy(self) - ret.stages = nn.ModuleList(ret.stages) - for k in self.stage_names: - delattr(ret, k) - return ret - - ResNet.__prepare_scriptable__ = prepare_resnet - - def prepare_fpn(self): - ret = deepcopy(self) - ret.lateral_convs = nn.ModuleList(ret.lateral_convs) - ret.output_convs = nn.ModuleList(ret.output_convs) - for name, _ in self.named_children(): - if name.startswith("fpn_"): - delattr(ret, name) - return ret - - FPN.__prepare_scriptable__ = prepare_fpn - - # Annotate some attributes to be constants for the purpose of scripting, - # even though they are not constants in eager mode. - from detectron2.modeling.roi_heads import StandardROIHeads - - if hasattr(StandardROIHeads, "__annotations__"): - # copy first to avoid editing annotations of base class - StandardROIHeads.__annotations__ = deepcopy(StandardROIHeads.__annotations__) - StandardROIHeads.__annotations__["mask_on"] = torch.jit.Final[bool] - StandardROIHeads.__annotations__["keypoint_on"] = torch.jit.Final[bool] - - -# These patches are not supposed to have side-effects. -patch_nonscriptable_classes() - - -@contextmanager -def freeze_training_mode(model): - """ - A context manager that annotates the "training" attribute of every submodule - to constant, so that the training codepath in these modules can be - meta-compiled away. Upon exiting, the annotations are reverted. - """ - classes = {type(x) for x in model.modules()} - # __constants__ is the old way to annotate constants and not compatible - # with __annotations__ . - classes = {x for x in classes if not hasattr(x, "__constants__")} - for cls in classes: - cls.__annotations__["training"] = torch.jit.Final[bool] - yield - for cls in classes: - cls.__annotations__["training"] = bool diff --git a/detectron2/detectron2/layers/__init__.py b/detectron2/detectron2/layers/__init__.py deleted file mode 100644 index 761a3d1c7afa049e9779ee9fc4d299e9aae38cad..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
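The patches above are normally used together through `scripting_with_instances` from `torchscript.py` shown earlier in this diff. A minimal usage sketch, assuming `model` is a detectron2 model already built and switched to evaluation mode; the import path follows the file layout shown above, and the field types are the ones used in that function's docstring:

    import torch
    from detectron2.structures import Boxes
    from detectron2.export.torchscript import scripting_with_instances

    model.eval()
    fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor}
    scripted_model = scripting_with_instances(model, fields)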
-from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm, CycleBatchNormList -from .deform_conv import DeformConv, ModulatedDeformConv -from .mask_ops import paste_masks_in_image -from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated -from .roi_align import ROIAlign, roi_align -from .roi_align_rotated import ROIAlignRotated, roi_align_rotated -from .shape_spec import ShapeSpec -from .wrappers import ( - BatchNorm2d, - Conv2d, - ConvTranspose2d, - cat, - interpolate, - Linear, - nonzero_tuple, - cross_entropy, - empty_input_loss_func_wrapper, - shapes_to_tensor, - move_device_like, -) -from .blocks import CNNBlockBase, DepthwiseSeparableConv2d -from .aspp import ASPP -from .losses import ciou_loss, diou_loss - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/layers/aspp.py b/detectron2/detectron2/layers/aspp.py deleted file mode 100644 index 14861aa9ede4fea6a69a49f189bcab997b558148..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/aspp.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -from copy import deepcopy -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from .batch_norm import get_norm -from .blocks import DepthwiseSeparableConv2d -from .wrappers import Conv2d - - -class ASPP(nn.Module): - """ - Atrous Spatial Pyramid Pooling (ASPP). - """ - - def __init__( - self, - in_channels, - out_channels, - dilations, - *, - norm, - activation, - pool_kernel_size=None, - dropout: float = 0.0, - use_depthwise_separable_conv=False, - ): - """ - Args: - in_channels (int): number of input channels for ASPP. - out_channels (int): number of output channels. - dilations (list): a list of 3 dilations in ASPP. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. norm is - applied to all conv layers except the conv following - global average pooling. - activation (callable): activation function. - pool_kernel_size (tuple, list): the average pooling size (kh, kw) - for image pooling layer in ASPP. If set to None, it always - performs global average pooling. If not None, it must be - divisible by the shape of inputs in forward(). It is recommended - to use a fixed input feature size in training, and set this - option to match this size, so that it performs global average - pooling in training, and the size of the pooling window stays - consistent in inference. - dropout (float): apply dropout on the output of ASPP. It is used in - the official DeepLab implementation with a rate of 0.1: - https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/model.py#L532 # noqa - use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d - for 3x3 convs in ASPP, proposed in :paper:`DeepLabV3+`. 
- """ - super(ASPP, self).__init__() - assert len(dilations) == 3, "ASPP expects 3 dilations, got {}".format(len(dilations)) - self.pool_kernel_size = pool_kernel_size - self.dropout = dropout - use_bias = norm == "" - self.convs = nn.ModuleList() - # conv 1x1 - self.convs.append( - Conv2d( - in_channels, - out_channels, - kernel_size=1, - bias=use_bias, - norm=get_norm(norm, out_channels), - activation=deepcopy(activation), - ) - ) - weight_init.c2_xavier_fill(self.convs[-1]) - # atrous convs - for dilation in dilations: - if use_depthwise_separable_conv: - self.convs.append( - DepthwiseSeparableConv2d( - in_channels, - out_channels, - kernel_size=3, - padding=dilation, - dilation=dilation, - norm1=norm, - activation1=deepcopy(activation), - norm2=norm, - activation2=deepcopy(activation), - ) - ) - else: - self.convs.append( - Conv2d( - in_channels, - out_channels, - kernel_size=3, - padding=dilation, - dilation=dilation, - bias=use_bias, - norm=get_norm(norm, out_channels), - activation=deepcopy(activation), - ) - ) - weight_init.c2_xavier_fill(self.convs[-1]) - # image pooling - # We do not add BatchNorm because the spatial resolution is 1x1, - # the original TF implementation has BatchNorm. - if pool_kernel_size is None: - image_pooling = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), - ) - else: - image_pooling = nn.Sequential( - nn.AvgPool2d(kernel_size=pool_kernel_size, stride=1), - Conv2d(in_channels, out_channels, 1, bias=True, activation=deepcopy(activation)), - ) - weight_init.c2_xavier_fill(image_pooling[1]) - self.convs.append(image_pooling) - - self.project = Conv2d( - 5 * out_channels, - out_channels, - kernel_size=1, - bias=use_bias, - norm=get_norm(norm, out_channels), - activation=deepcopy(activation), - ) - weight_init.c2_xavier_fill(self.project) - - def forward(self, x): - size = x.shape[-2:] - if self.pool_kernel_size is not None: - if size[0] % self.pool_kernel_size[0] or size[1] % self.pool_kernel_size[1]: - raise ValueError( - "`pool_kernel_size` must be divisible by the shape of inputs. " - "Input size: {} `pool_kernel_size`: {}".format(size, self.pool_kernel_size) - ) - res = [] - for conv in self.convs: - res.append(conv(x)) - res[-1] = F.interpolate(res[-1], size=size, mode="bilinear", align_corners=False) - res = torch.cat(res, dim=1) - res = self.project(res) - res = F.dropout(res, self.dropout, training=self.training) if self.dropout > 0 else res - return res diff --git a/detectron2/detectron2/layers/batch_norm.py b/detectron2/detectron2/layers/batch_norm.py deleted file mode 100644 index d304061ecf36dc1ebacccf19a154b8ba2fe8e785..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/batch_norm.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch -import torch.distributed as dist -from fvcore.nn.distributed import differentiable_all_reduce -from torch import nn -from torch.nn import functional as F - -from detectron2.utils import comm, env - -from .wrappers import BatchNorm2d - - -class FrozenBatchNorm2d(nn.Module): - """ - BatchNorm2d where the batch statistics and the affine parameters are fixed. - - It contains non-trainable buffers called - "weight" and "bias", "running_mean", "running_var", - initialized to perform identity transformation. - - The pre-trained backbone models from Caffe2 only contain "weight" and "bias", - which are computed from the original four parameters of BN. 
- The affine transform `x * weight + bias` will perform the equivalent - computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. - When loading a backbone model from Caffe2, "running_mean" and "running_var" - will be left unchanged as identity transformation. - - Other pre-trained backbone models may contain all 4 parameters. - - The forward is implemented by `F.batch_norm(..., training=False)`. - """ - - _version = 3 - - def __init__(self, num_features, eps=1e-5): - super().__init__() - self.num_features = num_features - self.eps = eps - self.register_buffer("weight", torch.ones(num_features)) - self.register_buffer("bias", torch.zeros(num_features)) - self.register_buffer("running_mean", torch.zeros(num_features)) - self.register_buffer("running_var", torch.ones(num_features) - eps) - self.register_buffer("num_batches_tracked", None) - - def forward(self, x): - if x.requires_grad: - # When gradients are needed, F.batch_norm will use extra memory - # because its backward op computes gradients for weight/bias as well. - scale = self.weight * (self.running_var + self.eps).rsqrt() - bias = self.bias - self.running_mean * scale - scale = scale.reshape(1, -1, 1, 1) - bias = bias.reshape(1, -1, 1, 1) - out_dtype = x.dtype # may be half - return x * scale.to(out_dtype) + bias.to(out_dtype) - else: - # When gradients are not needed, F.batch_norm is a single fused op - # and provide more optimization opportunities. - return F.batch_norm( - x, - self.running_mean, - self.running_var, - self.weight, - self.bias, - training=False, - eps=self.eps, - ) - - def _load_from_state_dict( - self, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ): - version = local_metadata.get("version", None) - - if version is None or version < 2: - # No running_mean/var in early versions - # This will silent the warnings - if prefix + "running_mean" not in state_dict: - state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) - if prefix + "running_var" not in state_dict: - state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) - - super()._load_from_state_dict( - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ) - - def __repr__(self): - return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) - - @classmethod - def convert_frozen_batchnorm(cls, module): - """ - Convert all BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. - - Args: - module (torch.nn.Module): - - Returns: - If module is BatchNorm/SyncBatchNorm, returns a new module. - Otherwise, in-place convert module and return it. 
- - Similar to convert_sync_batchnorm in - https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py - """ - bn_module = nn.modules.batchnorm - bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) - res = module - if isinstance(module, bn_module): - res = cls(module.num_features) - if module.affine: - res.weight.data = module.weight.data.clone().detach() - res.bias.data = module.bias.data.clone().detach() - res.running_mean.data = module.running_mean.data - res.running_var.data = module.running_var.data - res.eps = module.eps - res.num_batches_tracked = module.num_batches_tracked - else: - for name, child in module.named_children(): - new_child = cls.convert_frozen_batchnorm(child) - if new_child is not child: - res.add_module(name, new_child) - return res - - @classmethod - def convert_frozenbatchnorm2d_to_batchnorm2d(cls, module: nn.Module) -> nn.Module: - """ - Convert all FrozenBatchNorm2d to BatchNorm2d - - Args: - module (torch.nn.Module): - - Returns: - If module is FrozenBatchNorm2d, returns a new module. - Otherwise, in-place convert module and return it. - - This is needed for quantization: - https://fb.workplace.com/groups/1043663463248667/permalink/1296330057982005/ - """ - - res = module - if isinstance(module, FrozenBatchNorm2d): - res = torch.nn.BatchNorm2d(module.num_features, module.eps) - - res.weight.data = module.weight.data.clone().detach() - res.bias.data = module.bias.data.clone().detach() - res.running_mean.data = module.running_mean.data.clone().detach() - res.running_var.data = module.running_var.data.clone().detach() - res.eps = module.eps - res.num_batches_tracked = module.num_batches_tracked - else: - for name, child in module.named_children(): - new_child = cls.convert_frozenbatchnorm2d_to_batchnorm2d(child) - if new_child is not child: - res.add_module(name, new_child) - return res - - -def get_norm(norm, out_channels): - """ - Args: - norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; - or a callable that takes a channel number and returns - the normalization layer as a nn.Module. - - Returns: - nn.Module or None: the normalization layer - """ - if norm is None: - return None - if isinstance(norm, str): - if len(norm) == 0: - return None - norm = { - "BN": BatchNorm2d, - # Fixed in https://github.com/pytorch/pytorch/pull/36382 - "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, - "FrozenBN": FrozenBatchNorm2d, - "GN": lambda channels: nn.GroupNorm(32, channels), - # for debugging: - "nnSyncBN": nn.SyncBatchNorm, - "naiveSyncBN": NaiveSyncBatchNorm, - # expose stats_mode N as an option to caller, required for zero-len inputs - "naiveSyncBN_N": lambda channels: NaiveSyncBatchNorm(channels, stats_mode="N"), - "LN": lambda channels: LayerNorm(channels), - }[norm] - return norm(out_channels) - - -class NaiveSyncBatchNorm(BatchNorm2d): - """ - In PyTorch<=1.5, ``nn.SyncBatchNorm`` has incorrect gradient - when the batch size on each worker is different. - (e.g., when scale augmentation is used, or when it is applied to mask head). - - This is a slower but correct alternative to `nn.SyncBatchNorm`. - - Note: - There isn't a single definition of Sync BatchNorm. - - When ``stats_mode==""``, this module computes overall statistics by using - statistics of each worker with equal weight. The result is true statistics - of all samples (as if they are all on one worker) only when all workers - have the same (N, H, W). This mode does not support inputs with zero batch size. 
- - When ``stats_mode=="N"``, this module computes overall statistics by weighting - the statistics of each worker by their ``N``. The result is true statistics - of all samples (as if they are all on one worker) only when all workers - have the same (H, W). It is slower than ``stats_mode==""``. - - Even though the result of this module may not be the true statistics of all samples, - it may still be reasonable because it might be preferrable to assign equal weights - to all workers, regardless of their (H, W) dimension, instead of putting larger weight - on larger images. From preliminary experiments, little difference is found between such - a simplified implementation and an accurate computation of overall mean & variance. - """ - - def __init__(self, *args, stats_mode="", **kwargs): - super().__init__(*args, **kwargs) - assert stats_mode in ["", "N"] - self._stats_mode = stats_mode - - def forward(self, input): - if comm.get_world_size() == 1 or not self.training: - return super().forward(input) - - B, C = input.shape[0], input.shape[1] - - half_input = input.dtype == torch.float16 - if half_input: - # fp16 does not have good enough numerics for the reduction here - input = input.float() - mean = torch.mean(input, dim=[0, 2, 3]) - meansqr = torch.mean(input * input, dim=[0, 2, 3]) - - if self._stats_mode == "": - assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' - vec = torch.cat([mean, meansqr], dim=0) - vec = differentiable_all_reduce(vec) * (1.0 / dist.get_world_size()) - mean, meansqr = torch.split(vec, C) - momentum = self.momentum - else: - if B == 0: - vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) - vec = vec + input.sum() # make sure there is gradient w.r.t input - else: - vec = torch.cat( - [ - mean, - meansqr, - torch.ones([1], device=mean.device, dtype=mean.dtype), - ], - dim=0, - ) - vec = differentiable_all_reduce(vec * B) - - total_batch = vec[-1].detach() - momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 - mean, meansqr, _ = torch.split(vec / total_batch.clamp(min=1), C) # avoid div-by-zero - - var = meansqr - mean * mean - invstd = torch.rsqrt(var + self.eps) - scale = self.weight * invstd - bias = self.bias - mean * scale - scale = scale.reshape(1, -1, 1, 1) - bias = bias.reshape(1, -1, 1, 1) - - self.running_mean += momentum * (mean.detach() - self.running_mean) - self.running_var += momentum * (var.detach() - self.running_var) - ret = input * scale + bias - if half_input: - ret = ret.half() - return ret - - -class CycleBatchNormList(nn.ModuleList): - """ - Implement domain-specific BatchNorm by cycling. - - When a BatchNorm layer is used for multiple input domains or input - features, it might need to maintain a separate test-time statistics - for each domain. See Sec 5.2 in :paper:`rethinking-batchnorm`. - - This module implements it by using N separate BN layers - and it cycles through them every time a forward() is called. - - NOTE: The caller of this module MUST guarantee to always call - this module by multiple of N times. Otherwise its test-time statistics - will be incorrect. - """ - - def __init__(self, length: int, bn_class=nn.BatchNorm2d, **kwargs): - """ - Args: - length: number of BatchNorm layers to cycle. - bn_class: the BatchNorm class to use - kwargs: arguments of the BatchNorm class, such as num_features. 
- """ - self._affine = kwargs.pop("affine", True) - super().__init__([bn_class(**kwargs, affine=False) for k in range(length)]) - if self._affine: - # shared affine, domain-specific BN - channels = self[0].num_features - self.weight = nn.Parameter(torch.ones(channels)) - self.bias = nn.Parameter(torch.zeros(channels)) - self._pos = 0 - - def forward(self, x): - ret = self[self._pos](x) - self._pos = (self._pos + 1) % len(self) - - if self._affine: - w = self.weight.reshape(1, -1, 1, 1) - b = self.bias.reshape(1, -1, 1, 1) - return ret * w + b - else: - return ret - - def extra_repr(self): - return f"affine={self._affine}" - - -class LayerNorm(nn.Module): - """ - A LayerNorm variant, popularized by Transformers, that performs point-wise mean and - variance normalization over the channel dimension for inputs that have shape - (batch_size, channels, height, width). - https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa B950 - """ - - def __init__(self, normalized_shape, eps=1e-6): - super().__init__() - self.weight = nn.Parameter(torch.ones(normalized_shape)) - self.bias = nn.Parameter(torch.zeros(normalized_shape)) - self.eps = eps - self.normalized_shape = (normalized_shape,) - - def forward(self, x): - u = x.mean(1, keepdim=True) - s = (x - u).pow(2).mean(1, keepdim=True) - x = (x - u) / torch.sqrt(s + self.eps) - x = self.weight[:, None, None] * x + self.bias[:, None, None] - return x diff --git a/detectron2/detectron2/layers/blocks.py b/detectron2/detectron2/layers/blocks.py deleted file mode 100644 index 1995a4bf7339e8deb7eaaffda4f819dda55e7ac7..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/blocks.py +++ /dev/null @@ -1,111 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import fvcore.nn.weight_init as weight_init -from torch import nn - -from .batch_norm import FrozenBatchNorm2d, get_norm -from .wrappers import Conv2d - - -""" -CNN building blocks. -""" - - -class CNNBlockBase(nn.Module): - """ - A CNN block is assumed to have input channels, output channels and a stride. - The input and output of `forward()` method must be NCHW tensors. - The method can perform arbitrary computation but must match the given - channels and stride specification. - - Attribute: - in_channels (int): - out_channels (int): - stride (int): - """ - - def __init__(self, in_channels, out_channels, stride): - """ - The `__init__` method of any subclass should also contain these arguments. - - Args: - in_channels (int): - out_channels (int): - stride (int): - """ - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.stride = stride - - def freeze(self): - """ - Make this block not trainable. - This method sets all parameters to `requires_grad=False`, - and convert all BatchNorm layers to FrozenBatchNorm - - Returns: - the block itself - """ - for p in self.parameters(): - p.requires_grad = False - FrozenBatchNorm2d.convert_frozen_batchnorm(self) - return self - - -class DepthwiseSeparableConv2d(nn.Module): - """ - A kxk depthwise convolution + a 1x1 convolution. - - In :paper:`xception`, norm & activation are applied on the second conv. - :paper:`mobilenet` uses norm & activation on both convs. 
- """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size=3, - padding=1, - dilation=1, - *, - norm1=None, - activation1=None, - norm2=None, - activation2=None, - ): - """ - Args: - norm1, norm2 (str or callable): normalization for the two conv layers. - activation1, activation2 (callable(Tensor) -> Tensor): activation - function for the two conv layers. - """ - super().__init__() - self.depthwise = Conv2d( - in_channels, - in_channels, - kernel_size=kernel_size, - padding=padding, - dilation=dilation, - groups=in_channels, - bias=not norm1, - norm=get_norm(norm1, in_channels), - activation=activation1, - ) - self.pointwise = Conv2d( - in_channels, - out_channels, - kernel_size=1, - bias=not norm2, - norm=get_norm(norm2, out_channels), - activation=activation2, - ) - - # default initialization - weight_init.c2_msra_fill(self.depthwise) - weight_init.c2_msra_fill(self.pointwise) - - def forward(self, x): - return self.pointwise(self.depthwise(x)) diff --git a/detectron2/detectron2/layers/csrc/README.md b/detectron2/detectron2/layers/csrc/README.md deleted file mode 100644 index 778ed3da0bae89820831bcd8a72ff7b9cad8d4dd..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/README.md +++ /dev/null @@ -1,7 +0,0 @@ - - -To add a new Op: - -1. Create a new directory -2. Implement new ops there -3. Delcare its Python interface in `vision.cpp`. diff --git a/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h deleted file mode 100644 index 03f4211003f42f601f0cfcf4a690f5da4a0a1f67..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
-#pragma once -#include - -namespace detectron2 { - -at::Tensor ROIAlignRotated_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlignRotated_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -#if defined(WITH_CUDA) || defined(WITH_HIP) -at::Tensor ROIAlignRotated_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlignRotated_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); -#endif - -// Interface for Python -inline at::Tensor ROIAlignRotated_forward( - const at::Tensor& input, - const at::Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - if (input.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return ROIAlignRotated_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - return ROIAlignRotated_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -inline at::Tensor ROIAlignRotated_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t batch_size, - const int64_t channels, - const int64_t height, - const int64_t width, - const int64_t sampling_ratio) { - if (grad.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return ROIAlignRotated_backward_cuda( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - return ROIAlignRotated_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp deleted file mode 100644 index 2a3d3056cc71a4acaafb570739a9dd247a7eb1ed..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp +++ /dev/null @@ -1,522 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include -#include "ROIAlignRotated.h" - -// Note: this implementation originates from the Caffe2 ROIAlignRotated Op -// and PyTorch ROIAlign (non-rotated) Op implementations. -// The key difference between this implementation and those ones is -// we don't do "legacy offset" in this version, as there aren't many previous -// works, if any, using the "legacy" ROIAlignRotated Op. -// This would make the interface a bit cleaner. 
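Both the CPU and CUDA kernels that follow rely on the same bilinear interpolation rule: a continuous sample point (y, x) reads the four surrounding pixels weighted by the fractional offsets, returns 0 when the point falls outside the slightly padded range [-1, H] x [-1, W], and clamps points that land just past the border. A minimal NumPy sketch of that rule, for reference only:

```python
import numpy as np

def bilinear_interpolate(img, y, x):
    """Sample img (H, W) at continuous coords (y, x) the way the kernels below do."""
    h, w = img.shape
    if y < -1.0 or y > h or x < -1.0 or x > w:
        return 0.0                       # outside the (slightly padded) feature map
    y, x = max(y, 0.0), max(x, 0.0)
    y_low, x_low = int(y), int(x)
    y_high = min(y_low + 1, h - 1)
    x_high = min(x_low + 1, w - 1)
    if y_low >= h - 1:
        y_low, y = h - 1, float(h - 1)   # clamp onto the last row
    if x_low >= w - 1:
        x_low, x = w - 1, float(w - 1)   # clamp onto the last column
    ly, lx = y - y_low, x - x_low
    hy, hx = 1.0 - ly, 1.0 - lx
    return (hy * hx * img[y_low, x_low] + hy * lx * img[y_low, x_high]
            + ly * hx * img[y_high, x_low] + ly * lx * img[y_high, x_high])
```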
- -namespace detectron2 { - -namespace { -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int iy_upper, - const int ix_upper, - T roi_start_h, - T roi_start_w, - T bin_size_h, - T bin_size_w, - int roi_bin_grid_h, - int roi_bin_grid_w, - T roi_center_h, - T roi_center_w, - T cos_theta, - T sin_theta, - std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - // In image space, (y, x) is the order for Right Handed System, - // and this is essentially multiplying the point by a rotation matrix - // to rotate it counterclockwise through angle theta. - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y < 0) { - y = 0; - } - if (x < 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -inline void add(T* address, const T& val) { - *address += val; -} - -} // namespace - -template -void ROIAlignRotatedForward( - const int nthreads, - const T* input, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* output) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlignRotated do not have non-negative size!"); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector> pre_calc( - roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. 
- T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - pre_calc_for_bilinear_interpolate( - height, - width, - pooled_height, - pooled_width, - roi_bin_grid_h, - roi_bin_grid_w, - roi_start_h, - roi_start_w, - bin_size_h, - bin_size_w, - roi_bin_grid_h, - roi_bin_grid_w, - roi_center_h, - roi_center_w, - cos_theta, - sin_theta, - pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_input[pc.pos1] + - pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - output[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -template -void ROIAlignRotatedBackward( - const int nthreads, - // may not be contiguous. should index using n_stride, etc - const T* grad_output, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* grad_input, - const T* rois, - const int n_stride, - const int c_stride, - const int h_stride, - const int w_stride) { - for (int index = 0; index < nthreads; index++) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlignRotated do not have non-negative size!"); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_grad_input = - grad_input + ((roi_batch_ind * channels + c) * height * width); - - int output_offset = n * n_stride + c * c_stride; - const T* offset_grad_output = grad_output + output_offset; - const T grad_output_this_bin = - offset_grad_output[ph * h_stride + pw * w_stride]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. 
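The backward pass mirrors the forward sampling: each pooled-output gradient is split across the four pixels that contributed to a sample, weighted by the same bilinear weights and divided by the number of samples per bin. A tiny sketch of that accumulation step; the weights and neighbour indices are assumed to come from a `bilinear_interpolate_gradient`-style computation like the one above.

```python
def splat_gradient(grad_input, grad_bin, w, neighbours, count):
    """Accumulate one sample's share of a bin gradient into grad_input (H, W).

    w          : (w1, w2, w3, w4) bilinear weights of the sample point
    neighbours : ((y_low, x_low), (y_low, x_high), (y_high, x_low), (y_high, x_high))
    count      : number of samples averaged per bin
    """
    for wi, (y, x) in zip(w, neighbours):
        if y >= 0 and x >= 0:            # skip samples that fell outside the map
            grad_input[y, x] += grad_bin * wi / count   # atomicAdd in the CUDA kernel
```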
- T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - - T g1 = grad_output_this_bin * w1 / count; - T g2 = grad_output_this_bin * w2 / count; - T g3 = grad_output_this_bin * w3 / count; - T g4 = grad_output_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - // atomic add is not needed for now since it is single threaded - add(offset_grad_input + y_low * width + x_low, static_cast(g1)); - add(offset_grad_input + y_low * width + x_high, static_cast(g2)); - add(offset_grad_input + y_high * width + x_low, static_cast(g3)); - add(offset_grad_input + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // for -} // ROIAlignRotatedBackward - -at::Tensor ROIAlignRotated_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cpu"; - at::checkAllSameType(c, {input_t, rois_t}); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - at::Tensor output = at::zeros( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) { - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIAlignRotated_forward", [&] { - ROIAlignRotatedForward( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr()); - }); - return output; -} - -at::Tensor ROIAlignRotated_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignRotated_backward_cpu"; - at::checkAllSameType(c, {grad_t, rois_t}); - - at::Tensor grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - // handle possibly empty gradients - if (grad.numel() == 0) { - return 
grad_input; - } - - // get stride values to ensure indexing into gradients is correct. - int n_stride = grad.stride(0); - int c_stride = grad.stride(1); - int h_stride = grad.stride(2); - int w_stride = grad.stride(3); - - auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIAlignRotated_forward", [&] { - ROIAlignRotatedBackward( - grad.numel(), - grad.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride); - }); - return grad_input; -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu deleted file mode 100644 index fca186519143b168a912c880a4cf495a0a5a9322..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -// Note: this implementation originates from the Caffe2 ROIAlignRotated Op -// and PyTorch ROIAlign (non-rotated) Op implementations. -// The key difference between this implementation and those ones is -// we don't do "legacy offset" in this version, as there aren't many previous -// works, if any, using the "legacy" ROIAlignRotated Op. -// This would make the interface a bit cleaner. - -namespace detectron2 { - -namespace { - -template -__device__ T bilinear_interpolate( - const T* input, - const int height, - const int width, - T y, - T x) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - return 0; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - // do bilinear interpolation - T v1 = input[y_low * width + x_low]; - T v2 = input[y_low * width + x_high]; - T v3 = input[y_high * width + x_low]; - T v4 = input[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -} // namespace - -template -__global__ void RoIAlignRotatedForward( - const int nthreads, - const T* input, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (inte gral) pooling inside a bin - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T val = bilinear_interpolate(offset_input, height, width, y, x); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__global__ void RoIAlignRotatedBackwardFeature( - const int nthreads, - const T* top_diff, - const int num_rois, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* rois) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd( - offset_bottom_diff + y_low * width + x_low, static_cast(g1)); - atomicAdd( - offset_bottom_diff + y_low * width + x_high, static_cast(g2)); - atomicAdd( - offset_bottom_diff + y_high * width + x_low, static_cast(g3)); - atomicAdd( - offset_bottom_diff + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignRotatedBackward - -at::Tensor ROIAlignRotated_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignRotated_forward_cuda"; - at::checkAllSameGPU(c, {input_t, rois_t}); - at::checkAllSameType(c, {input_t, rois_t}); - at::cuda::CUDAGuard device_guard(input.device()); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(output_size), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES( - input.scalar_type(), "ROIAlignRotated_forward", [&] { - RoIAlignRotatedForward<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr()); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlignRotated_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; - at::checkAllSameGPU(c, {grad_t, rois_t}); - at::checkAllSameType(c, {grad_t, rois_t}); - at::cuda::CUDAGuard device_guard(grad.device()); - - auto num_rois = rois.size(0); - auto grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(grad.numel()), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; - } - - auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES( - grad.scalar_type(), "ROIAlignRotated_backward", [&] { - RoIAlignRotatedBackwardFeature<<>>( - grad.numel(), - grad_.data_ptr(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr()); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h deleted file mode 100644 index 3bf383b8ed9b358b5313d433a9682c294dfb77e4..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#pragma once -#include - -namespace detectron2 { - -at::Tensor box_iou_rotated_cpu( - const at::Tensor& boxes1, - const at::Tensor& boxes2); - -#if defined(WITH_CUDA) || defined(WITH_HIP) -at::Tensor box_iou_rotated_cuda( - const at::Tensor& boxes1, - const at::Tensor& boxes2); -#endif - -// Interface for Python -// inline is needed to prevent multiple function definitions when this header is -// included by different cpps -inline at::Tensor box_iou_rotated( - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); - if (boxes1.device().is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - - return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp deleted file mode 100644 index c843487b5fa4e8077dd27402ec99009266ddda8d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
-#include "box_iou_rotated.h" -#include "box_iou_rotated_utils.h" - -namespace detectron2 { - -template -void box_iou_rotated_cpu_kernel( - const at::Tensor& boxes1, - const at::Tensor& boxes2, - at::Tensor& ious) { - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - - for (int i = 0; i < num_boxes1; i++) { - for (int j = 0; j < num_boxes2; j++) { - ious[i * num_boxes2 + j] = single_box_iou_rotated( - boxes1[i].data_ptr(), boxes2[j].data_ptr()); - } - } -} - -at::Tensor box_iou_rotated_cpu( - // input must be contiguous: - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - at::Tensor ious = - at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); - - box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); - - // reshape from 1d array to 2d array - auto shape = std::vector{num_boxes1, num_boxes2}; - return ious.reshape(shape); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu deleted file mode 100644 index 952710e53041187907fbd113f8d0d0fa24134a86..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include -#include -#include -#include -#include "box_iou_rotated_utils.h" - -namespace detectron2 { - -// 2D block with 32 * 16 = 512 threads per block -const int BLOCK_DIM_X = 32; -const int BLOCK_DIM_Y = 16; - -template -__global__ void box_iou_rotated_cuda_kernel( - const int n_boxes1, - const int n_boxes2, - const T* dev_boxes1, - const T* dev_boxes2, - T* dev_ious) { - const int row_start = blockIdx.x * blockDim.x; - const int col_start = blockIdx.y * blockDim.y; - - const int row_size = min(n_boxes1 - row_start, blockDim.x); - const int col_size = min(n_boxes2 - col_start, blockDim.y); - - __shared__ float block_boxes1[BLOCK_DIM_X * 5]; - __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; - - // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y - if (threadIdx.x < row_size && threadIdx.y == 0) { - block_boxes1[threadIdx.x * 5 + 0] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; - block_boxes1[threadIdx.x * 5 + 1] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; - block_boxes1[threadIdx.x * 5 + 2] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; - block_boxes1[threadIdx.x * 5 + 3] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; - block_boxes1[threadIdx.x * 5 + 4] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; - } - - if (threadIdx.x < col_size && threadIdx.y == 0) { - block_boxes2[threadIdx.x * 5 + 0] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; - block_boxes2[threadIdx.x * 5 + 1] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; - block_boxes2[threadIdx.x * 5 + 2] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; - block_boxes2[threadIdx.x * 5 + 3] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; - block_boxes2[threadIdx.x * 5 + 4] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size && threadIdx.y < col_size) { - int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; - dev_ious[offset] = single_box_iou_rotated( - block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); - } -} - -at::Tensor box_iou_rotated_cuda( - // input must be contiguous - const at::Tensor& 
boxes1, - const at::Tensor& boxes2) { - using scalar_t = float; - AT_ASSERTM( - boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); - AT_ASSERTM( - boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); - AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); - AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(boxes1.device()); - - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - - at::Tensor ious = - at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); - - bool transpose = false; - if (num_boxes1 > 0 && num_boxes2 > 0) { - scalar_t *data1 = boxes1.data_ptr(), - *data2 = boxes2.data_ptr(); - - if (num_boxes2 > 65535 * BLOCK_DIM_Y) { - AT_ASSERTM( - num_boxes1 <= 65535 * BLOCK_DIM_Y, - "Too many boxes for box_iou_rotated_cuda!"); - // x dim is allowed to be large, but y dim cannot, - // so we transpose the two to avoid "invalid configuration argument" - // error. We assume one of them is small. Otherwise the result is hard to - // fit in memory anyway. - std::swap(num_boxes1, num_boxes2); - std::swap(data1, data2); - transpose = true; - } - - const int blocks_x = - at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); - const int blocks_y = - at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); - - dim3 blocks(blocks_x, blocks_y); - dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - box_iou_rotated_cuda_kernel<<>>( - num_boxes1, - num_boxes2, - data1, - data2, - (scalar_t*)ious.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - } - - // reshape from 1d array to 2d array - auto shape = std::vector{num_boxes1, num_boxes2}; - if (transpose) { - return ious.view(shape).t(); - } else { - return ious.view(shape); - } -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h deleted file mode 100644 index bc6967a76884a40581a94554e91e6e72c6f8b527..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h +++ /dev/null @@ -1,391 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#pragma once - -#include -#include - -#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 -// Designates functions callable from the host (CPU) and the device (GPU) -#define HOST_DEVICE __host__ __device__ -#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ -#else -#include -#define HOST_DEVICE -#define HOST_DEVICE_INLINE HOST_DEVICE inline -#endif - -namespace detectron2 { - -namespace { - -template -struct RotatedBox { - T x_ctr, y_ctr, w, h, a; -}; - -template -struct Point { - T x, y; - HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {} - HOST_DEVICE_INLINE Point operator+(const Point& p) const { - return Point(x + p.x, y + p.y); - } - HOST_DEVICE_INLINE Point& operator+=(const Point& p) { - x += p.x; - y += p.y; - return *this; - } - HOST_DEVICE_INLINE Point operator-(const Point& p) const { - return Point(x - p.x, y - p.y); - } - HOST_DEVICE_INLINE Point operator*(const T coeff) const { - return Point(x * coeff, y * coeff); - } -}; - -template -HOST_DEVICE_INLINE T dot_2d(const Point& A, const Point& B) { - return A.x * B.x + A.y * B.y; -} - -// R: result type. 
can be different from input type -template -HOST_DEVICE_INLINE R cross_2d(const Point& A, const Point& B) { - return static_cast(A.x) * static_cast(B.y) - - static_cast(B.x) * static_cast(A.y); -} - -template -HOST_DEVICE_INLINE void get_rotated_vertices( - const RotatedBox& box, - Point (&pts)[4]) { - // M_PI / 180. == 0.01745329251 - double theta = box.a * 0.01745329251; - T cosTheta2 = (T)cos(theta) * 0.5f; - T sinTheta2 = (T)sin(theta) * 0.5f; - - // y: top --> down; x: left --> right - pts[0].x = box.x_ctr + sinTheta2 * box.h + cosTheta2 * box.w; - pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; - pts[1].x = box.x_ctr - sinTheta2 * box.h + cosTheta2 * box.w; - pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; - pts[2].x = 2 * box.x_ctr - pts[0].x; - pts[2].y = 2 * box.y_ctr - pts[0].y; - pts[3].x = 2 * box.x_ctr - pts[1].x; - pts[3].y = 2 * box.y_ctr - pts[1].y; -} - -template -HOST_DEVICE_INLINE int get_intersection_points( - const Point (&pts1)[4], - const Point (&pts2)[4], - Point (&intersections)[24]) { - // Line vector - // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] - Point vec1[4], vec2[4]; - for (int i = 0; i < 4; i++) { - vec1[i] = pts1[(i + 1) % 4] - pts1[i]; - vec2[i] = pts2[(i + 1) % 4] - pts2[i]; - } - - // When computing the intersection area, it doesn't hurt if we have - // more (duplicated/approximate) intersections/vertices than needed, - // while it can cause drastic difference if we miss an intersection/vertex. - // Therefore, we add an epsilon to relax the comparisons between - // the float point numbers that decide the intersection points. - double EPS = 1e-5; - - // Line test - test all line combos for intersection - int num = 0; // number of intersections - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - // Solve for 2x2 Ax=b - T det = cross_2d(vec2[j], vec1[i]); - - // This takes care of parallel lines - if (fabs(det) <= 1e-14) { - continue; - } - - auto vec12 = pts2[j] - pts1[i]; - - T t1 = cross_2d(vec2[j], vec12) / det; - T t2 = cross_2d(vec1[i], vec12) / det; - - if (t1 > -EPS && t1 < 1.0f + EPS && t2 > -EPS && t2 < 1.0f + EPS) { - intersections[num++] = pts1[i] + vec1[i] * t1; - } - } - } - - // Check for vertices of rect1 inside rect2 - { - const auto& AB = vec2[0]; - const auto& DA = vec2[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - for (int i = 0; i < 4; i++) { - // assume ABCD is the rectangle, and P is the point to be judged - // P is inside ABCD iff. 
P's projection on AB lies within AB - // and P's projection on AD lies within AD - - auto AP = pts1[i] - pts2[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if ((APdotAB > -EPS) && (APdotAD > -EPS) && (APdotAB < ABdotAB + EPS) && - (APdotAD < ADdotAD + EPS)) { - intersections[num++] = pts1[i]; - } - } - } - - // Reverse the check - check for vertices of rect2 inside rect1 - { - const auto& AB = vec1[0]; - const auto& DA = vec1[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - for (int i = 0; i < 4; i++) { - auto AP = pts2[i] - pts1[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if ((APdotAB > -EPS) && (APdotAD > -EPS) && (APdotAB < ABdotAB + EPS) && - (APdotAD < ADdotAD + EPS)) { - intersections[num++] = pts2[i]; - } - } - } - - return num; -} - -template -HOST_DEVICE_INLINE int convex_hull_graham( - const Point (&p)[24], - const int& num_in, - Point (&q)[24], - bool shift_to_zero = false) { - assert(num_in >= 2); - - // Step 1: - // Find point with minimum y - // if more than 1 points have the same minimum y, - // pick the one with the minimum x. - int t = 0; - for (int i = 1; i < num_in; i++) { - if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { - t = i; - } - } - auto& start = p[t]; // starting point - - // Step 2: - // Subtract starting point from every points (for sorting in the next step) - for (int i = 0; i < num_in; i++) { - q[i] = p[i] - start; - } - - // Swap the starting point to position 0 - auto tmp = q[0]; - q[0] = q[t]; - q[t] = tmp; - - // Step 3: - // Sort point 1 ~ num_in according to their relative cross-product values - // (essentially sorting according to angles) - // If the angles are the same, sort according to their distance to origin - T dist[24]; -#if defined(__CUDACC__) || __HCC__ == 1 || __HIP__ == 1 - // compute distance to origin before sort, and sort them together with the - // points - for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } - - // CUDA version - // In the future, we can potentially use thrust - // for sorting here to improve speed (though not guaranteed) - for (int i = 1; i < num_in - 1; i++) { - for (int j = i + 1; j < num_in; j++) { - T crossProduct = cross_2d(q[i], q[j]); - if ((crossProduct < -1e-6) || - (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { - auto q_tmp = q[i]; - q[i] = q[j]; - q[j] = q_tmp; - auto dist_tmp = dist[i]; - dist[i] = dist[j]; - dist[j] = dist_tmp; - } - } - } -#else - // CPU version - // std::sort( - // q + 1, q + num_in, [](const Point& A, const Point& B) -> bool { - // T temp = cross_2d(A, B); - - // if (fabs(temp) < 1e-6) { - // return dot_2d(A, A) < dot_2d(B, B); - // } else { - // return temp > 0; - // } - // }); - for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } - - for (int i = 1; i < num_in - 1; i++) { - for (int j = i + 1; j < num_in; j++) { - T crossProduct = cross_2d(q[i], q[j]); - if ((crossProduct < -1e-6) || - (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { - auto q_tmp = q[i]; - q[i] = q[j]; - q[j] = q_tmp; - auto dist_tmp = dist[i]; - dist[i] = dist[j]; - dist[j] = dist_tmp; - } - } - } - - // compute distance to origin after sort, since the points are now different. 
- for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } - -#endif - - // Step 4: - // Make sure there are at least 2 points (that don't overlap with each other) - // in the stack - int k; // index of the non-overlapped second point - for (k = 1; k < num_in; k++) { - if (dist[k] > 1e-8) { - break; - } - } - if (k == num_in) { - // We reach the end, which means the convex hull is just one point - q[0] = p[t]; - return 1; - } - q[1] = q[k]; - int m = 2; // 2 points in the stack - // Step 5: - // Finally we can start the scanning process. - // When a non-convex relationship between the 3 points is found - // (either concave shape or duplicated points), - // we pop the previous point from the stack - // until the 3-point relationship is convex again, or - // until the stack only contains two points - for (int i = k + 1; i < num_in; i++) { - while (m > 1) { - auto q1 = q[i] - q[m - 2], q2 = q[m - 1] - q[m - 2]; - // cross_2d() uses FMA and therefore computes round(round(q1.x*q2.y) - - // q2.x*q1.y) So it may not return 0 even when q1==q2. Therefore we - // compare round(q1.x*q2.y) and round(q2.x*q1.y) directly. (round means - // round to nearest floating point). - if (q1.x * q2.y >= q2.x * q1.y) - m--; - else - break; - } - // Using double also helps, but float can solve the issue for now. - // while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) - // >= 0) { - // m--; - // } - q[m++] = q[i]; - } - - // Step 6 (Optional): - // In general sense we need the original coordinates, so we - // need to shift the points back (reverting Step 2) - // But if we're only interested in getting the area/perimeter of the shape - // We can simply return. - if (!shift_to_zero) { - for (int i = 0; i < m; i++) { - q[i] += start; - } - } - - return m; -} - -template -HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { - if (m <= 2) { - return 0; - } - - T area = 0; - for (int i = 1; i < m - 1; i++) { - area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); - } - - return area / 2.0; -} - -template -HOST_DEVICE_INLINE T rotated_boxes_intersection( - const RotatedBox& box1, - const RotatedBox& box2) { - // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned - // from rotated_rect_intersection_pts - Point intersectPts[24], orderedPts[24]; - - Point pts1[4]; - Point pts2[4]; - get_rotated_vertices(box1, pts1); - get_rotated_vertices(box2, pts2); - - int num = get_intersection_points(pts1, pts2, intersectPts); - - if (num <= 2) { - return 0.0; - } - - // Convex Hull to order the intersection points in clockwise order and find - // the contour area. 
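The polygon_area routine above is the standard triangle-fan (shoelace) formula: the convex polygon is split into triangles anchored at the first vertex and the absolute cross products are summed. A quick NumPy sketch of the same formula, with a unit-square check:

```python
import numpy as np

def polygon_area(pts):
    """Area of a convex polygon whose vertices pts (m, 2) are in order."""
    if len(pts) <= 2:
        return 0.0
    v = pts[1:] - pts[0]                       # fan of triangles from pts[0]
    cross = v[:-1, 0] * v[1:, 1] - v[:-1, 1] * v[1:, 0]
    return float(np.abs(cross).sum() / 2.0)

# e.g. a unit square gives area 1.0
assert abs(polygon_area(np.array([[0, 0], [1, 0], [1, 1], [0, 1]], float)) - 1.0) < 1e-9
```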
- int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); - return polygon_area(orderedPts, num_convex); -} - -} // namespace - -template -HOST_DEVICE_INLINE T -single_box_iou_rotated(T const* const box1_raw, T const* const box2_raw) { - // shift center to the middle point to achieve higher precision in result - RotatedBox box1, box2; - auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; - auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; - box1.x_ctr = box1_raw[0] - center_shift_x; - box1.y_ctr = box1_raw[1] - center_shift_y; - box1.w = box1_raw[2]; - box1.h = box1_raw[3]; - box1.a = box1_raw[4]; - box2.x_ctr = box2_raw[0] - center_shift_x; - box2.y_ctr = box2_raw[1] - center_shift_y; - box2.w = box2_raw[2]; - box2.h = box2_raw[3]; - box2.a = box2_raw[4]; - - T area1 = box1.w * box1.h; - T area2 = box2.w * box2.h; - if (area1 < 1e-14 || area2 < 1e-14) { - return 0.f; - } - - T intersection = rotated_boxes_intersection(box1, box2); - T iou = intersection / (area1 + area2 - intersection); - return iou; -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.cpp b/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.cpp deleted file mode 100644 index 0a5b7b907c06720fefc77b0dfd921b8ec3ecf2be..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.cpp +++ /dev/null @@ -1,507 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include "cocoeval.h" -#include -#include -#include -#include - -using namespace pybind11::literals; - -namespace detectron2 { - -namespace COCOeval { - -// Sort detections from highest score to lowest, such that -// detection_instances[detection_sorted_indices[t]] >= -// detection_instances[detection_sorted_indices[t+1]]. 
Use stable_sort to match -// original COCO API -void SortInstancesByDetectionScore( - const std::vector& detection_instances, - std::vector* detection_sorted_indices) { - detection_sorted_indices->resize(detection_instances.size()); - std::iota( - detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); - std::stable_sort( - detection_sorted_indices->begin(), - detection_sorted_indices->end(), - [&detection_instances](size_t j1, size_t j2) { - return detection_instances[j1].score > detection_instances[j2].score; - }); -} - -// Partition the ground truth objects based on whether or not to ignore them -// based on area -void SortInstancesByIgnore( - const std::array& area_range, - const std::vector& ground_truth_instances, - std::vector* ground_truth_sorted_indices, - std::vector* ignores) { - ignores->clear(); - ignores->reserve(ground_truth_instances.size()); - for (auto o : ground_truth_instances) { - ignores->push_back( - o.ignore || o.area < area_range[0] || o.area > area_range[1]); - } - - ground_truth_sorted_indices->resize(ground_truth_instances.size()); - std::iota( - ground_truth_sorted_indices->begin(), - ground_truth_sorted_indices->end(), - 0); - std::stable_sort( - ground_truth_sorted_indices->begin(), - ground_truth_sorted_indices->end(), - [&ignores](size_t j1, size_t j2) { - return (int)(*ignores)[j1] < (int)(*ignores)[j2]; - }); -} - -// For each IOU threshold, greedily match each detected instance to a ground -// truth instance (if possible) and store the results -void MatchDetectionsToGroundTruth( - const std::vector& detection_instances, - const std::vector& detection_sorted_indices, - const std::vector& ground_truth_instances, - const std::vector& ground_truth_sorted_indices, - const std::vector& ignores, - const std::vector>& ious, - const std::vector& iou_thresholds, - const std::array& area_range, - ImageEvaluation* results) { - // Initialize memory to store return data matches and ignore - const int num_iou_thresholds = iou_thresholds.size(); - const int num_ground_truth = ground_truth_sorted_indices.size(); - const int num_detections = detection_sorted_indices.size(); - std::vector ground_truth_matches( - num_iou_thresholds * num_ground_truth, 0); - std::vector& detection_matches = results->detection_matches; - std::vector& detection_ignores = results->detection_ignores; - std::vector& ground_truth_ignores = results->ground_truth_ignores; - detection_matches.resize(num_iou_thresholds * num_detections, 0); - detection_ignores.resize(num_iou_thresholds * num_detections, false); - ground_truth_ignores.resize(num_ground_truth); - for (auto g = 0; g < num_ground_truth; ++g) { - ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; - } - - for (auto t = 0; t < num_iou_thresholds; ++t) { - for (auto d = 0; d < num_detections; ++d) { - // information about best match so far (match=-1 -> unmatched) - double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); - int match = -1; - for (auto g = 0; g < num_ground_truth; ++g) { - // if this ground truth instance is already matched and not a - // crowd, it cannot be matched to another detection - if (ground_truth_matches[t * num_ground_truth + g] > 0 && - !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { - continue; - } - - // if detected instance matched to a regular ground truth - // instance, we can break on the first ground truth instance - // tagged as ignore (because they are sorted by the ignore tag) - if (match >= 0 && !ground_truth_ignores[match] && - ground_truth_ignores[g]) 
{ - break; - } - - // if IOU overlap is the best so far, store the match appropriately - if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { - best_iou = ious[d][ground_truth_sorted_indices[g]]; - match = g; - } - } - // if match was made, store id of match for both detection and - // ground truth - if (match >= 0) { - detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; - detection_matches[t * num_detections + d] = - ground_truth_instances[ground_truth_sorted_indices[match]].id; - ground_truth_matches[t * num_ground_truth + match] = - detection_instances[detection_sorted_indices[d]].id; - } - - // set unmatched detections outside of area range to ignore - const InstanceAnnotation& detection = - detection_instances[detection_sorted_indices[d]]; - detection_ignores[t * num_detections + d] = - detection_ignores[t * num_detections + d] || - (detection_matches[t * num_detections + d] == 0 && - (detection.area < area_range[0] || detection.area > area_range[1])); - } - } - - // store detection score results - results->detection_scores.resize(detection_sorted_indices.size()); - for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { - results->detection_scores[d] = - detection_instances[detection_sorted_indices[d]].score; - } -} - -std::vector EvaluateImages( - const std::vector>& area_ranges, - int max_detections, - const std::vector& iou_thresholds, - const ImageCategoryInstances>& image_category_ious, - const ImageCategoryInstances& - image_category_ground_truth_instances, - const ImageCategoryInstances& - image_category_detection_instances) { - const int num_area_ranges = area_ranges.size(); - const int num_images = image_category_ground_truth_instances.size(); - const int num_categories = - image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; - std::vector detection_sorted_indices; - std::vector ground_truth_sorted_indices; - std::vector ignores; - std::vector results_all( - num_images * num_area_ranges * num_categories); - - // Store results for each image, category, and area range combination. 
Results - // for each IOU threshold are packed into the same ImageEvaluation object - for (auto i = 0; i < num_images; ++i) { - for (auto c = 0; c < num_categories; ++c) { - const std::vector& ground_truth_instances = - image_category_ground_truth_instances[i][c]; - const std::vector& detection_instances = - image_category_detection_instances[i][c]; - - SortInstancesByDetectionScore( - detection_instances, &detection_sorted_indices); - if ((int)detection_sorted_indices.size() > max_detections) { - detection_sorted_indices.resize(max_detections); - } - - for (size_t a = 0; a < area_ranges.size(); ++a) { - SortInstancesByIgnore( - area_ranges[a], - ground_truth_instances, - &ground_truth_sorted_indices, - &ignores); - - MatchDetectionsToGroundTruth( - detection_instances, - detection_sorted_indices, - ground_truth_instances, - ground_truth_sorted_indices, - ignores, - image_category_ious[i][c], - iou_thresholds, - area_ranges[a], - &results_all - [c * num_area_ranges * num_images + a * num_images + i]); - } - } - } - - return results_all; -} - -// Convert a python list to a vector -template -std::vector list_to_vec(const py::list& l) { - std::vector v(py::len(l)); - for (int i = 0; i < (int)py::len(l); ++i) { - v[i] = l[i].cast(); - } - return v; -} - -// Helper function to Accumulate() -// Considers the evaluation results applicable to a particular category, area -// range, and max_detections parameter setting, which begin at -// evaluations[evaluation_index]. Extracts a sorted list of length n of all -// applicable detection instances concatenated across all images in the dataset, -// which are represented by the outputs evaluation_indices, detection_scores, -// image_detection_indices, and detection_sorted_indices--all of which are -// length n. evaluation_indices[i] stores the applicable index into -// evaluations[] for instance i, which has detection score detection_score[i], -// and is the image_detection_indices[i]'th of the list of detections -// for the image containing i. 
detection_sorted_indices[] defines a sorted -// permutation of the 3 other outputs -int BuildSortedDetectionList( - const std::vector& evaluations, - const int64_t evaluation_index, - const int64_t num_images, - const int max_detections, - std::vector* evaluation_indices, - std::vector* detection_scores, - std::vector* detection_sorted_indices, - std::vector* image_detection_indices) { - assert(evaluations.size() >= evaluation_index + num_images); - - // Extract a list of object instances of the applicable category, area - // range, and max detections requirements such that they can be sorted - image_detection_indices->clear(); - evaluation_indices->clear(); - detection_scores->clear(); - image_detection_indices->reserve(num_images * max_detections); - evaluation_indices->reserve(num_images * max_detections); - detection_scores->reserve(num_images * max_detections); - int num_valid_ground_truth = 0; - for (auto i = 0; i < num_images; ++i) { - const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; - - for (int d = 0; - d < (int)evaluation.detection_scores.size() && d < max_detections; - ++d) { // detected instances - evaluation_indices->push_back(evaluation_index + i); - image_detection_indices->push_back(d); - detection_scores->push_back(evaluation.detection_scores[d]); - } - for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { - if (!ground_truth_ignore) { - ++num_valid_ground_truth; - } - } - } - - // Sort detections by decreasing score, using stable sort to match - // python implementation - detection_sorted_indices->resize(detection_scores->size()); - std::iota( - detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); - std::stable_sort( - detection_sorted_indices->begin(), - detection_sorted_indices->end(), - [&detection_scores](size_t j1, size_t j2) { - return (*detection_scores)[j1] > (*detection_scores)[j2]; - }); - - return num_valid_ground_truth; -} - -// Helper function to Accumulate() -// Compute a precision recall curve given a sorted list of detected instances -// encoded in evaluations, evaluation_indices, detection_scores, -// detection_sorted_indices, image_detection_indices (see -// BuildSortedDetectionList()). Using vectors precisions and recalls -// and temporary storage, output the results into precisions_out, recalls_out, -// and scores_out, which are large buffers containing many precion/recall curves -// for all possible parameter settings, with precisions_out_index and -// recalls_out_index defining the applicable indices to store results. 
-void ComputePrecisionRecallCurve( - const int64_t precisions_out_index, - const int64_t precisions_out_stride, - const int64_t recalls_out_index, - const std::vector& recall_thresholds, - const int iou_threshold_index, - const int num_iou_thresholds, - const int num_valid_ground_truth, - const std::vector& evaluations, - const std::vector& evaluation_indices, - const std::vector& detection_scores, - const std::vector& detection_sorted_indices, - const std::vector& image_detection_indices, - std::vector* precisions, - std::vector* recalls, - std::vector* precisions_out, - std::vector* scores_out, - std::vector* recalls_out) { - assert(recalls_out->size() > recalls_out_index); - - // Compute precision/recall for each instance in the sorted list of detections - int64_t true_positives_sum = 0, false_positives_sum = 0; - precisions->clear(); - recalls->clear(); - precisions->reserve(detection_sorted_indices.size()); - recalls->reserve(detection_sorted_indices.size()); - assert(!evaluations.empty() || detection_sorted_indices.empty()); - for (auto detection_sorted_index : detection_sorted_indices) { - const ImageEvaluation& evaluation = - evaluations[evaluation_indices[detection_sorted_index]]; - const auto num_detections = - evaluation.detection_matches.size() / num_iou_thresholds; - const auto detection_index = iou_threshold_index * num_detections + - image_detection_indices[detection_sorted_index]; - assert(evaluation.detection_matches.size() > detection_index); - assert(evaluation.detection_ignores.size() > detection_index); - const int64_t detection_match = - evaluation.detection_matches[detection_index]; - const bool detection_ignores = - evaluation.detection_ignores[detection_index]; - const auto true_positive = detection_match > 0 && !detection_ignores; - const auto false_positive = detection_match == 0 && !detection_ignores; - if (true_positive) { - ++true_positives_sum; - } - if (false_positive) { - ++false_positives_sum; - } - - const double recall = - static_cast(true_positives_sum) / num_valid_ground_truth; - recalls->push_back(recall); - const int64_t num_valid_detections = - true_positives_sum + false_positives_sum; - const double precision = num_valid_detections > 0 - ? static_cast(true_positives_sum) / num_valid_detections - : 0.0; - precisions->push_back(precision); - } - - (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; - - for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { - if ((*precisions)[i] > (*precisions)[i - 1]) { - (*precisions)[i - 1] = (*precisions)[i]; - } - } - - // Sample the per instance precision/recall list at each recall threshold - for (size_t r = 0; r < recall_thresholds.size(); ++r) { - // first index in recalls >= recall_thresholds[r] - std::vector::iterator low = std::lower_bound( - recalls->begin(), recalls->end(), recall_thresholds[r]); - size_t precisions_index = low - recalls->begin(); - - const auto results_ind = precisions_out_index + r * precisions_out_stride; - assert(results_ind < precisions_out->size()); - assert(results_ind < scores_out->size()); - if (precisions_index < precisions->size()) { - (*precisions_out)[results_ind] = (*precisions)[precisions_index]; - (*scores_out)[results_ind] = - detection_scores[detection_sorted_indices[precisions_index]]; - } else { - (*precisions_out)[results_ind] = 0; - (*scores_out)[results_ind] = 0; - } - } -} -py::dict Accumulate( - const py::object& params, - const std::vector& evaluations) { - const std::vector recall_thresholds = - list_to_vec(params.attr("recThrs")); - const std::vector max_detections = - list_to_vec(params.attr("maxDets")); - const int num_iou_thresholds = py::len(params.attr("iouThrs")); - const int num_recall_thresholds = py::len(params.attr("recThrs")); - const int num_categories = params.attr("useCats").cast() == 1 - ? py::len(params.attr("catIds")) - : 1; - const int num_area_ranges = py::len(params.attr("areaRng")); - const int num_max_detections = py::len(params.attr("maxDets")); - const int num_images = py::len(params.attr("imgIds")); - - std::vector precisions_out( - num_iou_thresholds * num_recall_thresholds * num_categories * - num_area_ranges * num_max_detections, - -1); - std::vector recalls_out( - num_iou_thresholds * num_categories * num_area_ranges * - num_max_detections, - -1); - std::vector scores_out( - num_iou_thresholds * num_recall_thresholds * num_categories * - num_area_ranges * num_max_detections, - -1); - - // Consider the list of all detected instances in the entire dataset in one - // large list. evaluation_indices, detection_scores, - // image_detection_indices, and detection_sorted_indices all have the same - // length as this list, such that each entry corresponds to one detected - // instance - std::vector evaluation_indices; // indices into evaluations[] - std::vector detection_scores; // detection scores of each instance - std::vector detection_sorted_indices; // sorted indices of all - // instances in the dataset - std::vector - image_detection_indices; // indices into the list of detected instances in - // the same image as each instance - std::vector precisions, recalls; - - for (auto c = 0; c < num_categories; ++c) { - for (auto a = 0; a < num_area_ranges; ++a) { - for (auto m = 0; m < num_max_detections; ++m) { - // The COCO PythonAPI assumes evaluations[] (the return value of - // COCOeval::EvaluateImages() is one long list storing results for each - // combination of category, area range, and image id, with categories in - // the outermost loop and images in the innermost loop. 
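The index arithmetic just below relies on that row-major layout. A tiny standalone illustration (not from the original source; the sizes are hypothetical) of how one (category, area range, image) triple maps to a flat position in evaluations[]:

#include <cstdio>

int main() {
  // evaluations[] is laid out as
  //   c * num_area_ranges * num_images + a * num_images + i
  const int num_area_ranges = 4, num_images = 5;
  const int c = 2, a = 1, i = 3;
  const int flat = c * num_area_ranges * num_images + a * num_images + i;
  std::printf("flat index = %d\n", flat);  // 2*20 + 1*5 + 3 = 48
  return 0;
}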
- const int64_t evaluations_index = - c * num_area_ranges * num_images + a * num_images; - int num_valid_ground_truth = BuildSortedDetectionList( - evaluations, - evaluations_index, - num_images, - max_detections[m], - &evaluation_indices, - &detection_scores, - &detection_sorted_indices, - &image_detection_indices); - - if (num_valid_ground_truth == 0) { - continue; - } - - for (auto t = 0; t < num_iou_thresholds; ++t) { - // recalls_out is a flattened vectors representing a - // num_iou_thresholds X num_categories X num_area_ranges X - // num_max_detections matrix - const int64_t recalls_out_index = - t * num_categories * num_area_ranges * num_max_detections + - c * num_area_ranges * num_max_detections + - a * num_max_detections + m; - - // precisions_out and scores_out are flattened vectors - // representing a num_iou_thresholds X num_recall_thresholds X - // num_categories X num_area_ranges X num_max_detections matrix - const int64_t precisions_out_stride = - num_categories * num_area_ranges * num_max_detections; - const int64_t precisions_out_index = t * num_recall_thresholds * - num_categories * num_area_ranges * num_max_detections + - c * num_area_ranges * num_max_detections + - a * num_max_detections + m; - - ComputePrecisionRecallCurve( - precisions_out_index, - precisions_out_stride, - recalls_out_index, - recall_thresholds, - t, - num_iou_thresholds, - num_valid_ground_truth, - evaluations, - evaluation_indices, - detection_scores, - detection_sorted_indices, - image_detection_indices, - &precisions, - &recalls, - &precisions_out, - &scores_out, - &recalls_out); - } - } - } - } - - time_t rawtime; - struct tm local_time; - std::array buffer; - time(&rawtime); -#ifdef _WIN32 - localtime_s(&local_time, &rawtime); -#else - localtime_r(&rawtime, &local_time); -#endif - strftime( - buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); - return py::dict( - "params"_a = params, - "counts"_a = std::vector( - {num_iou_thresholds, - num_recall_thresholds, - num_categories, - num_area_ranges, - num_max_detections}), - "date"_a = buffer, - "precision"_a = precisions_out, - "recall"_a = recalls_out, - "scores"_a = scores_out); -} - -} // namespace COCOeval - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.h b/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.h deleted file mode 100644 index db246e49a026b7cd989b305f4d3d98100be3c912..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/cocoeval/cocoeval.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#pragma once - -#include -#include -#include -#include -#include - -namespace py = pybind11; - -namespace detectron2 { - -namespace COCOeval { - -// Annotation data for a single object instance in an image -struct InstanceAnnotation { - InstanceAnnotation( - uint64_t id, - double score, - double area, - bool is_crowd, - bool ignore) - : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} - uint64_t id; - double score = 0.; - double area = 0.; - bool is_crowd = false; - bool ignore = false; -}; - -// Stores intermediate results for evaluating detection results for a single -// image that has D detected instances and G ground truth instances. 
This stores -// matches between detected and ground truth instances -struct ImageEvaluation { - // For each of the D detected instances, the id of the matched ground truth - // instance, or 0 if unmatched - std::vector detection_matches; - - // The detection score of each of the D detected instances - std::vector detection_scores; - - // Marks whether or not each of G instances was ignored from evaluation (e.g., - // because it's outside area_range) - std::vector ground_truth_ignores; - - // Marks whether or not each of D instances was ignored from evaluation (e.g., - // because it's outside aRng) - std::vector detection_ignores; -}; - -template -using ImageCategoryInstances = std::vector>>; - -// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each -// combination of image, category, area range settings, and IOU thresholds to -// evaluate, it matches detected instances to ground truth instances and stores -// the results into a vector of ImageEvaluation results, which will be -// interpreted by the COCOeval::Accumulate() function to produce precion-recall -// curves. The parameters of nested vectors have the following semantics: -// image_category_ious[i][c][d][g] is the intersection over union of the d'th -// detected instance and g'th ground truth instance of -// category category_ids[c] in image image_ids[i] -// image_category_ground_truth_instances[i][c] is a vector of ground truth -// instances in image image_ids[i] of category category_ids[c] -// image_category_detection_instances[i][c] is a vector of detected -// instances in image image_ids[i] of category category_ids[c] -std::vector EvaluateImages( - const std::vector>& area_ranges, // vector of 2-tuples - int max_detections, - const std::vector& iou_thresholds, - const ImageCategoryInstances>& image_category_ious, - const ImageCategoryInstances& - image_category_ground_truth_instances, - const ImageCategoryInstances& - image_category_detection_instances); - -// C++ implementation of COCOeval.accumulate(), which generates precision -// recall curves for each set of category, IOU threshold, detection area range, -// and max number of detections parameters. It is assumed that the parameter -// evaluations is the return value of the functon COCOeval::EvaluateImages(), -// which was called with the same parameter settings params -py::dict Accumulate( - const py::object& params, - const std::vector& evalutations); - -} // namespace COCOeval -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/cuda_version.cu b/detectron2/detectron2/layers/csrc/cuda_version.cu deleted file mode 100644 index b74fddab798485a0a9b14b028289f7ec35044836..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/cuda_version.cu +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. - -#include - -namespace detectron2 { -int get_cudart_version() { -// Not a ROCM platform: Either HIP is not used, or -// it is used, but platform is not ROCM (i.e. it is CUDA) -#if !defined(__HIP_PLATFORM_AMD__) - return CUDART_VERSION; -#else - int version = 0; - -#if HIP_VERSION_MAJOR != 0 - // Create a convention similar to that of CUDA, as assumed by other - // parts of the code. 
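For orientation, a short standalone sketch (not part of the original file) of how such packed version integers decode: the CUDA runtime's CUDART_VERSION packs 1000 * major + 10 * minor (e.g. 11020 for CUDA 11.2), while the HIP branch of this function packs 100 * major + minor.

#include <cstdio>

int main() {
  const int cuda_style = 11020;       // hypothetical CUDART_VERSION value
  std::printf("CUDA %d.%d\n", cuda_style / 1000, (cuda_style % 1000) / 10);

  const int hip_style = 5 * 100 + 6;  // HIP 5.6 packed with this file's convention
  std::printf("HIP %d.%d\n", hip_style / 100, hip_style % 100);
  return 0;
}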
- - version = HIP_VERSION_MINOR; - version += (HIP_VERSION_MAJOR * 100); -#else - hipRuntimeGetVersion(&version); -#endif - return version; -#endif -} -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/deformable/deform_conv.h b/detectron2/detectron2/layers/csrc/deformable/deform_conv.h deleted file mode 100644 index 965c1bfd47b58f9802d1c3fd69a5962517b2da61..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/deformable/deform_conv.h +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#pragma once -#include - -namespace detectron2 { - -#if defined(WITH_CUDA) || defined(WITH_HIP) -int deform_conv_forward_cuda( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step); - -int deform_conv_backward_input_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step); - -int deform_conv_backward_parameters_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step); - -void modulated_deform_conv_cuda_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias); - -void modulated_deform_conv_cuda_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias); - -#endif - -inline int deform_conv_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - if (input.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_forward_cuda( - input, - weight, - offset, - output, - columns, - ones, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - im2col_step); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - AT_ERROR("This operator is not implemented on CPU"); -} - -inline int 
deform_conv_backward_input( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - if (gradOutput.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_backward_input_cuda( - input, - offset, - gradOutput, - gradInput, - gradOffset, - weight, - columns, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - im2col_step); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - AT_ERROR("This operator is not implemented on CPU"); -} - -inline int deform_conv_backward_filter( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step) { - if (gradOutput.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_backward_parameters_cuda( - input, - offset, - gradOutput, - gradWeight, - columns, - ones, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - scale, - im2col_step); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - AT_ERROR("This operator is not implemented on CPU"); -} - -inline void modulated_deform_conv_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias) { - if (input.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return modulated_deform_conv_cuda_forward( - input, - weight, - bias, - ones, - offset, - mask, - output, - columns, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group, - with_bias); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - AT_ERROR("This operator is not implemented on CPU"); -} - -inline void modulated_deform_conv_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias) { - 
if (grad_output.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return modulated_deform_conv_cuda_backward( - input, - weight, - bias, - ones, - offset, - mask, - columns, - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group, - with_bias); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - AT_ERROR("This operator is not implemented on CPU"); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu deleted file mode 100644 index 2072bb856ec40b61c3826cead2fb7bb7c971a089..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu +++ /dev/null @@ -1,1223 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. - -// modified from -// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp -// Original license: Apache 2.0 - -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c -// Original license: Apache 2.0 - -#include - -#include "deform_conv.h" - -#include -#include - -namespace detectron2 { - -void deformable_im2col( - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor data_col); - -void deformable_col2im( - const at::Tensor data_col, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_im); - -void deformable_col2im_coord( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_offset); - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor data_col); - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - 
const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_im); - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, - at::Tensor grad_mask); - -void shape_check( - at::Tensor input, - at::Tensor offset, - at::Tensor* gradOutput, - at::Tensor weight, - int kH, - int kW, - int dH, - int dW, - int padH, - int padW, - int dilationH, - int dilationW, - int group, - int deformable_group) { - TORCH_CHECK( - weight.ndimension() == 4, - "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " - "but got: %s", - weight.ndimension()); - - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - TORCH_CHECK( - kW > 0 && kH > 0, - "kernel size should be greater than zero, but got kH: %d kW: %d", - kH, - kW); - - TORCH_CHECK( - (weight.size(2) == kH && weight.size(3) == kW), - "kernel size should be consistent with weight, ", - "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", - kH, - kW, - weight.size(2), - weight.size(3)); - - TORCH_CHECK( - dW > 0 && dH > 0, - "stride should be greater than zero, but got dH: %d dW: %d", - dH, - dW); - - TORCH_CHECK( - dilationW > 0 && dilationH > 0, - "dilation should be greater than 0, but got dilationH: %d dilationW: %d", - dilationH, - dilationW); - - int ndim = input.ndimension(); - int dimf = 0; - int dimh = 1; - int dimw = 2; - - if (ndim == 4) { - dimf++; - dimh++; - dimw++; - } - - TORCH_CHECK( - ndim == 3 || ndim == 4, - "3D or 4D input tensor expected but got: %s", - ndim); - - long nInputPlane = weight.size(1) * group; - long inputHeight = input.size(dimh); - long inputWidth = input.size(dimw); - long nOutputPlane = weight.size(0); - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - - TORCH_CHECK( - nInputPlane % deformable_group == 0, - "input channels must divide deformable group size"); - - if (outputWidth < 1 || outputHeight < 1) - AT_ERROR( - "Given input size: (%ld x %ld x %ld). " - "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", - nInputPlane, - inputHeight, - inputWidth, - nOutputPlane, - outputHeight, - outputWidth); - - TORCH_CHECK( - input.size(1) == nInputPlane, - "invalid number of input planes, expected: %d, but got: %d", - nInputPlane, - input.size(1)); - - TORCH_CHECK( - (inputHeight + 2 * padH >= kH && inputWidth + 2 * padW >= kW), - "input image is smaller than kernel"); - - TORCH_CHECK( - (offset.size(2) == outputHeight && offset.size(3) == outputWidth), - "invalid spatial size of offset, expected height: %d width: %d, but " - "got height: %d width: %d", - outputHeight, - outputWidth, - offset.size(2), - offset.size(3)); - - TORCH_CHECK( - (offset.size(1) == deformable_group * 2 * kH * kW), - "invalid number of channels of offset"); - - if (gradOutput != NULL) { - TORCH_CHECK( - gradOutput->size(dimf) == nOutputPlane, - "invalid number of gradOutput planes, expected: %d, but got: %d", - nOutputPlane, - gradOutput->size(dimf)); - - TORCH_CHECK( - (gradOutput->size(dimh) == outputHeight && - gradOutput->size(dimw) == outputWidth), - "invalid size of gradOutput, expected height: %d width: %d , but " - "got height: %d width: %d", - outputHeight, - outputWidth, - gradOutput->size(dimh), - gradOutput->size(dimw)); - } -} - -int deform_conv_forward_cuda( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - // todo: resize columns to include im2col: done - // todo: add im2col_step as input - // todo: add new output buffer and transpose it to output (or directly - // transpose output) todo: possibly change data indexing because of - // parallel_imgs - - shape_check( - input, - offset, - NULL, - weight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input.unsqueeze_(0); - offset.unsqueeze_(0); - } - - // todo: assert batchsize dividable by im2col_step - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - output = output.view( - {batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < outputHeight * outputWidth) { - ones = at::ones({outputHeight, outputWidth}, input.options()); - } - - input = input.view( - {batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - offset = offset.view( - {batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - at::Tensor output_buffer = at::zeros( - {batchSize / im2col_step, - nOutputPlane, - im2col_step * outputHeight, - outputWidth}, - output.options()); - - output_buffer = output_buffer.view( - {output_buffer.size(0), - group, - 
output_buffer.size(1) / group, - output_buffer.size(2), - output_buffer.size(3)}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col( - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view( - {group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - - for (int g = 0; g < group; g++) { - output_buffer[elt][g] = output_buffer[elt][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output_buffer[elt][g]); - } - } - - output_buffer = output_buffer.view( - {output_buffer.size(0), - output_buffer.size(1) * output_buffer.size(2), - output_buffer.size(3), - output_buffer.size(4)}); - - output_buffer = output_buffer.view( - {batchSize / im2col_step, - nOutputPlane, - im2col_step, - outputHeight, - outputWidth}); - output_buffer.transpose_(1, 2); - output.copy_(output_buffer); - output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - output = output.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_input_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - shape_check( - input, - offset, - &gradOutput, - weight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view({1, input.size(0), input.size(1), input.size(2)}); - offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - // change order of grad output - gradOutput = gradOutput.view( - {batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - gradOutput.transpose_(1, 2); - - gradInput = gradInput.view( - {batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - input = input.view( - 
{batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - gradOffset = gradOffset.view( - {batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - offset = offset.view( - {batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - // divide into groups - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view( - {group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - gradOutput = gradOutput.view( - {gradOutput.size(0), - group, - gradOutput.size(1) / group, - gradOutput.size(2), - gradOutput.size(3), - gradOutput.size(4)}); - - for (int g = 0; g < group; g++) { - columns[g] = columns[g].addmm_( - weight[g].flatten(1).transpose(0, 1), - gradOutput[elt][g].flatten(1), - 0.0f, - 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradOutput = gradOutput.view( - {gradOutput.size(0), - gradOutput.size(1) * gradOutput.size(2), - gradOutput.size(3), - gradOutput.size(4), - gradOutput.size(5)}); - - deformable_col2im_coord( - columns, - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - gradOffset[elt]); - - deformable_col2im( - columns, - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - gradInput[elt]); - } - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - gradOffset = gradOffset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - gradOffset = - gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_parameters_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step) { - // todo: transpose and reshape outGrad - // todo: reshape columns - // todo: add im2col_step as input - - shape_check( - input, - offset, - &gradOutput, - gradWeight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view( - at::IntList({1, input.size(0), input.size(1), input.size(2)})); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), 
gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = gradWeight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - gradOutput = gradOutput.view( - {batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - gradOutput.transpose_(1, 2); - - at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); - gradOutputBuffer = gradOutputBuffer.view( - {batchSize / im2col_step, - nOutputPlane, - im2col_step, - outputHeight, - outputWidth}); - gradOutputBuffer.copy_(gradOutput); - // gradOutput is not contiguous, so we do reshape (instead of view) next - gradOutputBuffer = gradOutputBuffer.reshape( - {batchSize / im2col_step, - nOutputPlane, - im2col_step * outputHeight, - outputWidth}); - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view( - {batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - offset = offset.view( - {batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col( - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - columns); - - // divide into group - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), - group, - gradOutputBuffer.size(1) / group, - gradOutputBuffer.size(2), - gradOutputBuffer.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - gradWeight = gradWeight.view( - {group, - gradWeight.size(0) / group, - gradWeight.size(1), - gradWeight.size(2), - gradWeight.size(3)}); - - for (int g = 0; g < group; g++) { - gradWeight[g] = gradWeight[g] - .flatten(1) - .addmm_( - gradOutputBuffer[elt][g].flatten(1), - columns[g].transpose(1, 0), - 1.0, - scale) - .view_as(gradWeight[g]); - } - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), - gradOutputBuffer.size(1) * gradOutputBuffer.size(2), - gradOutputBuffer.size(3), - gradOutputBuffer.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradWeight = gradWeight.view( - {gradWeight.size(0) * gradWeight.size(1), - gradWeight.size(2), - gradWeight.size(3), - gradWeight.size(4)}); - } - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - } - - return 1; -} - -void modulated_deform_conv_cuda_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const 
int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias) { - shape_check( - input, - offset, - NULL, - weight, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group); - - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_out = weight.size(0); - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR( - "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, - kernel_w, - kernel_h_, - kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR( - "Input shape and kernel channels wont match: (%d vs %d).", - channels, - channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - // mask shape check - TORCH_CHECK( - (mask.size(2) == height_out && mask.size(3) == width_out), - "invalid spatial size of mask, expected height: %d width: %d, but " - "got height: %d width: %d", - height_out, - width_out, - mask.size(2), - mask.size(3)); - - TORCH_CHECK( - (mask.size(1) == deformable_group * kernel_h * kernel_w), - "invalid number of channels of mask"); - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... 
- ones = at::ones({height_out, width_out}, input.options()); - } - - // resize output - output = output.view({batch, channels_out, height_out, width_out}).zero_(); - // resize temporary columns - columns = at::zeros( - {channels * kernel_h * kernel_w, 1 * height_out * width_out}, - input.options()); - - output = output.view( - {output.size(0), - group, - output.size(1) / group, - output.size(2), - output.size(3)}); - - for (int b = 0; b < batch; b++) { - modulated_deformable_im2col_cuda( - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - columns); - - // divide into group - weight = weight.view( - {group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - - for (int g = 0; g < group; g++) { - output[b][g] = output[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output[b][g]); - } - - weight = weight.view( - {weight.size(0) * weight.size(1), - weight.size(2), - weight.size(3), - weight.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - output = output.view( - {output.size(0), - output.size(1) * output.size(2), - output.size(3), - output.size(4)}); - - if (with_bias) { - output += bias.view({1, bias.size(0), 1, 1}); - } -} - -void modulated_deform_conv_cuda_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias) { - shape_check( - input, - offset, - &grad_output, - weight, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group); - - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR( - "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, - kernel_w, - kernel_h_, - kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR( - "Input shape and kernel channels wont match: (%d vs %d).", - channels, - channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - // mask shape check - TORCH_CHECK( - (mask.size(2) == height_out && mask.size(3) == width_out), - "invalid spatial size of mask, expected height: %d width: %d, but " - "got height: %d width: %d", - height_out, - width_out, - mask.size(2), - mask.size(3)); - - TORCH_CHECK( - (mask.size(1) == deformable_group * kernel_h * kernel_w), - "invalid number of channels of mask"); - - if 
(ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - grad_input = grad_input.view({batch, channels, height, width}); - columns = at::zeros( - {channels * kernel_h * kernel_w, height_out * width_out}, - input.options()); - - grad_output = grad_output.view( - {grad_output.size(0), - group, - grad_output.size(1) / group, - grad_output.size(2), - grad_output.size(3)}); - - for (int b = 0; b < batch; b++) { - // divide int group - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view( - {group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - - for (int g = 0; g < group; g++) { - columns[g].addmm_( - weight[g].flatten(1).transpose(0, 1), - grad_output[b][g].flatten(1), - 0.0f, - 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - weight = weight.view( - {weight.size(0) * weight.size(1), - weight.size(2), - weight.size(3), - weight.size(4)}); - - // gradient w.r.t. input coordinate data - modulated_deformable_col2im_coord_cuda( - columns, - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - grad_offset[b], - grad_mask[b]); - // gradient w.r.t. input data - modulated_deformable_col2im_cuda( - columns, - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - grad_input[b]); - - // gradient w.r.t. 
weight, dWeight should accumulate across the batch and - // group - modulated_deformable_im2col_cuda( - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - grad_weight = grad_weight.view( - {group, - grad_weight.size(0) / group, - grad_weight.size(1), - grad_weight.size(2), - grad_weight.size(3)}); - if (with_bias) - grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); - - for (int g = 0; g < group; g++) { - grad_weight[g] = - grad_weight[g] - .flatten(1) - .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) - .view_as(grad_weight[g]); - if (with_bias) { - grad_bias[g] = - grad_bias[g] - .view({-1, 1}) - .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) - .view(-1); - } - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - grad_weight = grad_weight.view( - {grad_weight.size(0) * grad_weight.size(1), - grad_weight.size(2), - grad_weight.size(3), - grad_weight.size(4)}); - if (with_bias) - grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); - } - grad_output = grad_output.view( - {grad_output.size(0) * grad_output.size(1), - grad_output.size(2), - grad_output.size(3), - grad_output.size(4)}); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu deleted file mode 100644 index f299c7add116685e9c87a187a85ea63f9f808867..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu +++ /dev/null @@ -1,1288 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. - -// modified from -// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu -// Original license: Apache 2.0 -// clang-format off - -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu - -/*! - ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** - * - * COPYRIGHT - * - * All contributions by the University of California: - * Copyright (c) 2014-2017 The Regents of the University of California (Regents) - * All rights reserved. - * - * All other contributions: - * Copyright (c) 2014-2017, the respective contributors - * All rights reserved. - * - * Caffe uses a shared copyright model: each contributor holds copyright over - * their contributions to Caffe. The project versioning records all such - * contribution and copyright details. If a contributor wants to further mark - * their specific copyright on a particular contribution, they should indicate - * their copyright solely in the commit message of the change when it is - * committed. - * - * LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * CONTRIBUTION AGREEMENT - * - * By contributing to the BVLC/caffe repository through pull-request, comment, - * or otherwise, the contributor releases their content to the - * license and copyright terms herein. - * - ***************** END Caffe Copyright Notice and Disclaimer ********************* - * - * Copyright (c) 2018 Microsoft - * Licensed under The MIT License [see LICENSE for details] - * \file modulated_deformable_im2col.cuh - * \brief Function definitions of converting an image to - * column matrix based on kernel, padding, dilation, and offset. - * These functions are mainly used in deformable convolution operators. - * \ref: https://arxiv.org/abs/1703.06211 - * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng - */ - -#include -#include -#include -#include -#include -#include - -using namespace at; - -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - - -namespace { - -const int CUDA_NUM_THREADS = 1024; -const int kMaxGridNum = 65535; - -inline int GET_BLOCKS(const int N) { - return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); -} - -} - -template -__device__ scalar_t deformable_im2col_bilinear( - const scalar_t* bottom_data, - const int data_width, - const int height, - const int width, - scalar_t h, - scalar_t w) { - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t get_gradient_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int h, - const int w, - const int height, - const int width) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = 
argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t get_coordinate_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int height, - const int width, - const scalar_t* im_data, - const int data_width, - const int bp_dir) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } else if (bp_dir == 1) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void deformable_im2col_gpu_kernel( - const int n, - const scalar_t* data_im, - const scalar_t* data_offset, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int num_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* data_col) { - CUDA_KERNEL_LOOP(index, n) { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - scalar_t* data_col_ptr = data_col + - ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - // const scalar_t* data_im_ptr = data_im + 
((b_col * num_channels + c_im) * - // height + h_in) * width + w_in; - const scalar_t* data_im_ptr = - data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + - w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - // const scalar_t map_h = i * dilation_h + offset_h; - // const scalar_t map_w = j * dilation_w + offset_w; - // const int cur_height = height - h_in; - // const int cur_width = width - w_in; - // val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, - // cur_width, map_h, map_w); - val = deformable_im2col_bilinear( - data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val; - data_col_ptr += batch_size * height_col * width_col; - } - } - } -} - - -template -__global__ void deformable_col2im_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_offset, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_im) { - CUDA_KERNEL_LOOP(index, n) { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = - (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = - index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index]; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) { - for (int dx = -2; dx <= 2; dx++) { - if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && - cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && - 
abs(cur_inv_w_data - (cur_w + dx)) < 1) { - int cur_bottom_grad_pos = - ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = get_gradient_weight( - cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - - -template -__global__ void deformable_col2im_coord_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_im, - const scalar_t* data_offset, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int offset_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_offset) { - CUDA_KERNEL_LOOP(index, n) { - scalar_t val = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t* data_col_ptr = data_col + - deformable_group_index * channel_per_deformable_group * batch_size * - width_col * height_col; - const scalar_t* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; - col_c += col_step) { - const int col_pos = - (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = - (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = - (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = - (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + - w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { - inv_h = inv_w = -2; - } - const scalar_t weight = get_coordinate_weight( - inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * height * width, - width, - bp_dir); - val += weight * data_col_ptr[col_pos]; - cnt += 1; - } - - grad_offset[index] = val; - } -} - - -namespace detectron2 { - -void deformable_im2col( - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int 
dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor data_col) { - // num_axes should be smaller than block size - // todo: check parallel_imgs is correctly passed in - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = channels * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - at::cuda::CUDAGuard device_guard(data_im.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "deformable_im2col_gpu", ([&] { - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* data_col_ = data_col.data_ptr(); - - deformable_im2col_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_im_, - data_offset_, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - channels, - deformable_group, - height_col, - width_col, - data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); - } -} - - -void deformable_col2im( - const at::Tensor data_col, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_im) { - // todo: make sure parallel_imgs is passed in correctly - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = - channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* grad_im_ = grad_im.data_ptr(); - - deformable_col2im_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_offset_, - channels, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - deformable_group, - height_col, - width_col, - grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); - } -} - - -void deformable_col2im_coord( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - 
at::Tensor grad_offset) { - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * - deformable_group * parallel_imgs; - int channel_per_deformable_group = - channels * ksize_h * ksize_w / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* grad_offset_ = grad_offset.data_ptr(); - - deformable_col2im_coord_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_im_, - data_offset_, - channels, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - 2 * ksize_h * ksize_w * deformable_group, - deformable_group, - height_col, - width_col, - grad_offset_); - })); -} - -} // namespace detectron2 - - -template -__device__ scalar_t dmcn_im2col_bilinear( - const scalar_t* bottom_data, - const int data_width, - const int height, - const int width, - scalar_t h, - scalar_t w) { - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t dmcn_get_gradient_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int h, - const int w, - const int height, - const int width) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t dmcn_get_coordinate_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int height, - const int width, - const scalar_t* im_data, - const int data_width, - const int bp_dir) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = 
floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } else if (bp_dir == 1) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void modulated_deformable_im2col_gpu_kernel( - const int n, - const scalar_t* data_im, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int num_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* data_col) { - CUDA_KERNEL_LOOP(index, n) { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - - scalar_t* data_col_ptr = data_col + - ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - // const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * - // height + h_in) * width + w_in; - const scalar_t* data_im_ptr = - data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - const scalar_t* data_mask_ptr = data_mask + - (b_col * deformable_group + deformable_group_index) * kernel_h * - kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + - w_col; - const int data_mask_hw_ptr = - ((i * kernel_w + j) * height_col + h_col) * 
width_col + w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - // if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - // const float map_h = i * dilation_h + offset_h; - // const float map_w = j * dilation_w + offset_w; - // const int cur_height = height - h_in; - // const int cur_width = width - w_in; - // val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, - // cur_width, map_h, map_w); - val = dmcn_im2col_bilinear( - data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val * mask; - data_col_ptr += batch_size * height_col * width_col; - // data_col_ptr += height_col * width_col; - } - } - } -} - -template -__global__ void modulated_deformable_col2im_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_im) { - CUDA_KERNEL_LOOP(index, n) { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = - (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = - index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const scalar_t* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * - height_col * width_col; - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const int data_mask_hw_ptr = - ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index] * mask; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) { - for (int dx = -2; dx <= 2; dx++) { - if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && - cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && - abs(cur_inv_w_data - (cur_w + dx)) < 1) { - int cur_bottom_grad_pos = - ((b * channels + c) 
* height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = dmcn_get_gradient_weight( - cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - -template -__global__ void modulated_deformable_col2im_coord_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_im, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int offset_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_offset, - scalar_t* grad_mask) { - CUDA_KERNEL_LOOP(index, n) { - scalar_t val = 0, mval = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t* data_col_ptr = data_col + - deformable_group_index * channel_per_deformable_group * batch_size * - width_col * height_col; - const scalar_t* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const scalar_t* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * - height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; - col_c += col_step) { - const int col_pos = - (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = - (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = - (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = - (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + - w_out); - const int data_mask_hw_ptr = - (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { - inv_h = inv_w = -2; - } else { - mval += data_col_ptr[col_pos] * - dmcn_im2col_bilinear( - data_im_ptr + cnt * height * width, - width, - height, - width, - inv_h, - inv_w); - } - const scalar_t weight = dmcn_get_coordinate_weight( - inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * 
height * width, - width, - bp_dir); - val += weight * data_col_ptr[col_pos] * mask; - cnt += 1; - } - // KERNEL_ASSIGN(grad_offset[index], offset_req, val); - grad_offset[index] = val; - if (offset_c % 2 == 0) - // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + - // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * - // height_col + h) * width_col + w], mask_req, mval); - grad_mask - [(((b * deformable_group + deformable_group_index) * kernel_h * - kernel_w + - offset_c / 2) * - height_col + - h) * - width_col + - w] = mval; - } -} - - -namespace detectron2 { - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor data_col) { - // num_axes should be smaller than block size - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = channels * batch_size * height_col * width_col; - - at::cuda::CUDAGuard device_guard(data_im.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* data_col_ = data_col.data_ptr(); - - modulated_deformable_im2col_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_im_, - data_offset_, - data_mask_, - height_im, - width_im, - kernel_h, - kenerl_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - channels, - deformable_group, - height_col, - width_col, - data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_im2col_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_im) { - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = - channels * kernel_h * kernel_w * batch_size * height_col * width_col; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* grad_im_ = grad_im.data_ptr(); - - modulated_deformable_col2im_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_offset_, - data_mask_, - 
channels, - height_im, - width_im, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - deformable_group, - height_col, - width_col, - grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_col2im_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, - at::Tensor grad_mask) { - const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * - kernel_w * deformable_group; - const int channel_per_deformable_group = - channels * kernel_h * kernel_w / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* grad_offset_ = grad_offset.data_ptr(); - scalar_t* grad_mask_ = grad_mask.data_ptr(); - - modulated_deformable_col2im_coord_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_im_, - data_offset_, - data_mask_, - channels, - height_im, - width_im, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - 2 * kernel_h * kernel_w * deformable_group, - deformable_group, - height_col, - width_col, - grad_offset_, - grad_mask_); - })); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_col2im_coord_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h deleted file mode 100644 index 12aca388e47b12dafd20999f2991a9d42f4b904b..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
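[Editorial note, not part of the removed sources] The deform_conv launchers deleted above all size their CUDA grids from the im2col output geometry, one thread per element of the column buffer. A minimal Python sketch of that arithmetic, under the assumption that threads_per_block stands in for the CUDA_NUM_THREADS constant defined in a helper header outside this diff:

    def conv_out_size(size, pad, dilation, ksize, stride):
        # same formula as height_col / width_col in the launchers above
        return (size + 2 * pad - (dilation * (ksize - 1) + 1)) // stride + 1

    def im2col_launch_geometry(channels, height, width, ksize_h, ksize_w,
                               pad, stride, dilation, parallel_imgs,
                               threads_per_block=1024):
        # threads_per_block is an assumed stand-in for CUDA_NUM_THREADS
        h_col = conv_out_size(height, pad, dilation, ksize_h, stride)
        w_col = conv_out_size(width, pad, dilation, ksize_w, stride)
        # deformable_im2col: one thread per (image, channel, output pixel)
        num_kernels = channels * h_col * w_col * parallel_imgs
        blocks = (num_kernels + threads_per_block - 1) // threads_per_block  # GET_BLOCKS
        return h_col, w_col, num_kernels, blocks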
-#pragma once -#include - -namespace detectron2 { - -at::Tensor nms_rotated_cpu( - const at::Tensor& dets, - const at::Tensor& scores, - const double iou_threshold); - -#if defined(WITH_CUDA) || defined(WITH_HIP) -at::Tensor nms_rotated_cuda( - const at::Tensor& dets, - const at::Tensor& scores, - const double iou_threshold); -#endif - -// Interface for Python -// inline is needed to prevent multiple function definitions when this header is -// included by different cpps -inline at::Tensor nms_rotated( - const at::Tensor& dets, - const at::Tensor& scores, - const double iou_threshold) { - assert(dets.device().is_cuda() == scores.device().is_cuda()); - if (dets.device().is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return nms_rotated_cuda( - dets.contiguous(), scores.contiguous(), iou_threshold); -#else - AT_ERROR("Detectron2 is not compiled with GPU support!"); -#endif - } - - return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp deleted file mode 100644 index d7556e645b604aa83d86cc702b783fd8ecedffcc..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include "../box_iou_rotated/box_iou_rotated_utils.h" -#include "nms_rotated.h" - -namespace detectron2 { - -template -at::Tensor nms_rotated_cpu_kernel( - const at::Tensor& dets, - const at::Tensor& scores, - const double iou_threshold) { - // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, - // however, the code in this function is much shorter because - // we delegate the IoU computation for rotated boxes to - // the single_box_iou_rotated function in box_iou_rotated_utils.h - AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor"); - AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor"); - AT_ASSERTM( - dets.scalar_type() == scores.scalar_type(), - "dets should have the same type as scores"); - - if (dets.numel() == 0) { - return at::empty({0}, dets.options().dtype(at::kLong)); - } - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - - auto ndets = dets.size(0); - at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); - at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); - - auto suppressed = suppressed_t.data_ptr(); - auto keep = keep_t.data_ptr(); - auto order = order_t.data_ptr(); - - int64_t num_to_keep = 0; - - for (int64_t _i = 0; _i < ndets; _i++) { - auto i = order[_i]; - if (suppressed[i] == 1) { - continue; - } - - keep[num_to_keep++] = i; - - for (int64_t _j = _i + 1; _j < ndets; _j++) { - auto j = order[_j]; - if (suppressed[j] == 1) { - continue; - } - - auto ovr = single_box_iou_rotated( - dets[i].data_ptr(), dets[j].data_ptr()); - if (ovr >= iou_threshold) { - suppressed[j] = 1; - } - } - } - return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); -} - -at::Tensor nms_rotated_cpu( - // input must be contiguous - const at::Tensor& dets, - const at::Tensor& scores, - const double iou_threshold) { - auto result = at::empty({0}, dets.options()); - - AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { - result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); - }); - return result; -} - -} // namespace detectron2 diff --git 
a/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu deleted file mode 100644 index 2a3db5c62e7a2da52ccf5bac980653c943d630fd..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include -#include -#include -#include -#ifdef WITH_CUDA -#include "../box_iou_rotated/box_iou_rotated_utils.h" -#endif -// TODO avoid this when pytorch supports "same directory" hipification -#ifdef WITH_HIP -#include "box_iou_rotated/box_iou_rotated_utils.h" -#endif - -using namespace detectron2; - -namespace { -int const threadsPerBlock = sizeof(unsigned long long) * 8; -} - -template -__global__ void nms_rotated_cuda_kernel( - const int n_boxes, - const double iou_threshold, - const T* dev_boxes, - unsigned long long* dev_mask) { - // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel - - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - // Compared to nms_cuda_kernel, where each box is represented with 4 values - // (x1, y1, x2, y2), each rotated box is represented with 5 values - // (x_center, y_center, width, height, angle_degrees) here. - __shared__ T block_boxes[threadsPerBlock * 5]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; - block_boxes[threadIdx.x * 5 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; - block_boxes[threadIdx.x * 5 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; - block_boxes[threadIdx.x * 5 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; - block_boxes[threadIdx.x * 5 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const T* cur_box = dev_boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - // Instead of devIoU used by original horizontal nms, here - // we use the single_box_iou_rotated function from box_iou_rotated_utils.h - if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > - iou_threshold) { - t |= 1ULL << i; - } - } - const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } -} - -namespace detectron2 { - -at::Tensor nms_rotated_cuda( - // input must be contiguous - const at::Tensor& dets, - const at::Tensor& scores, - double iou_threshold) { - // using scalar_t = float; - AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); - AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(dets.device()); - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - auto dets_sorted = dets.index_select(0, order_t); - - auto dets_num = dets.size(0); - - const int col_blocks = - at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); - - at::Tensor mask = - at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); - - 
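[Editorial note, not part of the removed sources] The kernel above packs, for every box, a row of col_blocks 64-bit words (threadsPerBlock = 8 * sizeof(unsigned long long) = 64) whose bits mark which boxes it overlaps beyond iou_threshold; the host loop further down then keeps boxes in descending-score order and drops any box already flagged by a kept one. A minimal NumPy sketch of that same greedy rule, using a dense boolean matrix in place of the packed mask words:

    import numpy as np

    def greedy_nms(ious, iou_threshold):
        # ious: (N, N) IoU matrix for boxes already sorted by descending score,
        # standing in for the bitmask rows computed by nms_rotated_cuda_kernel.
        n = ious.shape[0]
        suppressed = np.zeros(n, dtype=bool)
        keep = []
        for i in range(n):
            if suppressed[i]:
                continue  # a higher-scored kept box already overlaps this one
            keep.append(i)
            # suppress every later box whose IoU with the kept box exceeds the threshold
            suppressed[i + 1:] |= ious[i, i + 1:] > iou_threshold
        return np.asarray(keep)  # indices into the score-sorted order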
dim3 blocks(col_blocks, col_blocks); - dim3 threads(threadsPerBlock); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES( - dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { - nms_rotated_cuda_kernel<<>>( - dets_num, - iou_threshold, - dets_sorted.data_ptr(), - (unsigned long long*)mask.data_ptr()); - }); - - at::Tensor mask_cpu = mask.to(at::kCPU); - unsigned long long* mask_host = - (unsigned long long*)mask_cpu.data_ptr(); - - std::vector remv(col_blocks); - memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); - - at::Tensor keep = - at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); - int64_t* keep_out = keep.data_ptr(); - - int num_to_keep = 0; - for (int i = 0; i < dets_num; i++) { - int nblock = i / threadsPerBlock; - int inblock = i % threadsPerBlock; - - if (!(remv[nblock] & (1ULL << inblock))) { - keep_out[num_to_keep++] = i; - unsigned long long* p = mask_host + i * col_blocks; - for (int j = nblock; j < col_blocks; j++) { - remv[j] |= p[j]; - } - } - } - - AT_CUDA_CHECK(cudaGetLastError()); - return order_t.index( - {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) - .to(order_t.device(), keep.scalar_type())}); -} - -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/csrc/vision.cpp b/detectron2/detectron2/layers/csrc/vision.cpp deleted file mode 100644 index c9a2cd4f20e6f58be1c5783d67c64232dd59b560..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/csrc/vision.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. - -#include -#include "ROIAlignRotated/ROIAlignRotated.h" -#include "box_iou_rotated/box_iou_rotated.h" -#include "cocoeval/cocoeval.h" -#include "deformable/deform_conv.h" -#include "nms_rotated/nms_rotated.h" - -namespace detectron2 { - -#if defined(WITH_CUDA) || defined(WITH_HIP) -extern int get_cudart_version(); -#endif - -std::string get_cuda_version() { -#if defined(WITH_CUDA) || defined(WITH_HIP) - std::ostringstream oss; - -#if defined(WITH_CUDA) - oss << "CUDA "; -#else - oss << "HIP "; -#endif - - // copied from - // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 - auto printCudaStyleVersion = [&](int v) { - oss << (v / 1000) << "." << (v / 10 % 100); - if (v % 10 != 0) { - oss << "." << (v % 10); - } - }; - printCudaStyleVersion(get_cudart_version()); - return oss.str(); -#else // neither CUDA nor HIP - return std::string("not available"); -#endif -} - -bool has_cuda() { -#if defined(WITH_CUDA) - return true; -#else - return false; -#endif -} - -// similar to -// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp -std::string get_compiler_version() { - std::ostringstream ss; -#if defined(__GNUC__) -#ifndef __clang__ - -#if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) -#error "GCC >= 4.9 is required!" -#endif - - { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } -#endif -#endif - -#if defined(__clang_major__) - { - ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
- << __clang_patchlevel__; - } -#endif - -#if defined(_MSC_VER) - { ss << "MSVC " << _MSC_FULL_VER; } -#endif - return ss.str(); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); - m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); - m.def("has_cuda", &has_cuda, "has_cuda"); - - m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); - m.def( - "deform_conv_backward_input", - &deform_conv_backward_input, - "deform_conv_backward_input"); - m.def( - "deform_conv_backward_filter", - &deform_conv_backward_filter, - "deform_conv_backward_filter"); - m.def( - "modulated_deform_conv_forward", - &modulated_deform_conv_forward, - "modulated_deform_conv_forward"); - m.def( - "modulated_deform_conv_backward", - &modulated_deform_conv_backward, - "modulated_deform_conv_backward"); - - m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); - m.def( - "COCOevalEvaluateImages", - &COCOeval::EvaluateImages, - "COCOeval::EvaluateImages"); - pybind11::class_(m, "InstanceAnnotation") - .def(pybind11::init()); - pybind11::class_(m, "ImageEvaluation") - .def(pybind11::init<>()); -} - -TORCH_LIBRARY(detectron2, m) { - m.def("nms_rotated", &nms_rotated); - m.def("box_iou_rotated", &box_iou_rotated); - m.def("roi_align_rotated_forward", &ROIAlignRotated_forward); - m.def("roi_align_rotated_backward", &ROIAlignRotated_backward); -} -} // namespace detectron2 diff --git a/detectron2/detectron2/layers/deform_conv.py b/detectron2/detectron2/layers/deform_conv.py deleted file mode 100644 index dffb720c2a8d10d9273752dbdd291a3714f91338..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/deform_conv.py +++ /dev/null @@ -1,514 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -from functools import lru_cache -import torch -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair -from torchvision.ops import deform_conv2d - -from detectron2.utils.develop import create_dummy_class, create_dummy_func - -from .wrappers import _NewEmptyTensorOp - - -class _DeformConv(Function): - @staticmethod - def forward( - ctx, - input, - offset, - weight, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - im2col_step=64, - ): - if input is not None and input.dim() != 4: - raise ValueError( - "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) - ) - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.im2col_step = im2col_step - - ctx.save_for_backward(input, offset, weight) - - output = input.new_empty( - _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) - ) - - ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones - - if not input.is_cuda: - # TODO: let torchvision support full features of our deformconv. - if deformable_groups != 1: - raise NotImplementedError( - "Deformable Conv with deformable_groups != 1 is not supported on CPUs!" 
- ) - return deform_conv2d( - input, offset, weight, stride=stride, padding=padding, dilation=dilation - ) - else: - cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) - assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" - - _C.deform_conv_forward( - input, - weight, - offset, - output, - ctx.bufs_[0], - ctx.bufs_[1], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - cur_im2col_step, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, weight = ctx.saved_tensors - - grad_input = grad_offset = grad_weight = None - - if not grad_output.is_cuda: - raise NotImplementedError("Deformable Conv is not supported on CPUs!") - else: - cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) - assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" - - if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - _C.deform_conv_backward_input( - input, - offset, - grad_output, - grad_input, - grad_offset, - weight, - ctx.bufs_[0], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - cur_im2col_step, - ) - - if ctx.needs_input_grad[2]: - grad_weight = torch.zeros_like(weight) - _C.deform_conv_backward_filter( - input, - offset, - grad_output, - grad_weight, - ctx.bufs_[0], - ctx.bufs_[1], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - 1, - cur_im2col_step, - ) - - return grad_input, grad_offset, grad_weight, None, None, None, None, None, None - - @staticmethod - def _output_size(input, weight, padding, dilation, stride): - channels = weight.size(0) - output_size = (input.size(0), channels) - for d in range(input.dim() - 2): - in_size = input.size(d + 2) - pad = padding[d] - kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 - stride_ = stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) - if not all(map(lambda s: s > 0, output_size)): - raise ValueError( - "convolution input is too small (output would be {})".format( - "x".join(map(str, output_size)) - ) - ) - return output_size - - @staticmethod - @lru_cache(maxsize=128) - def _cal_im2col_step(input_size, default_size): - """ - Calculate proper im2col step size, which should be divisible by input_size and not larger - than prefer_size. Meanwhile the step size should be as large as possible to be more - efficient. So we choose the largest one among all divisors of input_size which are smaller - than prefer_size. - :param input_size: input batch size . - :param default_size: default preferred im2col step size. - :return: the largest proper step size. 
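A worked example of the rule above (added for illustration; the numbers are not part of the original docstring): with a preferred step of 64, a batch of 10 is used as-is, while a batch of 100 falls back to its largest divisor that does not exceed 64:

    _cal_im2col_step(10, 64)   # -> 10: the batch is already no larger than the preferred step
    _cal_im2col_step(100, 64)  # -> 50: the largest divisor of 100 that is <= 64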
- """ - if input_size <= default_size: - return input_size - best_step = 1 - for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): - if input_size % step == 0: - if input_size // step <= default_size: - return input_size // step - best_step = step - - return best_step - - -class _ModulatedDeformConv(Function): - @staticmethod - def forward( - ctx, - input, - offset, - mask, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - ): - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.with_bias = bias is not None - if not ctx.with_bias: - bias = input.new_empty(1) # fake tensor - if not input.is_cuda: - raise NotImplementedError("Deformable Conv is not supported on CPUs!") - if ( - weight.requires_grad - or mask.requires_grad - or offset.requires_grad - or input.requires_grad - ): - ctx.save_for_backward(input, offset, mask, weight, bias) - output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) - ctx._bufs = [input.new_empty(0), input.new_empty(0)] - _C.modulated_deform_conv_forward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - output, - ctx._bufs[1], - weight.shape[2], - weight.shape[3], - ctx.stride, - ctx.stride, - ctx.padding, - ctx.padding, - ctx.dilation, - ctx.dilation, - ctx.groups, - ctx.deformable_groups, - ctx.with_bias, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - if not grad_output.is_cuda: - raise NotImplementedError("Deformable Conv is not supported on CPUs!") - input, offset, mask, weight, bias = ctx.saved_tensors - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - grad_mask = torch.zeros_like(mask) - grad_weight = torch.zeros_like(weight) - grad_bias = torch.zeros_like(bias) - _C.modulated_deform_conv_backward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - ctx._bufs[1], - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - weight.shape[2], - weight.shape[3], - ctx.stride, - ctx.stride, - ctx.padding, - ctx.padding, - ctx.dilation, - ctx.dilation, - ctx.groups, - ctx.deformable_groups, - ctx.with_bias, - ) - if not ctx.with_bias: - grad_bias = None - - return ( - grad_input, - grad_offset, - grad_mask, - grad_weight, - grad_bias, - None, - None, - None, - None, - None, - ) - - @staticmethod - def _infer_shape(ctx, input, weight): - n = input.size(0) - channels_out = weight.size(0) - height, width = input.shape[2:4] - kernel_h, kernel_w = weight.shape[2:4] - height_out = ( - height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) - ) // ctx.stride + 1 - width_out = ( - width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) - ) // ctx.stride + 1 - return n, channels_out, height_out, width_out - - -deform_conv = _DeformConv.apply -modulated_deform_conv = _ModulatedDeformConv.apply - - -class DeformConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=False, - norm=None, - activation=None, - ): - """ - Deformable convolution from :paper:`deformconv`. - - Arguments are similar to :class:`Conv2D`. Extra arguments: - - Args: - deformable_groups (int): number of groups used in deformable convolution. 
- norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - """ - super(DeformConv, self).__init__() - - assert not bias - assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( - in_channels, groups - ) - assert ( - out_channels % groups == 0 - ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = _pair(stride) - self.padding = _pair(padding) - self.dilation = _pair(dilation) - self.groups = groups - self.deformable_groups = deformable_groups - self.norm = norm - self.activation = activation - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) - ) - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - - def forward(self, x, offset): - if x.numel() == 0: - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. - # This computes the height and width of the output tensor - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - return _NewEmptyTensorOp.apply(x, output_shape) - - x = deform_conv( - x, - offset, - self.weight, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deformable_groups, - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", padding=" + str(self.padding) - tmpstr += ", dilation=" + str(self.dilation) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", deformable_groups=" + str(self.deformable_groups) - tmpstr += ", bias=False" - return tmpstr - - -class ModulatedDeformConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True, - norm=None, - activation=None, - ): - """ - Modulated deformable convolution from :paper:`deformconv2`. - - Arguments are similar to :class:`Conv2D`. Extra arguments: - - Args: - deformable_groups (int): number of groups used in deformable convolution. 
- norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - """ - super(ModulatedDeformConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - self.norm = norm - self.activation = activation - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) - ) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - if self.bias is not None: - nn.init.constant_(self.bias, 0) - - def forward(self, x, offset, mask): - if x.numel() == 0: - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - return _NewEmptyTensorOp.apply(x, output_shape) - - x = modulated_deform_conv( - x, - offset, - mask, - self.weight, - self.bias, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deformable_groups, - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", padding=" + str(self.padding) - tmpstr += ", dilation=" + str(self.dilation) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", deformable_groups=" + str(self.deformable_groups) - tmpstr += ", bias=" + str(self.with_bias) - return tmpstr - - -try: - from detectron2 import _C -except ImportError: - # TODO: register ops natively so there is no need to import _C. - _msg = "detectron2 is not compiled successfully, please build following the instructions!" - _args = ("detectron2._C", _msg) - DeformConv = create_dummy_class("DeformConv", *_args) - ModulatedDeformConv = create_dummy_class("ModulatedDeformConv", *_args) - deform_conv = create_dummy_func("deform_conv", *_args) - modulated_deform_conv = create_dummy_func("modulated_deform_conv", *_args) diff --git a/detectron2/detectron2/layers/losses.py b/detectron2/detectron2/layers/losses.py deleted file mode 100644 index 850a852a2f0986d4d1ce89a526d96db42c76e44f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/losses.py +++ /dev/null @@ -1,133 +0,0 @@ -import math -import torch - - -def diou_loss( - boxes1: torch.Tensor, - boxes2: torch.Tensor, - reduction: str = "none", - eps: float = 1e-7, -) -> torch.Tensor: - """ - Distance Intersection over Union Loss (Zhaohui Zheng et. al) - https://arxiv.org/abs/1911.08287 - Args: - boxes1, boxes2 (Tensor): box locations in XYXY format, shape (N, 4) or (4,). - reduction: 'none' | 'mean' | 'sum' - 'none': No reduction will be applied to the output. - 'mean': The output will be averaged. - 'sum': The output will be summed. 
- eps (float): small number to prevent division by zero - """ - - x1, y1, x2, y2 = boxes1.unbind(dim=-1) - x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) - - # TODO: use torch._assert_async() when pytorch 1.8 support is dropped - assert (x2 >= x1).all(), "bad box: x1 larger than x2" - assert (y2 >= y1).all(), "bad box: y1 larger than y2" - - # Intersection keypoints - xkis1 = torch.max(x1, x1g) - ykis1 = torch.max(y1, y1g) - xkis2 = torch.min(x2, x2g) - ykis2 = torch.min(y2, y2g) - - intsct = torch.zeros_like(x1) - mask = (ykis2 > ykis1) & (xkis2 > xkis1) - intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) - union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps - iou = intsct / union - - # smallest enclosing box - xc1 = torch.min(x1, x1g) - yc1 = torch.min(y1, y1g) - xc2 = torch.max(x2, x2g) - yc2 = torch.max(y2, y2g) - diag_len = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps - - # centers of boxes - x_p = (x2 + x1) / 2 - y_p = (y2 + y1) / 2 - x_g = (x1g + x2g) / 2 - y_g = (y1g + y2g) / 2 - distance = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) - - # Eqn. (7) - loss = 1 - iou + (distance / diag_len) - if reduction == "mean": - loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() - elif reduction == "sum": - loss = loss.sum() - - return loss - - -def ciou_loss( - boxes1: torch.Tensor, - boxes2: torch.Tensor, - reduction: str = "none", - eps: float = 1e-7, -) -> torch.Tensor: - """ - Complete Intersection over Union Loss (Zhaohui Zheng et. al) - https://arxiv.org/abs/1911.08287 - Args: - boxes1, boxes2 (Tensor): box locations in XYXY format, shape (N, 4) or (4,). - reduction: 'none' | 'mean' | 'sum' - 'none': No reduction will be applied to the output. - 'mean': The output will be averaged. - 'sum': The output will be summed. - eps (float): small number to prevent division by zero - """ - - x1, y1, x2, y2 = boxes1.unbind(dim=-1) - x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) - - # TODO: use torch._assert_async() when pytorch 1.8 support is dropped - assert (x2 >= x1).all(), "bad box: x1 larger than x2" - assert (y2 >= y1).all(), "bad box: y1 larger than y2" - - # Intersection keypoints - xkis1 = torch.max(x1, x1g) - ykis1 = torch.max(y1, y1g) - xkis2 = torch.min(x2, x2g) - ykis2 = torch.min(y2, y2g) - - intsct = torch.zeros_like(x1) - mask = (ykis2 > ykis1) & (xkis2 > xkis1) - intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) - union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps - iou = intsct / union - - # smallest enclosing box - xc1 = torch.min(x1, x1g) - yc1 = torch.min(y1, y1g) - xc2 = torch.max(x2, x2g) - yc2 = torch.max(y2, y2g) - diag_len = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps - - # centers of boxes - x_p = (x2 + x1) / 2 - y_p = (y2 + y1) / 2 - x_g = (x1g + x2g) / 2 - y_g = (y1g + y2g) / 2 - distance = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) - - # width and height of boxes - w_pred = x2 - x1 - h_pred = y2 - y1 - w_gt = x2g - x1g - h_gt = y2g - y1g - v = (4 / (math.pi**2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) - with torch.no_grad(): - alpha = v / (1 - iou + v + eps) - - # Eqn. 
(10) - loss = 1 - iou + (distance / diag_len) + alpha * v - if reduction == "mean": - loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() - elif reduction == "sum": - loss = loss.sum() - - return loss diff --git a/detectron2/detectron2/layers/mask_ops.py b/detectron2/detectron2/layers/mask_ops.py deleted file mode 100644 index 6a139bb47ad681469e7efa79b0643c664f758a01..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/mask_ops.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -from typing import Tuple -import torch -from PIL import Image -from torch.nn import functional as F - -__all__ = ["paste_masks_in_image"] - - -BYTES_PER_FLOAT = 4 -# TODO: This memory limit may be too much or too little. It would be better to -# determine it based on available resources. -GPU_MEM_LIMIT = 1024**3 # 1 GB memory limit - - -def _do_paste_mask(masks, boxes, img_h: int, img_w: int, skip_empty: bool = True): - """ - Args: - masks: N, 1, H, W - boxes: N, 4 - img_h, img_w (int): - skip_empty (bool): only paste masks within the region that - tightly bound all boxes, and returns the results this region only. - An important optimization for CPU. - - Returns: - if skip_empty == False, a mask of shape (N, img_h, img_w) - if skip_empty == True, a mask of shape (N, h', w'), and the slice - object for the corresponding region. - """ - # On GPU, paste all masks together (up to chunk size) - # by using the entire image to sample the masks - # Compared to pasting them one by one, - # this has more operations but is faster on COCO-scale dataset. - device = masks.device - - if skip_empty and not torch.jit.is_scripting(): - x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( - dtype=torch.int32 - ) - x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) - y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) - else: - x0_int, y0_int = 0, 0 - x1_int, y1_int = img_w, img_h - x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 - - N = masks.shape[0] - - img_y = torch.arange(y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 - img_x = torch.arange(x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 - img_y = (img_y - y0) / (y1 - y0) * 2 - 1 - img_x = (img_x - x0) / (x1 - x0) * 2 - 1 - # img_x, img_y have shapes (N, w), (N, h) - - gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) - gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) - grid = torch.stack([gx, gy], dim=3) - - if not torch.jit.is_scripting(): - if not masks.dtype.is_floating_point: - masks = masks.float() - img_masks = F.grid_sample(masks, grid.to(masks.dtype), align_corners=False) - - if skip_empty and not torch.jit.is_scripting(): - return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) - else: - return img_masks[:, 0], () - - -# Annotate boxes as Tensor (but not Boxes) in order to use scripting -@torch.jit.script_if_tracing -def paste_masks_in_image( - masks: torch.Tensor, boxes: torch.Tensor, image_shape: Tuple[int, int], threshold: float = 0.5 -): - """ - Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. - The location, height, and width for pasting each mask is determined by their - corresponding bounding boxes in boxes. - - Note: - This is a complicated but more accurate implementation. In actual deployment, it is - often enough to use a faster but less accurate implementation. 
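To make the two losses above concrete, here is a tiny worked example; the numbers are chosen purely for illustration and the import path simply mirrors the deleted losses module.

import torch
from detectron2.layers.losses import ciou_loss, diou_loss

pred   = torch.tensor([[0.0, 0.0, 4.0, 4.0]])   # XYXY
target = torch.tensor([[1.0, 1.0, 5.0, 5.0]])

# IoU = 9 / (16 + 16 - 9) ≈ 0.391
# enclosing-box diagonal^2 = 5^2 + 5^2 = 50, squared center distance = 1^2 + 1^2 = 2
# DIoU loss ≈ 1 - 0.391 + 2 / 50 ≈ 0.649
print(diou_loss(pred, target, reduction="mean"))

# Both boxes are square, so the aspect-ratio term v is 0 and CIoU equals DIoU here.
print(ciou_loss(pred, target, reduction="mean"))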
- See :func:`paste_mask_in_image_old` in this file for an alternative implementation. - - Args: - masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of - detected object instances in the image and Hmask, Wmask are the mask width and mask - height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. - boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). - boxes[i] and masks[i] correspond to the same object instance. - image_shape (tuple): height, width - threshold (float): A threshold in [0, 1] for converting the (soft) masks to - binary masks. - - Returns: - img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the - number of detected object instances and Himage, Wimage are the image width - and height. img_masks[i] is a binary mask for object instance i. - """ - - assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" - N = len(masks) - if N == 0: - return masks.new_empty((0,) + image_shape, dtype=torch.uint8) - if not isinstance(boxes, torch.Tensor): - boxes = boxes.tensor - device = boxes.device - assert len(boxes) == N, boxes.shape - - img_h, img_w = image_shape - - # The actual implementation split the input into chunks, - # and paste them chunk by chunk. - if device.type == "cpu" or torch.jit.is_scripting(): - # CPU is most efficient when they are pasted one by one with skip_empty=True - # so that it performs minimal number of operations. - num_chunks = N - else: - # GPU benefits from parallelism for larger chunks, but may have memory issue - # int(img_h) because shape may be tensors in tracing - num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) - assert ( - num_chunks <= N - ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" - chunks = torch.chunk(torch.arange(N, device=device), num_chunks) - - img_masks = torch.zeros( - N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 - ) - for inds in chunks: - masks_chunk, spatial_inds = _do_paste_mask( - masks[inds, None, :, :], boxes[inds], img_h, img_w, skip_empty=device.type == "cpu" - ) - - if threshold >= 0: - masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) - else: - # for visualization and debugging - masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) - - if torch.jit.is_scripting(): # Scripting does not use the optimized codepath - img_masks[inds] = masks_chunk - else: - img_masks[(inds,) + spatial_inds] = masks_chunk - return img_masks - - -# The below are the original paste function (from Detectron1) which has -# larger quantization error. -# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. - - -def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): - """ - Paste a single mask in an image. - This is a per-box implementation of :func:`paste_masks_in_image`. - This function has larger quantization error due to incorrect pixel - modeling and is not used any more. - - Args: - mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single - object instance. Values are in [0, 1]. - box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners - of the object instance. - img_h, img_w (int): Image height and width. - threshold (float): Mask binarization threshold in [0, 1]. - - Returns: - im_mask (Tensor): - The resized and binarized object mask pasted into the original - image plane (a tensor of shape (img_h, img_w)). 
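A small usage sketch of paste_masks_in_image as documented above; the mask resolution, boxes, and image size are made-up values.

import torch
from detectron2.layers.mask_ops import paste_masks_in_image

masks = torch.rand(2, 28, 28)                         # two 28x28 soft masks in [0, 1]
boxes = torch.tensor([[ 10.0,  20.0,  90.0, 120.0],
                      [200.0,  50.0, 260.0, 110.0]])  # XYXY; boxes[i] matches masks[i]
image_shape = (480, 640)                              # (height, width)

img_masks = paste_masks_in_image(masks, boxes, image_shape, threshold=0.5)
print(img_masks.shape, img_masks.dtype)               # torch.Size([2, 480, 640]) torch.bool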
- """ - # Conversion from continuous box coordinates to discrete pixel coordinates - # via truncation (cast to int32). This determines which pixels to paste the - # mask onto. - box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion - # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to - # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 - # pixels (not x1 - x0 pixels). - samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width - samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height - - # Resample the mask from it's original grid to the new samples_w x samples_h grid - mask = Image.fromarray(mask.cpu().numpy()) - mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) - mask = np.asarray(mask) - - if threshold >= 0: - mask = np.array(mask > threshold, dtype=np.uint8) - mask = torch.from_numpy(mask) - else: - # for visualization and debugging, we also - # allow it to return an unmodified mask - mask = torch.from_numpy(mask * 255).to(torch.uint8) - - im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) - x_0 = max(box[0], 0) - x_1 = min(box[2] + 1, img_w) - y_0 = max(box[1], 0) - y_1 = min(box[3] + 1, img_h) - - im_mask[y_0:y_1, x_0:x_1] = mask[ - (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) - ] - return im_mask - - -# Our pixel modeling requires extrapolation for any continuous -# coordinate < 0.5 or > length - 0.5. When sampling pixels on the masks, -# we would like this extrapolation to be an interpolation between boundary values and zero, -# instead of using absolute zero or boundary values. -# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: -# masks, scale = pad_masks(masks[:, 0, :, :], 1) -# boxes = scale_boxes(boxes.tensor, scale) - - -def pad_masks(masks, padding): - """ - Args: - masks (tensor): A tensor of shape (B, M, M) representing B masks. - padding (int): Number of cells to pad on all sides. - - Returns: - The padded masks and the scale factor of the padding size / original size. - """ - B = masks.shape[0] - M = masks.shape[-1] - pad2 = 2 * padding - scale = float(M + pad2) / M - padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) - padded_masks[:, padding:-padding, padding:-padding] = masks - return padded_masks, scale - - -def scale_boxes(boxes, scale): - """ - Args: - boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 - coords representing the corners x0, y0, x1, y1, - scale (float): The box scaling factor. - - Returns: - Scaled boxes. - """ - w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 - h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 - x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 - y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 - - w_half *= scale - h_half *= scale - - scaled_boxes = torch.zeros_like(boxes) - scaled_boxes[:, 0] = x_c - w_half - scaled_boxes[:, 2] = x_c + w_half - scaled_boxes[:, 1] = y_c - h_half - scaled_boxes[:, 3] = y_c + h_half - return scaled_boxes - - -@torch.jit.script_if_tracing -def _paste_masks_tensor_shape( - masks: torch.Tensor, - boxes: torch.Tensor, - image_shape: Tuple[torch.Tensor, torch.Tensor], - threshold: float = 0.5, -): - """ - A wrapper of paste_masks_in_image where image_shape is Tensor. - During tracing, shapes might be tensors instead of ints. The Tensor->int - conversion should be scripted rather than traced. 
- """ - return paste_masks_in_image(masks, boxes, (int(image_shape[0]), int(image_shape[1])), threshold) diff --git a/detectron2/detectron2/layers/nms.py b/detectron2/detectron2/layers/nms.py deleted file mode 100644 index 37ba18b2af6abf6b2b20d32bdd064fa6592c0cad..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/nms.py +++ /dev/null @@ -1,147 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import torch -from torchvision.ops import boxes as box_ops -from torchvision.ops import nms # noqa . for compatibility - -from detectron2.layers.wrappers import disable_torch_compiler - - -def batched_nms( - boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float -): - """ - Same as torchvision.ops.boxes.batched_nms, but with float(). - """ - assert boxes.shape[-1] == 4 - # Note: Torchvision already has a strategy (https://github.com/pytorch/vision/issues/1311) - # to decide whether to use coordinate trick or for loop to implement batched_nms. So we - # just call it directly. - # Fp16 does not have enough range for batched NMS, so adding float(). - return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold) - - -# Note: this function (nms_rotated) might be moved into -# torchvision/ops/boxes.py in the future -@disable_torch_compiler -def nms_rotated(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float): - """ - Performs non-maximum suppression (NMS) on the rotated boxes according - to their intersection-over-union (IoU). - - Rotated NMS iteratively removes lower scoring rotated boxes which have an - IoU greater than iou_threshold with another (higher scoring) rotated box. - - Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as - RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they - can be representing completely different objects in certain tasks, e.g., OCR. - - As for the question of whether rotated-NMS should treat them as faraway boxes - even though their IOU is 1, it depends on the application and/or ground truth annotation. - - As an extreme example, consider a single character v and the square box around it. - - If the angle is 0 degree, the object (text) would be read as 'v'; - - If the angle is 90 degrees, the object (text) would become '>'; - - If the angle is 180 degrees, the object (text) would become '^'; - - If the angle is 270/-90 degrees, the object (text) would become '<' - - All of these cases have IoU of 1 to each other, and rotated NMS that only - uses IoU as criterion would only keep one of them with the highest score - - which, practically, still makes sense in most cases because typically - only one of theses orientations is the correct one. Also, it does not matter - as much if the box is only used to classify the object (instead of transcribing - them with a sequential OCR recognition model) later. - - On the other hand, when we use IoU to filter proposals that are close to the - ground truth during training, we should definitely take the angle into account if - we know the ground truth is labeled with the strictly correct orientation (as in, - upside-down words are annotated with -180 degrees even though they can be covered - with a 0/90/-90 degree box, etc.) - - The way the original dataset is annotated also matters. 
For example, if the dataset - is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, - we can estimate a minimum rotated bounding box to this polygon, but there's no way - we can tell the correct angle with 100% confidence (as shown above, there could be 4 different - rotated boxes, with angles differed by 90 degrees to each other, covering the exactly - same region). In that case we have to just use IoU to determine the box - proximity (as many detection benchmarks (even for text) do) unless there're other - assumptions we can make (like width is always larger than height, or the object is not - rotated by more than 90 degrees CCW/CW, etc.) - - In summary, not considering angles in rotated NMS seems to be a good option for now, - but we should be aware of its implications. - - Args: - boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in - (x_center, y_center, width, height, angle_degrees) format. - scores (Tensor[N]): Scores for each one of the rotated boxes - iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold - - Returns: - keep (Tensor): int64 tensor with the indices of the elements that have been kept - by Rotated NMS, sorted in decreasing order of scores - """ - return torch.ops.detectron2.nms_rotated(boxes, scores, iou_threshold) - - -# Note: this function (batched_nms_rotated) might be moved into -# torchvision/ops/boxes.py in the future - - -@torch.jit.script_if_tracing -def batched_nms_rotated( - boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float -): - """ - Performs non-maximum suppression in a batched fashion. - - Each index value correspond to a category, and NMS - will not be applied between elements of different categories. - - Args: - boxes (Tensor[N, 5]): - boxes where NMS will be performed. They - are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format - scores (Tensor[N]): - scores for each one of the boxes - idxs (Tensor[N]): - indices of the categories for each one of the boxes. - iou_threshold (float): - discards all overlapping boxes - with IoU < iou_threshold - - Returns: - Tensor: - int64 tensor with the indices of the elements that have been kept - by NMS, sorted in decreasing order of scores - """ - assert boxes.shape[-1] == 5 - - if boxes.numel() == 0: - return torch.empty((0,), dtype=torch.int64, device=boxes.device) - boxes = boxes.float() # fp16 does not have enough range for batched NMS - # Strategy: in order to perform NMS independently per class, - # we add an offset to all the boxes. The offset is dependent - # only on the class idx, and is large enough so that boxes - # from different classes do not overlap - - # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, - # which won't handle negative coordinates correctly. - # Here by using min_coordinate we can make sure the negative coordinates are - # correctly handled. 
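Before the batched variant continues below, here is a usage sketch of nms_rotated defined above. It dispatches to the compiled detectron2 op, so this assumes the extension is built; the boxes and threshold are illustrative.

import torch
from detectron2.layers.nms import nms_rotated

# (x_ctr, y_ctr, width, height, angle_degrees); the first two boxes overlap heavily.
boxes = torch.tensor([[ 50.0, 50.0, 40.0, 20.0,  0.0],
                      [ 52.0, 51.0, 40.0, 20.0,  5.0],
                      [200.0, 80.0, 30.0, 60.0, 45.0]])
scores = torch.tensor([0.9, 0.8, 0.7])

keep = nms_rotated(boxes, scores, iou_threshold=0.5)
print(keep)   # expected tensor([0, 2]): the lower-scoring overlapping box is suppressed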
- max_coordinate = ( - torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 - ).max() - min_coordinate = ( - torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 - ).min() - offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) - boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes - boxes_for_nms[:, :2] += offsets[:, None] - keep = nms_rotated(boxes_for_nms, scores, iou_threshold) - return keep diff --git a/detectron2/detectron2/layers/roi_align.py b/detectron2/detectron2/layers/roi_align.py deleted file mode 100644 index 163462e1f194e1e4100da92d76d9516f7cc22e35..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/roi_align.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from torch import nn -from torchvision.ops import roi_align - - -# NOTE: torchvision's RoIAlign has a different default aligned=False -class ROIAlign(nn.Module): - def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): - """ - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each output - sample. 0 to take samples densely. - aligned (bool): if False, use the legacy implementation in - Detectron. If True, align the results more perfectly. - - Note: - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel indices (in our - pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, - c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled - from the underlying signal at continuous coordinates 0.5 and 1.5). But the original - roi_align (aligned=False) does not subtract the 0.5 when computing neighboring - pixel indices and therefore it uses pixels with a slightly incorrect alignment - (relative to our pixel model) when performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; see - detectron2/tests/test_roi_align.py for verification. - - The difference does not make a difference to the model's performance if - ROIAlign is used together with conv layers. - """ - super().__init__() - self.output_size = output_size - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - self.aligned = aligned - - from torchvision import __version__ - - version = tuple(int(x) for x in __version__.split(".")[:2]) - # https://github.com/pytorch/vision/pull/2438 - assert version >= (0, 7), "Require torchvision >= 0.7" - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 
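A brief, illustrative use of the ROIAlign module whose forward() is documented just above; the feature sizes and boxes are arbitrary.

import torch
from detectron2.layers.roi_align import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=0, aligned=True)

features = torch.randn(2, 256, 50, 64)                     # NCHW features from a stride-16 level
rois = torch.tensor([[0.0,  32.0,  32.0, 160.0, 200.0],    # col 0: image index into N
                     [1.0, 100.0,  40.0, 300.0, 180.0]])   # cols 1-4: XYXY in image coords

crops = pooler(features, rois)
print(crops.shape)                                         # torch.Size([2, 256, 7, 7])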
- """ - assert rois.dim() == 2 and rois.size(1) == 5 - if input.is_quantized: - input = input.dequantize() - return roi_align( - input, - rois.to(dtype=input.dtype), - self.output_size, - self.spatial_scale, - self.sampling_ratio, - self.aligned, - ) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ", aligned=" + str(self.aligned) - tmpstr += ")" - return tmpstr diff --git a/detectron2/detectron2/layers/roi_align_rotated.py b/detectron2/detectron2/layers/roi_align_rotated.py deleted file mode 100644 index 12dd00118cd4ae45985976a36b67e637e3115750..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/roi_align_rotated.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2.layers.wrappers import disable_torch_compiler - - -class _ROIAlignRotated(Function): - @staticmethod - @disable_torch_compiler - def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): - ctx.save_for_backward(roi) - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.sampling_ratio = sampling_ratio - ctx.input_shape = input.size() - output = torch.ops.detectron2.roi_align_rotated_forward( - input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - (rois,) = ctx.saved_tensors - output_size = ctx.output_size - spatial_scale = ctx.spatial_scale - sampling_ratio = ctx.sampling_ratio - bs, ch, h, w = ctx.input_shape - grad_input = torch.ops.detectron2.roi_align_rotated_backward( - grad_output, - rois, - spatial_scale, - output_size[0], - output_size[1], - bs, - ch, - h, - w, - sampling_ratio, - ) - return grad_input, None, None, None, None, None - - -roi_align_rotated = _ROIAlignRotated.apply - - -class ROIAlignRotated(nn.Module): - def __init__(self, output_size, spatial_scale, sampling_ratio): - """ - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each output - sample. 0 to take samples densely. - - Note: - ROIAlignRotated supports continuous coordinate by default: - Given a continuous coordinate c, its two neighboring pixel indices (in our - pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, - c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled - from the underlying signal at continuous coordinates 0.5 and 1.5). - """ - super(ROIAlignRotated, self).__init__() - self.output_size = output_size - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx6 boxes. First column is the index into N. - The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). - """ - assert rois.dim() == 2 and rois.size(1) == 6 - orig_dtype = input.dtype - if orig_dtype == torch.float16: - input = input.float() - rois = rois.float() - output_size = _pair(self.output_size) - - # Scripting for Autograd is currently unsupported. 
- # This is a quick fix without having to rewrite code on the C++ side - if torch.jit.is_scripting() or torch.jit.is_tracing(): - return torch.ops.detectron2.roi_align_rotated_forward( - input, rois, self.spatial_scale, output_size[0], output_size[1], self.sampling_ratio - ).to(dtype=orig_dtype) - - return roi_align_rotated( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio - ).to(dtype=orig_dtype) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ")" - return tmpstr diff --git a/detectron2/detectron2/layers/rotated_boxes.py b/detectron2/detectron2/layers/rotated_boxes.py deleted file mode 100644 index 03f73b3bb99275931a887ad9b2d8c0ac9f412bf3..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/rotated_boxes.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from __future__ import absolute_import, division, print_function, unicode_literals -import torch - - -def pairwise_iou_rotated(boxes1, boxes2): - """ - Return intersection-over-union (Jaccard index) of boxes. - - Both sets of boxes are expected to be in - (x_center, y_center, width, height, angle) format. - - Arguments: - boxes1 (Tensor[N, 5]) - boxes2 (Tensor[M, 5]) - - Returns: - iou (Tensor[N, M]): the NxM matrix containing the pairwise - IoU values for every element in boxes1 and boxes2 - """ - return torch.ops.detectron2.box_iou_rotated(boxes1, boxes2) diff --git a/detectron2/detectron2/layers/shape_spec.py b/detectron2/detectron2/layers/shape_spec.py deleted file mode 100644 index 8dac3c59b96576710656abebe9b5eac25868abbb..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/shape_spec.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. -from dataclasses import dataclass -from typing import Optional - - -@dataclass -class ShapeSpec: - """ - A simple structure that contains basic shape specification about a tensor. - It is often used as the auxiliary inputs/outputs of models, - to complement the lack of shape inference ability among pytorch modules. - """ - - channels: Optional[int] = None - height: Optional[int] = None - width: Optional[int] = None - stride: Optional[int] = None diff --git a/detectron2/detectron2/layers/wrappers.py b/detectron2/detectron2/layers/wrappers.py deleted file mode 100644 index c1a41195185f53f351dfa9b49107f20f3e29b71f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/layers/wrappers.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Wrappers around on some nn functions, mainly to support empty tensors. - -Ideally, add support directly in PyTorch to empty tensors in those functions. - -These can be removed once https://github.com/pytorch/pytorch/issues/12013 -is implemented -""" - -import functools -import warnings -from typing import List, Optional -import torch -from torch.nn import functional as F - -from detectron2.utils.env import TORCH_VERSION - - -def shapes_to_tensor(x: List[int], device: Optional[torch.device] = None) -> torch.Tensor: - """ - Turn a list of integer scalars or integer Tensor scalars into a vector, - in a way that's both traceable and scriptable. - - In tracing, `x` should be a list of scalar Tensor, so the output can trace to the inputs. 
- In scripting or eager, `x` should be a list of int. - """ - if torch.jit.is_scripting(): - return torch.as_tensor(x, device=device) - if torch.jit.is_tracing(): - assert all( - [isinstance(t, torch.Tensor) for t in x] - ), "Shape should be tensor during tracing!" - # as_tensor should not be used in tracing because it records a constant - ret = torch.stack(x) - if ret.device != device: # avoid recording a hard-coded device if not necessary - ret = ret.to(device=device) - return ret - return torch.as_tensor(x, device=device) - - -def check_if_dynamo_compiling(): - if TORCH_VERSION >= (2, 1): - from torch._dynamo import is_compiling - - return is_compiling() - else: - return False - - -def disable_torch_compiler(func): - if TORCH_VERSION >= (2, 1): - # Use the torch.compiler.disable decorator if supported - @torch.compiler.disable - @functools.wraps(func) - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - - return wrapper - else: - # Return the function unchanged if torch.compiler.disable is not supported - return func - - -def cat(tensors: List[torch.Tensor], dim: int = 0): - """ - Efficient version of torch.cat that avoids a copy if there is only a single element in a list - """ - assert isinstance(tensors, (list, tuple)) - if len(tensors) == 1: - return tensors[0] - return torch.cat(tensors, dim) - - -def empty_input_loss_func_wrapper(loss_func): - def wrapped_loss_func(input, target, *, reduction="mean", **kwargs): - """ - Same as `loss_func`, but returns 0 (instead of nan) for empty inputs. - """ - if target.numel() == 0 and reduction == "mean": - return input.sum() * 0.0 # connect the gradient - return loss_func(input, target, reduction=reduction, **kwargs) - - return wrapped_loss_func - - -cross_entropy = empty_input_loss_func_wrapper(F.cross_entropy) - - -class _NewEmptyTensorOp(torch.autograd.Function): - @staticmethod - def forward(ctx, x, new_shape): - ctx.shape = x.shape - return x.new_empty(new_shape) - - @staticmethod - def backward(ctx, grad): - shape = ctx.shape - return _NewEmptyTensorOp.apply(grad, shape), None - - -class Conv2d(torch.nn.Conv2d): - """ - A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. - """ - - def __init__(self, *args, **kwargs): - """ - Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: - - Args: - norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - - It assumes that norm layer is used before activation. - """ - norm = kwargs.pop("norm", None) - activation = kwargs.pop("activation", None) - super().__init__(*args, **kwargs) - - self.norm = norm - self.activation = activation - - def forward(self, x): - # torchscript does not support SyncBatchNorm yet - # https://github.com/pytorch/pytorch/issues/40507 - # and we skip these codes in torchscript since: - # 1. currently we only support torchscript in evaluation mode - # 2. features needed by exporting module to torchscript are added in PyTorch 1.6 or - # later version, `Conv2d` in these PyTorch versions has already supported empty inputs. - if not torch.jit.is_scripting(): - # Dynamo doesn't support context managers yet - is_dynamo_compiling = check_if_dynamo_compiling() - if not is_dynamo_compiling: - with warnings.catch_warnings(record=True): - if x.numel() == 0 and self.training: - # https://github.com/pytorch/pytorch/issues/12013 - assert not isinstance( - self.norm, torch.nn.SyncBatchNorm - ), "SyncBatchNorm does not support empty inputs!" 
- - x = F.conv2d( - x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - -ConvTranspose2d = torch.nn.ConvTranspose2d -BatchNorm2d = torch.nn.BatchNorm2d -interpolate = F.interpolate -Linear = torch.nn.Linear - - -def nonzero_tuple(x): - """ - A 'as_tuple=True' version of torch.nonzero to support torchscript. - because of https://github.com/pytorch/pytorch/issues/38718 - """ - if torch.jit.is_scripting(): - if x.dim() == 0: - return x.unsqueeze(0).nonzero().unbind(1) - return x.nonzero().unbind(1) - else: - return x.nonzero(as_tuple=True) - - -@torch.jit.script_if_tracing -def move_device_like(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor: - """ - Tracing friendly way to cast tensor to another tensor's device. Device will be treated - as constant during tracing, scripting the casting process as whole can workaround this issue. - """ - return src.to(dst.device) diff --git a/detectron2/detectron2/model_zoo/__init__.py b/detectron2/detectron2/model_zoo/__init__.py deleted file mode 100644 index 6204208198d813728cf6419e8eef4a733f20c18f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/model_zoo/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Model Zoo API for Detectron2: a collection of functions to create common model architectures -listed in `MODEL_ZOO.md `_, -and optionally load their pre-trained weights. -""" - -from .model_zoo import get, get_config_file, get_checkpoint_url, get_config - -__all__ = ["get_checkpoint_url", "get", "get_config_file", "get_config"] diff --git a/detectron2/detectron2/model_zoo/model_zoo.py b/detectron2/detectron2/model_zoo/model_zoo.py deleted file mode 100644 index 4e80ffd48f2469287ed091527f72e39766136469..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/model_zoo/model_zoo.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import os -from typing import Optional -import pkg_resources -import torch - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import CfgNode, LazyConfig, get_cfg, instantiate -from detectron2.modeling import build_model - - -class _ModelZooUrls: - """ - Mapping from names to officially released Detectron2 pre-trained models. 
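Returning briefly to the Conv2d wrapper shown above, a minimal sketch of its extra norm/activation keywords (norm is applied before activation); the channel sizes here are arbitrary.

import torch
import torch.nn.functional as F
from detectron2.layers import Conv2d

conv = Conv2d(3, 16, kernel_size=3, padding=1,
              norm=torch.nn.BatchNorm2d(16), activation=F.relu)

x = torch.randn(4, 3, 32, 32)
print(conv(x).shape)          # torch.Size([4, 16, 32, 32])
# The wrapper also tolerates empty batches of shape (0, C, H, W), as described above.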
- """ - - S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - - # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl - CONFIG_PATH_TO_URL_SUFFIX = { - # COCO Detection with Faster R-CNN - "COCO-Detection/faster_rcnn_R_50_C4_1x": "137257644/model_final_721ade.pkl", - "COCO-Detection/faster_rcnn_R_50_DC5_1x": "137847829/model_final_51d356.pkl", - "COCO-Detection/faster_rcnn_R_50_FPN_1x": "137257794/model_final_b275ba.pkl", - "COCO-Detection/faster_rcnn_R_50_C4_3x": "137849393/model_final_f97cb7.pkl", - "COCO-Detection/faster_rcnn_R_50_DC5_3x": "137849425/model_final_68d202.pkl", - "COCO-Detection/faster_rcnn_R_50_FPN_3x": "137849458/model_final_280758.pkl", - "COCO-Detection/faster_rcnn_R_101_C4_3x": "138204752/model_final_298dad.pkl", - "COCO-Detection/faster_rcnn_R_101_DC5_3x": "138204841/model_final_3e0943.pkl", - "COCO-Detection/faster_rcnn_R_101_FPN_3x": "137851257/model_final_f6e8b1.pkl", - "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x": "139173657/model_final_68b088.pkl", - # COCO Detection with RetinaNet - "COCO-Detection/retinanet_R_50_FPN_1x": "190397773/model_final_bfca0b.pkl", - "COCO-Detection/retinanet_R_50_FPN_3x": "190397829/model_final_5bd44e.pkl", - "COCO-Detection/retinanet_R_101_FPN_3x": "190397697/model_final_971ab9.pkl", - # COCO Detection with RPN and Fast R-CNN - "COCO-Detection/rpn_R_50_C4_1x": "137258005/model_final_450694.pkl", - "COCO-Detection/rpn_R_50_FPN_1x": "137258492/model_final_02ce48.pkl", - "COCO-Detection/fast_rcnn_R_50_FPN_1x": "137635226/model_final_e5f7ce.pkl", - # COCO Instance Segmentation Baselines with Mask R-CNN - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x": "137259246/model_final_9243eb.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x": "137260150/model_final_4f86c3.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x": "137260431/model_final_a54504.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x": "137849525/model_final_4ce675.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x": "137849551/model_final_84107b.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x": "137849600/model_final_f10217.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x": "138363239/model_final_a2914c.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x": "138363294/model_final_0464b7.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x": "138205316/model_final_a3ec72.pkl", - "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x": "139653917/model_final_2d9806.pkl", # noqa - # New baselines using Large-Scale Jitter and Longer Training Schedule - "new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ": "42047764/model_final_bb69de.pkl", - "new_baselines/mask_rcnn_R_50_FPN_200ep_LSJ": "42047638/model_final_89a8d3.pkl", - "new_baselines/mask_rcnn_R_50_FPN_400ep_LSJ": "42019571/model_final_14d201.pkl", - "new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ": "42025812/model_final_4f7b58.pkl", - "new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ": "42131867/model_final_0bb7ae.pkl", - "new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ": "42073830/model_final_f96b26.pkl", - "new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_100ep_LSJ": "42047771/model_final_b7fbab.pkl", # noqa - "new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ": "42132721/model_final_5d87c1.pkl", # noqa - "new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_400ep_LSJ": "42025447/model_final_f1362d.pkl", # noqa - "new_baselines/mask_rcnn_regnety_4gf_dds_FPN_100ep_LSJ": "42047784/model_final_6ba57e.pkl", # noqa - "new_baselines/mask_rcnn_regnety_4gf_dds_FPN_200ep_LSJ": 
"42047642/model_final_27b9c1.pkl", # noqa - "new_baselines/mask_rcnn_regnety_4gf_dds_FPN_400ep_LSJ": "42045954/model_final_ef3a80.pkl", # noqa - # COCO Person Keypoint Detection Baselines with Keypoint R-CNN - "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x": "137261548/model_final_04e291.pkl", - "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x": "137849621/model_final_a6e10b.pkl", - "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x": "138363331/model_final_997cc7.pkl", - "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x": "139686956/model_final_5ad38f.pkl", - # COCO Panoptic Segmentation Baselines with Panoptic FPN - "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x": "139514544/model_final_dbfeb4.pkl", - "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x": "139514569/model_final_c10459.pkl", - "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x": "139514519/model_final_cafdb1.pkl", - # LVIS Instance Segmentation Baselines with Mask R-CNN - "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x": "144219072/model_final_571f7c.pkl", # noqa - "LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x": "144219035/model_final_824ab5.pkl", # noqa - "LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x": "144219108/model_final_5e3439.pkl", # noqa - # Cityscapes & Pascal VOC Baselines - "Cityscapes/mask_rcnn_R_50_FPN": "142423278/model_final_af9cf5.pkl", - "PascalVOC-Detection/faster_rcnn_R_50_C4": "142202221/model_final_b1acc2.pkl", - # Other Settings - "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5": "138602867/model_final_65c703.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5": "144998336/model_final_821d0b.pkl", - "Misc/cascade_mask_rcnn_R_50_FPN_1x": "138602847/model_final_e9d89b.pkl", - "Misc/cascade_mask_rcnn_R_50_FPN_3x": "144998488/model_final_480dd8.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_syncbn": "169527823/model_final_3b3c51.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_gn": "138602888/model_final_dc5d9e.pkl", - "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn": "138602908/model_final_01ca85.pkl", - "Misc/scratch_mask_rcnn_R_50_FPN_9x_gn": "183808979/model_final_da7b4c.pkl", - "Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn": "184226666/model_final_5ce33e.pkl", - "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x": "139797668/model_final_be35db.pkl", - "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv": "18131413/model_0039999_e76410.pkl", # noqa - # D1 Comparisons - "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x": "137781054/model_final_7ab50c.pkl", # noqa - "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x": "137781281/model_final_62ca52.pkl", # noqa - "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x": "137781195/model_final_cce136.pkl", - } - - @staticmethod - def query(config_path: str) -> Optional[str]: - """ - Args: - config_path: relative config filename - """ - name = config_path.replace(".yaml", "").replace(".py", "") - if name in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: - suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[name] - return _ModelZooUrls.S3_PREFIX + name + "/" + suffix - return None - - -def get_checkpoint_url(config_path): - """ - Returns the URL to the model trained using the given config - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - Returns: - str: a URL to the model - """ - url = _ModelZooUrls.query(config_path) - if url is None: - raise RuntimeError("Pretrained model for {} is not available!".format(config_path)) - return url - - -def get_config_file(config_path): - """ - Returns path to a builtin 
config file. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - Returns: - str: the real path to the config file. - """ - cfg_file = pkg_resources.resource_filename( - "detectron2.model_zoo", os.path.join("configs", config_path) - ) - if not os.path.exists(cfg_file): - raise RuntimeError("{} not available in Model Zoo!".format(config_path)) - return cfg_file - - -def get_config(config_path, trained: bool = False): - """ - Returns a config object for a model in model zoo. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - trained (bool): If True, will set ``MODEL.WEIGHTS`` to trained model zoo weights. - If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used - instead; this will typically (though not always) initialize a subset of weights using - an ImageNet pre-trained model, while randomly initializing the other weights. - - Returns: - CfgNode or omegaconf.DictConfig: a config object - """ - cfg_file = get_config_file(config_path) - if cfg_file.endswith(".yaml"): - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if trained: - cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) - return cfg - elif cfg_file.endswith(".py"): - cfg = LazyConfig.load(cfg_file) - if trained: - url = get_checkpoint_url(config_path) - if "train" in cfg and "init_checkpoint" in cfg.train: - cfg.train.init_checkpoint = url - else: - raise NotImplementedError - return cfg - - -def get(config_path, trained: bool = False, device: Optional[str] = None): - """ - Get a model specified by relative path under Detectron2's official ``configs/`` directory. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - trained (bool): see :func:`get_config`. - device (str or None): overwrite the device in config, if given. - - Returns: - nn.Module: a detectron2 model. Will be in training mode. - - Example: - :: - from detectron2 import model_zoo - model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) - """ - cfg = get_config(config_path, trained) - if device is None and not torch.cuda.is_available(): - device = "cpu" - if device is not None and isinstance(cfg, CfgNode): - cfg.MODEL.DEVICE = device - - if isinstance(cfg, CfgNode): - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - else: - model = instantiate(cfg.model) - if device is not None: - model = model.to(device) - if "train" in cfg and "init_checkpoint" in cfg.train: - DetectionCheckpointer(model).load(cfg.train.init_checkpoint) - return model diff --git a/detectron2/detectron2/modeling/__init__.py b/detectron2/detectron2/modeling/__init__.py deleted file mode 100644 index 4d949e222b5e94bef7deac65dadf21dd0e466c5d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
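A usage sketch mirroring the docstring example above; fetching the trained weights needs network access, and the returned model is in training mode.

from detectron2 import model_zoo

cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True)
print(cfg.MODEL.WEIGHTS)      # a URL under https://dl.fbaipublicfiles.com/detectron2/

model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml",
                      trained=True, device="cpu")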
-from detectron2.layers import ShapeSpec - -from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY -from .backbone import ( - BACKBONE_REGISTRY, - FPN, - Backbone, - ResNet, - ResNetBlockBase, - build_backbone, - build_resnet_backbone, - make_stage, - ViT, - SimpleFeaturePyramid, - get_vit_lr_decay_rate, - MViT, - SwinTransformer, -) -from .meta_arch import ( - META_ARCH_REGISTRY, - SEM_SEG_HEADS_REGISTRY, - GeneralizedRCNN, - PanopticFPN, - ProposalNetwork, - RetinaNet, - SemanticSegmentor, - build_model, - build_sem_seg_head, - FCOS, -) -from .postprocessing import detector_postprocess -from .proposal_generator import ( - PROPOSAL_GENERATOR_REGISTRY, - build_proposal_generator, - RPN_HEAD_REGISTRY, - build_rpn_head, -) -from .roi_heads import ( - ROI_BOX_HEAD_REGISTRY, - ROI_HEADS_REGISTRY, - ROI_KEYPOINT_HEAD_REGISTRY, - ROI_MASK_HEAD_REGISTRY, - ROIHeads, - StandardROIHeads, - BaseMaskRCNNHead, - BaseKeypointRCNNHead, - FastRCNNOutputLayers, - build_box_head, - build_keypoint_head, - build_mask_head, - build_roi_heads, -) -from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA -from .mmdet_wrapper import MMDetBackbone, MMDetDetector - -_EXCLUDE = {"ShapeSpec"} -__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] - - -from detectron2.utils.env import fixup_module_metadata - -fixup_module_metadata(__name__, globals(), __all__) -del fixup_module_metadata diff --git a/detectron2/detectron2/modeling/anchor_generator.py b/detectron2/detectron2/modeling/anchor_generator.py deleted file mode 100644 index b37474c096b9d95824bfd65db2ff8204da22e1df..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/anchor_generator.py +++ /dev/null @@ -1,390 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import collections -import math -from typing import List -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec, move_device_like -from detectron2.structures import Boxes, RotatedBoxes -from detectron2.utils.registry import Registry - -ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") -ANCHOR_GENERATOR_REGISTRY.__doc__ = """ -Registry for modules that creates object detection anchors for feature maps. - -The registered object will be called with `obj(cfg, input_shape)`. 
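A hypothetical registration example for the registry above: any registered class is constructed as obj(cfg, input_shape) and selected via cfg.MODEL.ANCHOR_GENERATOR.NAME by build_anchor_generator further below. The class and its behaviour here are purely illustrative.

from typing import List
import torch
from torch import nn
from detectron2.modeling.anchor_generator import ANCHOR_GENERATOR_REGISTRY


@ANCHOR_GENERATOR_REGISTRY.register()
class NullAnchorGenerator(nn.Module):
    """Toy generator that yields no anchors; for illustration only."""

    def __init__(self, cfg, input_shape):
        super().__init__()
        self.strides = [x.stride for x in input_shape]

    def forward(self, features: List[torch.Tensor]):
        return [torch.empty(0, 4) for _ in features]

# Selected with: cfg.MODEL.ANCHOR_GENERATOR.NAME = "NullAnchorGenerator"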
-""" - - -class BufferList(nn.Module): - """ - Similar to nn.ParameterList, but for buffers - """ - - def __init__(self, buffers): - super().__init__() - for i, buffer in enumerate(buffers): - # Use non-persistent buffer so the values are not saved in checkpoint - self.register_buffer(str(i), buffer, persistent=False) - - def __len__(self): - return len(self._buffers) - - def __iter__(self): - return iter(self._buffers.values()) - - -def _create_grid_offsets( - size: List[int], stride: int, offset: float, target_device_tensor: torch.Tensor -): - grid_height, grid_width = size - shifts_x = move_device_like( - torch.arange(offset * stride, grid_width * stride, step=stride, dtype=torch.float32), - target_device_tensor, - ) - shifts_y = move_device_like( - torch.arange(offset * stride, grid_height * stride, step=stride, dtype=torch.float32), - target_device_tensor, - ) - - shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) - shift_x = shift_x.reshape(-1) - shift_y = shift_y.reshape(-1) - return shift_x, shift_y - - -def _broadcast_params(params, num_features, name): - """ - If one size (or aspect ratio) is specified and there are multiple feature - maps, we "broadcast" anchors of that single size (or aspect ratio) - over all feature maps. - - If params is list[float], or list[list[float]] with len(params) == 1, repeat - it num_features time. - - Returns: - list[list[float]]: param for each feature - """ - assert isinstance( - params, collections.abc.Sequence - ), f"{name} in anchor generator has to be a list! Got {params}." - assert len(params), f"{name} in anchor generator cannot be empty!" - if not isinstance(params[0], collections.abc.Sequence): # params is list[float] - return [params] * num_features - if len(params) == 1: - return list(params) * num_features - assert len(params) == num_features, ( - f"Got {name} of length {len(params)} in anchor generator, " - f"but the number of input features is {num_features}!" - ) - return params - - -@ANCHOR_GENERATOR_REGISTRY.register() -class DefaultAnchorGenerator(nn.Module): - """ - Compute anchors in the standard ways described in - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". - """ - - box_dim: torch.jit.Final[int] = 4 - """ - the dimension of each anchor box. - """ - - @configurable - def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): - """ - This interface is experimental. - - Args: - sizes (list[list[float]] or list[float]): - If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes - (i.e. sqrt of anchor area) to use for the i-th feature map. - If ``sizes`` is list[float], ``sizes`` is used for all feature maps. - Anchor sizes are given in absolute lengths in units of - the input image; they do not dynamically scale if the input image size changes. - aspect_ratios (list[list[float]] or list[float]): list of aspect ratios - (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. - strides (list[int]): stride of each input feature. - offset (float): Relative offset between the center of the first anchor and the top-left - corner of the image. Value has to be in [0, 1). - Recommend to use 0.5, which means half stride. 
- """ - super().__init__() - - self.strides = strides - self.num_features = len(self.strides) - sizes = _broadcast_params(sizes, self.num_features, "sizes") - aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") - self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) - - self.offset = offset - assert 0.0 <= self.offset < 1.0, self.offset - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - return { - "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, - "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, - "strides": [x.stride for x in input_shape], - "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, - } - - def _calculate_anchors(self, sizes, aspect_ratios): - cell_anchors = [ - self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) - ] - return BufferList(cell_anchors) - - @property - @torch.jit.unused - def num_cell_anchors(self): - """ - Alias of `num_anchors`. - """ - return self.num_anchors - - @property - @torch.jit.unused - def num_anchors(self): - """ - Returns: - list[int]: Each int is the number of anchors at every pixel - location, on that feature map. - For example, if at every pixel we use anchors of 3 aspect - ratios and 5 sizes, the number of anchors is 15. - (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) - - In standard RPN models, `num_anchors` on every feature map is the same. - """ - return [len(cell_anchors) for cell_anchors in self.cell_anchors] - - def _grid_anchors(self, grid_sizes: List[List[int]]): - """ - Returns: - list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 - """ - anchors = [] - # buffers() not supported by torchscript. use named_buffers() instead - buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()] - for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers): - shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors) - shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) - - anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) - - return anchors - - def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): - """ - Generate a tensor storing canonical anchor boxes, which are all anchor - boxes of different sizes and aspect_ratios centered at (0, 0). - We can later build the set of anchors for a full feature map by - shifting and tiling these tensors (see `meth:_grid_anchors`). - - Args: - sizes (tuple[float]): - aspect_ratios (tuple[float]]): - - Returns: - Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes - in XYXY format. - """ - - # This is different from the anchor generator defined in the original Faster R-CNN - # code or Detectron. They yield the same AP, however the old version defines cell - # anchors in a less natural way with a shift relative to the feature grid and - # quantization that results in slightly different sizes for different aspect ratios. - # See also https://github.com/facebookresearch/Detectron/issues/227 - - anchors = [] - for size in sizes: - area = size**2.0 - for aspect_ratio in aspect_ratios: - # s * s = w * h - # a = h / w - # ... some algebra ... 
- # w = sqrt(s * s / a) - # h = a * w - w = math.sqrt(area / aspect_ratio) - h = aspect_ratio * w - x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 - anchors.append([x0, y0, x1, y1]) - return torch.tensor(anchors) - - def forward(self, features: List[torch.Tensor]): - """ - Args: - features (list[Tensor]): list of backbone feature maps on which to generate anchors. - - Returns: - list[Boxes]: a list of Boxes containing all the anchors for each feature map - (i.e. the cell anchors repeated over all locations in the feature map). - The number of anchors of each feature map is Hi x Wi x num_cell_anchors, - where Hi, Wi are resolution of the feature map divided by anchor stride. - """ - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) # pyre-ignore - return [Boxes(x) for x in anchors_over_all_feature_maps] - - -@ANCHOR_GENERATOR_REGISTRY.register() -class RotatedAnchorGenerator(nn.Module): - """ - Compute rotated anchors used by Rotated RPN (RRPN), described in - "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". - """ - - box_dim: int = 5 - """ - the dimension of each anchor box. - """ - - @configurable - def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): - """ - This interface is experimental. - - Args: - sizes (list[list[float]] or list[float]): - If sizes is list[list[float]], sizes[i] is the list of anchor sizes - (i.e. sqrt of anchor area) to use for the i-th feature map. - If sizes is list[float], the sizes are used for all feature maps. - Anchor sizes are given in absolute lengths in units of - the input image; they do not dynamically scale if the input image size changes. - aspect_ratios (list[list[float]] or list[float]): list of aspect ratios - (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. - strides (list[int]): stride of each input feature. - angles (list[list[float]] or list[float]): list of angles (in degrees CCW) - to use for anchors. Same "broadcast" rule for `sizes` applies. - offset (float): Relative offset between the center of the first anchor and the top-left - corner of the image. Value has to be in [0, 1). - Recommend to use 0.5, which means half stride. - """ - super().__init__() - - self.strides = strides - self.num_features = len(self.strides) - sizes = _broadcast_params(sizes, self.num_features, "sizes") - aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") - angles = _broadcast_params(angles, self.num_features, "angles") - self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) - - self.offset = offset - assert 0.0 <= self.offset < 1.0, self.offset - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - return { - "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, - "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, - "strides": [x.stride for x in input_shape], - "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, - "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, - } - - def _calculate_anchors(self, sizes, aspect_ratios, angles): - cell_anchors = [ - self.generate_cell_anchors(size, aspect_ratio, angle).float() - for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) - ] - return BufferList(cell_anchors) - - @property - def num_cell_anchors(self): - """ - Alias of `num_anchors`. 
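A worked instance of the w/h algebra in generate_cell_anchors above, using size 64 and aspect ratio 0.5 (values rounded).

import math
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator

area = 64.0 ** 2                 # s * s = 4096
w = math.sqrt(area / 0.5)        # ≈ 90.51, so that w * h = area and h / w = 0.5
h = 0.5 * w                      # ≈ 45.25

gen = DefaultAnchorGenerator(sizes=[[64]], aspect_ratios=[[0.5]], strides=[16])
print(gen.generate_cell_anchors(sizes=(64,), aspect_ratios=(0.5,)))
# ≈ tensor([[-45.25, -22.63, 45.25, 22.63]])  (XYXY, centered at the origin)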
- """ - return self.num_anchors - - @property - def num_anchors(self): - """ - Returns: - list[int]: Each int is the number of anchors at every pixel - location, on that feature map. - For example, if at every pixel we use anchors of 3 aspect - ratios, 2 sizes and 5 angles, the number of anchors is 30. - (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS - and ANCHOR_GENERATOR.ANGLES in config) - - In standard RRPN models, `num_anchors` on every feature map is the same. - """ - return [len(cell_anchors) for cell_anchors in self.cell_anchors] - - def _grid_anchors(self, grid_sizes: List[List[int]]): - anchors = [] - for size, stride, base_anchors in zip( - grid_sizes, - self.strides, - self.cell_anchors._buffers.values(), - ): - shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors) - zeros = torch.zeros_like(shift_x) - shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) - - anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) - - return anchors - - def generate_cell_anchors( - self, - sizes=(32, 64, 128, 256, 512), - aspect_ratios=(0.5, 1, 2), - angles=(-90, -60, -30, 0, 30, 60, 90), - ): - """ - Generate a tensor storing canonical anchor boxes, which are all anchor - boxes of different sizes, aspect_ratios, angles centered at (0, 0). - We can later build the set of anchors for a full feature map by - shifting and tiling these tensors (see `meth:_grid_anchors`). - - Args: - sizes (tuple[float]): - aspect_ratios (tuple[float]]): - angles (tuple[float]]): - - Returns: - Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) - storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. - """ - anchors = [] - for size in sizes: - area = size**2.0 - for aspect_ratio in aspect_ratios: - # s * s = w * h - # a = h / w - # ... some algebra ... - # w = sqrt(s * s / a) - # h = a * w - w = math.sqrt(area / aspect_ratio) - h = aspect_ratio * w - anchors.extend([0, 0, w, h, a] for a in angles) - - return torch.tensor(anchors) - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of backbone feature maps on which to generate anchors. - - Returns: - list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map - (i.e. the cell anchors repeated over all locations in the feature map). - The number of anchors of each feature map is Hi x Wi x num_cell_anchors, - where Hi, Wi are resolution of the feature map divided by anchor stride. - """ - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) - return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] - - -def build_anchor_generator(cfg, input_shape): - """ - Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. - """ - anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME - return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/detectron2/detectron2/modeling/backbone/__init__.py b/detectron2/detectron2/modeling/backbone/__init__.py deleted file mode 100644 index 5b3358a4061b143c78eba8e7bf81fe9f7ffac1aa..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip - -from .backbone import Backbone -from .fpn import FPN -from .regnet import RegNet -from .resnet import ( - BasicStem, - ResNet, - ResNetBlockBase, - build_resnet_backbone, - make_stage, - BottleneckBlock, -) -from .vit import ViT, SimpleFeaturePyramid, get_vit_lr_decay_rate -from .mvit import MViT -from .swin import SwinTransformer - -__all__ = [k for k in globals().keys() if not k.startswith("_")] -# TODO can expose more resnet blocks after careful consideration diff --git a/detectron2/detectron2/modeling/backbone/backbone.py b/detectron2/detectron2/modeling/backbone/backbone.py deleted file mode 100644 index e1c765a6b38542f66cae55216bba697a6626d128..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/backbone.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from abc import ABCMeta, abstractmethod -from typing import Dict -import torch.nn as nn - -from detectron2.layers import ShapeSpec - -__all__ = ["Backbone"] - - -class Backbone(nn.Module, metaclass=ABCMeta): - """ - Abstract base class for network backbones. - """ - - def __init__(self): - """ - The `__init__` method of any subclass can specify its own set of arguments. - """ - super().__init__() - - @abstractmethod - def forward(self): - """ - Subclasses must override this method, but adhere to the same return type. - - Returns: - dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor - """ - pass - - @property - def size_divisibility(self) -> int: - """ - Some backbones require the input height and width to be divisible by a - specific integer. This is typically true for encoder / decoder type networks - with lateral connection (e.g., FPN) for which feature maps need to match - dimension in the "bottom up" and "top down" paths. Set to 0 if no specific - input size divisibility is required. - """ - return 0 - - @property - def padding_constraints(self) -> Dict[str, int]: - """ - This property is a generalization of size_divisibility. Some backbones and training - recipes require specific padding constraints, such as enforcing divisibility by a specific - integer (e.g., FPN) or padding to a square (e.g., ViTDet with large-scale jitter - in :paper:vitdet). `padding_constraints` contains these optional items like: - { - "size_divisibility": int, - "square_size": int, - # Future options are possible - } - `size_divisibility` will read from here if presented and `square_size` indicates the - square padding size if `square_size` > 0. - - TODO: use type of Dict[str, int] to avoid torchscipt issues. The type of padding_constraints - could be generalized as TypedDict (Python 3.8+) to support more types in the future. - """ - return {} - - def output_shape(self): - """ - Returns: - dict[str->ShapeSpec] - """ - # this is a backward-compatible default - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } diff --git a/detectron2/detectron2/modeling/backbone/build.py b/detectron2/detectron2/modeling/backbone/build.py deleted file mode 100644 index af02141172bebe9a2a27a88c81673c2710b4d73f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/build.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
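# Hedged illustration (not part of detectron2 or this diff): a minimal Backbone subclass and
# registration following the contract documented above -- forward() returns a name->tensor dict
# and output_shape() reports channels/strides. The names ToyBackbone / build_toy_backbone are
# made up for this sketch.
import torch.nn as nn

from detectron2.layers import ShapeSpec
from detectron2.modeling import BACKBONE_REGISTRY, Backbone


class ToyBackbone(Backbone):
    def __init__(self, out_channels=64):
        super().__init__()
        # a single 16x16 conv with stride 16, so the only output feature has stride 16
        self.conv = nn.Conv2d(3, out_channels, kernel_size=16, stride=16)
        self._out_channels = out_channels

    def forward(self, x):
        # must return a dict mapping feature name -> tensor
        return {"toy": self.conv(x)}

    def output_shape(self):
        return {"toy": ShapeSpec(channels=self._out_channels, stride=16)}


@BACKBONE_REGISTRY.register()
def build_toy_backbone(cfg, input_shape):
    # the registry expects a callable taking (cfg, ShapeSpec) and returning a Backbone
    return ToyBackbone()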
-from detectron2.layers import ShapeSpec -from detectron2.utils.registry import Registry - -from .backbone import Backbone - -BACKBONE_REGISTRY = Registry("BACKBONE") -BACKBONE_REGISTRY.__doc__ = """ -Registry for backbones, which extract feature maps from images - -The registered object must be a callable that accepts two arguments: - -1. A :class:`detectron2.config.CfgNode` -2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. - -Registered object must return instance of :class:`Backbone`. -""" - - -def build_backbone(cfg, input_shape=None): - """ - Build a backbone from `cfg.MODEL.BACKBONE.NAME`. - - Returns: - an instance of :class:`Backbone` - """ - if input_shape is None: - input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) - - backbone_name = cfg.MODEL.BACKBONE.NAME - backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) - assert isinstance(backbone, Backbone) - return backbone diff --git a/detectron2/detectron2/modeling/backbone/fpn.py b/detectron2/detectron2/modeling/backbone/fpn.py deleted file mode 100644 index 19d24e13f069ecb389edcdb4d9859506fe9e6f76..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/fpn.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.layers import Conv2d, ShapeSpec, get_norm - -from .backbone import Backbone -from .build import BACKBONE_REGISTRY -from .resnet import build_resnet_backbone - -__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] - - -class FPN(Backbone): - """ - This module implements :paper:`FPN`. - It creates pyramid features built on top of some input feature maps. - """ - - _fuse_type: torch.jit.Final[str] - - def __init__( - self, - bottom_up, - in_features, - out_channels, - norm="", - top_block=None, - fuse_type="sum", - square_pad=0, - ): - """ - Args: - bottom_up (Backbone): module representing the bottom up subnetwork. - Must be a subclass of :class:`Backbone`. The multi-scale feature - maps generated by the bottom up network, and listed in `in_features`, - are used to generate FPN levels. - in_features (list[str]): names of the input feature maps coming - from the backbone to which FPN is attached. For example, if the - backbone produces ["res2", "res3", "res4"], any *contiguous* sublist - of these may be used; order must be from high to low resolution. - out_channels (int): number of channels in the output feature maps. - norm (str): the normalization to use. - top_block (nn.Module or None): if provided, an extra operation will - be performed on the output of the last (smallest resolution) - FPN output, and the result will extend the result list. The top_block - further downsamples the feature map. It must have an attribute - "num_levels", meaning the number of extra FPN levels added by - this block, and "in_feature", which is a string representing - its input feature (e.g., p5). - fuse_type (str): types for fusing the top down features and the lateral - ones. It can be "sum" (default), which sums up element-wise; or "avg", - which takes the element-wise mean of the two. - square_pad (int): If > 0, require input images to be padded to specific square size. 
- """ - super(FPN, self).__init__() - assert isinstance(bottom_up, Backbone) - assert in_features, in_features - - # Feature map strides and channels from the bottom up network (e.g. ResNet) - input_shapes = bottom_up.output_shape() - strides = [input_shapes[f].stride for f in in_features] - in_channels_per_feature = [input_shapes[f].channels for f in in_features] - - _assert_strides_are_log2_contiguous(strides) - lateral_convs = [] - output_convs = [] - - use_bias = norm == "" - for idx, in_channels in enumerate(in_channels_per_feature): - lateral_norm = get_norm(norm, out_channels) - output_norm = get_norm(norm, out_channels) - - lateral_conv = Conv2d( - in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm - ) - output_conv = Conv2d( - out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=use_bias, - norm=output_norm, - ) - weight_init.c2_xavier_fill(lateral_conv) - weight_init.c2_xavier_fill(output_conv) - stage = int(math.log2(strides[idx])) - self.add_module("fpn_lateral{}".format(stage), lateral_conv) - self.add_module("fpn_output{}".format(stage), output_conv) - - lateral_convs.append(lateral_conv) - output_convs.append(output_conv) - # Place convs into top-down order (from low to high resolution) - # to make the top-down computation in forward clearer. - self.lateral_convs = lateral_convs[::-1] - self.output_convs = output_convs[::-1] - self.top_block = top_block - self.in_features = tuple(in_features) - self.bottom_up = bottom_up - # Return feature names are "p", like ["p2", "p3", ..., "p6"] - self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} - # top block output feature maps. - if self.top_block is not None: - for s in range(stage, stage + self.top_block.num_levels): - self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) - - self._out_features = list(self._out_feature_strides.keys()) - self._out_feature_channels = {k: out_channels for k in self._out_features} - self._size_divisibility = strides[-1] - self._square_pad = square_pad - assert fuse_type in {"avg", "sum"} - self._fuse_type = fuse_type - - @property - def size_divisibility(self): - return self._size_divisibility - - @property - def padding_constraints(self): - return {"square_size": self._square_pad} - - def forward(self, x): - """ - Args: - input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to - feature map tensor for each feature level in high to low resolution order. - - Returns: - dict[str->Tensor]: - mapping from feature map name to FPN feature map tensor - in high to low resolution order. Returned feature names follow the FPN - paper convention: "p", where stage has stride = 2 ** stage e.g., - ["p2", "p3", ..., "p6"]. 
- """ - bottom_up_features = self.bottom_up(x) - results = [] - prev_features = self.lateral_convs[0](bottom_up_features[self.in_features[-1]]) - results.append(self.output_convs[0](prev_features)) - - # Reverse feature maps into top-down order (from low to high resolution) - for idx, (lateral_conv, output_conv) in enumerate( - zip(self.lateral_convs, self.output_convs) - ): - # Slicing of ModuleList is not supported https://github.com/pytorch/pytorch/issues/47336 - # Therefore we loop over all modules but skip the first one - if idx > 0: - features = self.in_features[-idx - 1] - features = bottom_up_features[features] - top_down_features = F.interpolate(prev_features, scale_factor=2.0, mode="nearest") - lateral_features = lateral_conv(features) - prev_features = lateral_features + top_down_features - if self._fuse_type == "avg": - prev_features /= 2 - results.insert(0, output_conv(prev_features)) - - if self.top_block is not None: - if self.top_block.in_feature in bottom_up_features: - top_block_in_feature = bottom_up_features[self.top_block.in_feature] - else: - top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] - results.extend(self.top_block(top_block_in_feature)) - assert len(self._out_features) == len(results) - return {f: res for f, res in zip(self._out_features, results)} - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - -def _assert_strides_are_log2_contiguous(strides): - """ - Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". - """ - for i, stride in enumerate(strides[1:], 1): - assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( - stride, strides[i - 1] - ) - - -class LastLevelMaxPool(nn.Module): - """ - This module is used in the original FPN to generate a downsampled - P6 feature from P5. - """ - - def __init__(self): - super().__init__() - self.num_levels = 1 - self.in_feature = "p5" - - def forward(self, x): - return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] - - -class LastLevelP6P7(nn.Module): - """ - This module is used in RetinaNet to generate extra layers, P6 and P7 from - C5 feature. - """ - - def __init__(self, in_channels, out_channels, in_feature="res5"): - super().__init__() - self.num_levels = 2 - self.in_feature = in_feature - self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) - self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) - for module in [self.p6, self.p7]: - weight_init.c2_xavier_fill(module) - - def forward(self, c5): - p6 = self.p6(c5) - p7 = self.p7(F.relu(p6)) - return [p6, p7] - - -@BACKBONE_REGISTRY.register() -def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. - """ - bottom_up = build_resnet_backbone(cfg, input_shape) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelMaxPool(), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone - - -@BACKBONE_REGISTRY.register() -def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
- """ - bottom_up = build_resnet_backbone(cfg, input_shape) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - in_channels_p6p7 = bottom_up.output_shape()["res5"].channels - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelP6P7(in_channels_p6p7, out_channels), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone diff --git a/detectron2/detectron2/modeling/backbone/mvit.py b/detectron2/detectron2/modeling/backbone/mvit.py deleted file mode 100644 index 50667a8a836b933666761cc09d4175e64098c8aa..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/mvit.py +++ /dev/null @@ -1,448 +0,0 @@ -import logging -import numpy as np -import torch -import torch.nn as nn - -from .backbone import Backbone -from .utils import ( - PatchEmbed, - add_decomposed_rel_pos, - get_abs_pos, - window_partition, - window_unpartition, -) - -logger = logging.getLogger(__name__) - - -__all__ = ["MViT"] - - -def attention_pool(x, pool, norm=None): - # (B, H, W, C) -> (B, C, H, W) - x = x.permute(0, 3, 1, 2) - x = pool(x) - # (B, C, H1, W1) -> (B, H1, W1, C) - x = x.permute(0, 2, 3, 1) - if norm: - x = norm(x) - - return x - - -class MultiScaleAttention(nn.Module): - """Multiscale Multi-head Attention block.""" - - def __init__( - self, - dim, - dim_out, - num_heads, - qkv_bias=True, - norm_layer=nn.LayerNorm, - pool_kernel=(3, 3), - stride_q=1, - stride_kv=1, - residual_pooling=True, - window_size=0, - use_rel_pos=False, - rel_pos_zero_init=True, - input_size=None, - ): - """ - Args: - dim (int): Number of input channels. - dim_out (int): Number of output channels. - num_heads (int): Number of attention heads. - qkv_bias (bool: If True, add a learnable bias to query, key, value. - norm_layer (nn.Module): Normalization layer. - pool_kernel (tuple): kernel size for qkv pooling layers. - stride_q (int): stride size for q pooling layer. - stride_kv (int): stride size for kv pooling layer. - residual_pooling (bool): If true, enable residual pooling. - use_rel_pos (bool): If True, add relative postional embeddings to the attention map. - rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. - input_size (int or None): Input resolution. 
- """ - super().__init__() - self.num_heads = num_heads - head_dim = dim_out // num_heads - self.scale = head_dim**-0.5 - - self.qkv = nn.Linear(dim, dim_out * 3, bias=qkv_bias) - self.proj = nn.Linear(dim_out, dim_out) - - # qkv pooling - pool_padding = [k // 2 for k in pool_kernel] - dim_conv = dim_out // num_heads - self.pool_q = nn.Conv2d( - dim_conv, - dim_conv, - pool_kernel, - stride=stride_q, - padding=pool_padding, - groups=dim_conv, - bias=False, - ) - self.norm_q = norm_layer(dim_conv) - self.pool_k = nn.Conv2d( - dim_conv, - dim_conv, - pool_kernel, - stride=stride_kv, - padding=pool_padding, - groups=dim_conv, - bias=False, - ) - self.norm_k = norm_layer(dim_conv) - self.pool_v = nn.Conv2d( - dim_conv, - dim_conv, - pool_kernel, - stride=stride_kv, - padding=pool_padding, - groups=dim_conv, - bias=False, - ) - self.norm_v = norm_layer(dim_conv) - - self.window_size = window_size - if window_size: - self.q_win_size = window_size // stride_q - self.kv_win_size = window_size // stride_kv - self.residual_pooling = residual_pooling - - self.use_rel_pos = use_rel_pos - if self.use_rel_pos: - # initialize relative positional embeddings - assert input_size[0] == input_size[1] - size = input_size[0] - rel_dim = 2 * max(size // stride_q, size // stride_kv) - 1 - self.rel_pos_h = nn.Parameter(torch.zeros(rel_dim, head_dim)) - self.rel_pos_w = nn.Parameter(torch.zeros(rel_dim, head_dim)) - - if not rel_pos_zero_init: - nn.init.trunc_normal_(self.rel_pos_h, std=0.02) - nn.init.trunc_normal_(self.rel_pos_w, std=0.02) - - def forward(self, x): - B, H, W, _ = x.shape - # qkv with shape (3, B, nHead, H, W, C) - qkv = self.qkv(x).reshape(B, H, W, 3, self.num_heads, -1).permute(3, 0, 4, 1, 2, 5) - # q, k, v with shape (B * nHead, H, W, C) - q, k, v = qkv.reshape(3, B * self.num_heads, H, W, -1).unbind(0) - - q = attention_pool(q, self.pool_q, self.norm_q) - k = attention_pool(k, self.pool_k, self.norm_k) - v = attention_pool(v, self.pool_v, self.norm_v) - - ori_q = q - if self.window_size: - q, q_hw_pad = window_partition(q, self.q_win_size) - k, kv_hw_pad = window_partition(k, self.kv_win_size) - v, _ = window_partition(v, self.kv_win_size) - q_hw = (self.q_win_size, self.q_win_size) - kv_hw = (self.kv_win_size, self.kv_win_size) - else: - q_hw = q.shape[1:3] - kv_hw = k.shape[1:3] - - q = q.view(q.shape[0], np.prod(q_hw), -1) - k = k.view(k.shape[0], np.prod(kv_hw), -1) - v = v.view(v.shape[0], np.prod(kv_hw), -1) - - attn = (q * self.scale) @ k.transpose(-2, -1) - - if self.use_rel_pos: - attn = add_decomposed_rel_pos(attn, q, self.rel_pos_h, self.rel_pos_w, q_hw, kv_hw) - - attn = attn.softmax(dim=-1) - x = attn @ v - - x = x.view(x.shape[0], q_hw[0], q_hw[1], -1) - - if self.window_size: - x = window_unpartition(x, self.q_win_size, q_hw_pad, ori_q.shape[1:3]) - - if self.residual_pooling: - x += ori_q - - H, W = x.shape[1], x.shape[2] - x = x.view(B, self.num_heads, H, W, -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1) - x = self.proj(x) - - return x - - -class MultiScaleBlock(nn.Module): - """Multiscale Transformer blocks""" - - def __init__( - self, - dim, - dim_out, - num_heads, - mlp_ratio=4.0, - qkv_bias=True, - drop_path=0.0, - norm_layer=nn.LayerNorm, - act_layer=nn.GELU, - qkv_pool_kernel=(3, 3), - stride_q=1, - stride_kv=1, - residual_pooling=True, - window_size=0, - use_rel_pos=False, - rel_pos_zero_init=True, - input_size=None, - ): - """ - Args: - dim (int): Number of input channels. - dim_out (int): Number of output channels. 
- num_heads (int): Number of attention heads in the MViT block. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool): If True, add a learnable bias to query, key, value. - drop_path (float): Stochastic depth rate. - norm_layer (nn.Module): Normalization layer. - act_layer (nn.Module): Activation layer. - qkv_pool_kernel (tuple): kernel size for qkv pooling layers. - stride_q (int): stride size for q pooling layer. - stride_kv (int): stride size for kv pooling layer. - residual_pooling (bool): If true, enable residual pooling. - window_size (int): Window size for window attention blocks. If it equals 0, then not - use window attention. - use_rel_pos (bool): If True, add relative postional embeddings to the attention map. - rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. - input_size (int or None): Input resolution. - """ - super().__init__() - self.norm1 = norm_layer(dim) - self.attn = MultiScaleAttention( - dim, - dim_out, - num_heads=num_heads, - qkv_bias=qkv_bias, - norm_layer=norm_layer, - pool_kernel=qkv_pool_kernel, - stride_q=stride_q, - stride_kv=stride_kv, - residual_pooling=residual_pooling, - window_size=window_size, - use_rel_pos=use_rel_pos, - rel_pos_zero_init=rel_pos_zero_init, - input_size=input_size, - ) - - from timm.models.layers import DropPath, Mlp - - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - self.norm2 = norm_layer(dim_out) - self.mlp = Mlp( - in_features=dim_out, - hidden_features=int(dim_out * mlp_ratio), - out_features=dim_out, - act_layer=act_layer, - ) - - if dim != dim_out: - self.proj = nn.Linear(dim, dim_out) - - if stride_q > 1: - kernel_skip = stride_q + 1 - padding_skip = int(kernel_skip // 2) - self.pool_skip = nn.MaxPool2d(kernel_skip, stride_q, padding_skip, ceil_mode=False) - - def forward(self, x): - x_norm = self.norm1(x) - x_block = self.attn(x_norm) - - if hasattr(self, "proj"): - x = self.proj(x_norm) - if hasattr(self, "pool_skip"): - x = attention_pool(x, self.pool_skip) - - x = x + self.drop_path(x_block) - x = x + self.drop_path(self.mlp(self.norm2(x))) - - return x - - -class MViT(Backbone): - """ - This module implements Multiscale Vision Transformer (MViT) backbone in :paper:'mvitv2'. - """ - - def __init__( - self, - img_size=224, - patch_kernel=(7, 7), - patch_stride=(4, 4), - patch_padding=(3, 3), - in_chans=3, - embed_dim=96, - depth=16, - num_heads=1, - last_block_indexes=(0, 2, 11, 15), - qkv_pool_kernel=(3, 3), - adaptive_kv_stride=4, - adaptive_window_size=56, - residual_pooling=True, - mlp_ratio=4.0, - qkv_bias=True, - drop_path_rate=0.0, - norm_layer=nn.LayerNorm, - act_layer=nn.GELU, - use_abs_pos=False, - use_rel_pos=True, - rel_pos_zero_init=True, - use_act_checkpoint=False, - pretrain_img_size=224, - pretrain_use_cls_token=True, - out_features=("scale2", "scale3", "scale4", "scale5"), - ): - """ - Args: - img_size (int): Input image size. - patch_kernel (tuple): kernel size for patch embedding. - patch_stride (tuple): stride size for patch embedding. - patch_padding (tuple): padding size for patch embedding. - in_chans (int): Number of input image channels. - embed_dim (int): Patch embedding dimension. - depth (int): Depth of MViT. - num_heads (int): Number of base attention heads in each MViT block. - last_block_indexes (tuple): Block indexes for last blocks in each stage. - qkv_pool_kernel (tuple): kernel size for qkv pooling layers. - adaptive_kv_stride (int): adaptive stride size for kv pooling. 
- adaptive_window_size (int): adaptive window size for window attention blocks. - residual_pooling (bool): If true, enable residual pooling. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool): If True, add a learnable bias to query, key, value. - drop_path_rate (float): Stochastic depth rate. - norm_layer (nn.Module): Normalization layer. - act_layer (nn.Module): Activation layer. - use_abs_pos (bool): If True, use absolute positional embeddings. - use_rel_pos (bool): If True, add relative postional embeddings to the attention map. - rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. - window_size (int): Window size for window attention blocks. - use_act_checkpoint (bool): If True, use activation checkpointing. - pretrain_img_size (int): input image size for pretraining models. - pretrain_use_cls_token (bool): If True, pretrainig models use class token. - out_features (tuple): name of the feature maps from each stage. - """ - super().__init__() - self.pretrain_use_cls_token = pretrain_use_cls_token - - self.patch_embed = PatchEmbed( - kernel_size=patch_kernel, - stride=patch_stride, - padding=patch_padding, - in_chans=in_chans, - embed_dim=embed_dim, - ) - - if use_abs_pos: - # Initialize absoluate positional embedding with pretrain image size. - num_patches = (pretrain_img_size // patch_stride[0]) * ( - pretrain_img_size // patch_stride[1] - ) - num_positions = (num_patches + 1) if pretrain_use_cls_token else num_patches - self.pos_embed = nn.Parameter(torch.zeros(1, num_positions, embed_dim)) - else: - self.pos_embed = None - - # stochastic depth decay rule - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] - dim_out = embed_dim - stride_kv = adaptive_kv_stride - window_size = adaptive_window_size - input_size = (img_size // patch_stride[0], img_size // patch_stride[1]) - stage = 2 - stride = patch_stride[0] - self._out_feature_strides = {} - self._out_feature_channels = {} - self.blocks = nn.ModuleList() - for i in range(depth): - # Multiply stride_kv by 2 if it's the last block of stage2 and stage3. - if i == last_block_indexes[1] or i == last_block_indexes[2]: - stride_kv_ = stride_kv * 2 - else: - stride_kv_ = stride_kv - # hybrid window attention: global attention in last three stages. 
- window_size_ = 0 if i in last_block_indexes[1:] else window_size - block = MultiScaleBlock( - dim=embed_dim, - dim_out=dim_out, - num_heads=num_heads, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - drop_path=dpr[i], - norm_layer=norm_layer, - qkv_pool_kernel=qkv_pool_kernel, - stride_q=2 if i - 1 in last_block_indexes else 1, - stride_kv=stride_kv_, - residual_pooling=residual_pooling, - window_size=window_size_, - use_rel_pos=use_rel_pos, - rel_pos_zero_init=rel_pos_zero_init, - input_size=input_size, - ) - if use_act_checkpoint: - # TODO: use torch.utils.checkpoint - from fairscale.nn.checkpoint import checkpoint_wrapper - - block = checkpoint_wrapper(block) - self.blocks.append(block) - - embed_dim = dim_out - if i in last_block_indexes: - name = f"scale{stage}" - if name in out_features: - self._out_feature_channels[name] = dim_out - self._out_feature_strides[name] = stride - self.add_module(f"{name}_norm", norm_layer(dim_out)) - - dim_out *= 2 - num_heads *= 2 - stride_kv = max(stride_kv // 2, 1) - stride *= 2 - stage += 1 - if i - 1 in last_block_indexes: - window_size = window_size // 2 - input_size = [s // 2 for s in input_size] - - self._out_features = out_features - self._last_block_indexes = last_block_indexes - - if self.pos_embed is not None: - nn.init.trunc_normal_(self.pos_embed, std=0.02) - - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - nn.init.trunc_normal_(m.weight, std=0.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - def forward(self, x): - x = self.patch_embed(x) - - if self.pos_embed is not None: - x = x + get_abs_pos(self.pos_embed, self.pretrain_use_cls_token, x.shape[1:3]) - - outputs = {} - stage = 2 - for i, blk in enumerate(self.blocks): - x = blk(x) - if i in self._last_block_indexes: - name = f"scale{stage}" - if name in self._out_features: - x_out = getattr(self, f"{name}_norm")(x) - outputs[name] = x_out.permute(0, 3, 1, 2) - stage += 1 - - return outputs diff --git a/detectron2/detectron2/modeling/backbone/regnet.py b/detectron2/detectron2/modeling/backbone/regnet.py deleted file mode 100644 index 56d7baf44255e620d5bd18a34aac16dd18476b31..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/regnet.py +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Implementation of RegNet models from :paper:`dds` and :paper:`scaling`. - -This code is adapted from https://github.com/facebookresearch/pycls with minimal modifications. -Some code duplication exists between RegNet and ResNets (e.g., ResStem) in order to simplify -model loading. -""" - -import numpy as np -from torch import nn - -from detectron2.layers import CNNBlockBase, ShapeSpec, get_norm - -from .backbone import Backbone - -__all__ = [ - "AnyNet", - "RegNet", - "ResStem", - "SimpleStem", - "VanillaBlock", - "ResBasicBlock", - "ResBottleneckBlock", -] - - -def conv2d(w_in, w_out, k, *, stride=1, groups=1, bias=False): - """Helper for building a conv2d layer.""" - assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." 
- s, p, g, b = stride, (k - 1) // 2, groups, bias - return nn.Conv2d(w_in, w_out, k, stride=s, padding=p, groups=g, bias=b) - - -def gap2d(): - """Helper for building a global average pooling layer.""" - return nn.AdaptiveAvgPool2d((1, 1)) - - -def pool2d(k, *, stride=1): - """Helper for building a pool2d layer.""" - assert k % 2 == 1, "Only odd size kernels supported to avoid padding issues." - return nn.MaxPool2d(k, stride=stride, padding=(k - 1) // 2) - - -def init_weights(m): - """Performs ResNet-style weight initialization.""" - if isinstance(m, nn.Conv2d): - # Note that there is no bias due to BN - fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(mean=0.0, std=np.sqrt(2.0 / fan_out)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1.0) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - m.weight.data.normal_(mean=0.0, std=0.01) - m.bias.data.zero_() - - -class ResStem(CNNBlockBase): - """ResNet stem for ImageNet: 7x7, BN, AF, MaxPool.""" - - def __init__(self, w_in, w_out, norm, activation_class): - super().__init__(w_in, w_out, 4) - self.conv = conv2d(w_in, w_out, 7, stride=2) - self.bn = get_norm(norm, w_out) - self.af = activation_class() - self.pool = pool2d(3, stride=2) - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - -class SimpleStem(CNNBlockBase): - """Simple stem for ImageNet: 3x3, BN, AF.""" - - def __init__(self, w_in, w_out, norm, activation_class): - super().__init__(w_in, w_out, 2) - self.conv = conv2d(w_in, w_out, 3, stride=2) - self.bn = get_norm(norm, w_out) - self.af = activation_class() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - -class SE(nn.Module): - """Squeeze-and-Excitation (SE) block: AvgPool, FC, Act, FC, Sigmoid.""" - - def __init__(self, w_in, w_se, activation_class): - super().__init__() - self.avg_pool = gap2d() - self.f_ex = nn.Sequential( - conv2d(w_in, w_se, 1, bias=True), - activation_class(), - conv2d(w_se, w_in, 1, bias=True), - nn.Sigmoid(), - ) - - def forward(self, x): - return x * self.f_ex(self.avg_pool(x)) - - -class VanillaBlock(CNNBlockBase): - """Vanilla block: [3x3 conv, BN, Relu] x2.""" - - def __init__(self, w_in, w_out, stride, norm, activation_class, _params): - super().__init__(w_in, w_out, stride) - self.a = conv2d(w_in, w_out, 3, stride=stride) - self.a_bn = get_norm(norm, w_out) - self.a_af = activation_class() - self.b = conv2d(w_out, w_out, 3) - self.b_bn = get_norm(norm, w_out) - self.b_af = activation_class() - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - -class BasicTransform(nn.Module): - """Basic transformation: [3x3 conv, BN, Relu] x2.""" - - def __init__(self, w_in, w_out, stride, norm, activation_class, _params): - super().__init__() - self.a = conv2d(w_in, w_out, 3, stride=stride) - self.a_bn = get_norm(norm, w_out) - self.a_af = activation_class() - self.b = conv2d(w_out, w_out, 3) - self.b_bn = get_norm(norm, w_out) - self.b_bn.final_bn = True - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - -class ResBasicBlock(CNNBlockBase): - """Residual basic block: x + f(x), f = basic transform.""" - - def __init__(self, w_in, w_out, stride, norm, activation_class, params): - super().__init__(w_in, w_out, stride) - self.proj, self.bn = None, None - if (w_in != w_out) or (stride != 1): - self.proj = conv2d(w_in, w_out, 1, stride=stride) - self.bn = get_norm(norm, w_out) - self.f = BasicTransform(w_in, w_out, stride, 
norm, activation_class, params) - self.af = activation_class() - - def forward(self, x): - x_p = self.bn(self.proj(x)) if self.proj else x - return self.af(x_p + self.f(x)) - - -class BottleneckTransform(nn.Module): - """Bottleneck transformation: 1x1, 3x3 [+SE], 1x1.""" - - def __init__(self, w_in, w_out, stride, norm, activation_class, params): - super().__init__() - w_b = int(round(w_out * params["bot_mul"])) - w_se = int(round(w_in * params["se_r"])) - groups = w_b // params["group_w"] - self.a = conv2d(w_in, w_b, 1) - self.a_bn = get_norm(norm, w_b) - self.a_af = activation_class() - self.b = conv2d(w_b, w_b, 3, stride=stride, groups=groups) - self.b_bn = get_norm(norm, w_b) - self.b_af = activation_class() - self.se = SE(w_b, w_se, activation_class) if w_se else None - self.c = conv2d(w_b, w_out, 1) - self.c_bn = get_norm(norm, w_out) - self.c_bn.final_bn = True - - def forward(self, x): - for layer in self.children(): - x = layer(x) - return x - - -class ResBottleneckBlock(CNNBlockBase): - """Residual bottleneck block: x + f(x), f = bottleneck transform.""" - - def __init__(self, w_in, w_out, stride, norm, activation_class, params): - super().__init__(w_in, w_out, stride) - self.proj, self.bn = None, None - if (w_in != w_out) or (stride != 1): - self.proj = conv2d(w_in, w_out, 1, stride=stride) - self.bn = get_norm(norm, w_out) - self.f = BottleneckTransform(w_in, w_out, stride, norm, activation_class, params) - self.af = activation_class() - - def forward(self, x): - x_p = self.bn(self.proj(x)) if self.proj else x - return self.af(x_p + self.f(x)) - - -class AnyStage(nn.Module): - """AnyNet stage (sequence of blocks w/ the same output shape).""" - - def __init__(self, w_in, w_out, stride, d, block_class, norm, activation_class, params): - super().__init__() - for i in range(d): - block = block_class(w_in, w_out, stride, norm, activation_class, params) - self.add_module("b{}".format(i + 1), block) - stride, w_in = 1, w_out - - def forward(self, x): - for block in self.children(): - x = block(x) - return x - - -class AnyNet(Backbone): - """AnyNet model. See :paper:`dds`.""" - - def __init__( - self, - *, - stem_class, - stem_width, - block_class, - depths, - widths, - group_widths, - strides, - bottleneck_ratios, - se_ratio, - activation_class, - freeze_at=0, - norm="BN", - out_features=None, - ): - """ - Args: - stem_class (callable): A callable taking 4 arguments (channels in, channels out, - normalization, callable returning an activation function) that returns another - callable implementing the stem module. - stem_width (int): The number of output channels that the stem produces. - block_class (callable): A callable taking 6 arguments (channels in, channels out, - stride, normalization, callable returning an activation function, a dict of - block-specific parameters) that returns another callable implementing the repeated - block module. - depths (list[int]): Number of blocks in each stage. - widths (list[int]): For each stage, the number of output channels of each block. - group_widths (list[int]): For each stage, the number of channels per group in group - convolution, if the block uses group convolution. - strides (list[int]): The stride that each network stage applies to its input. - bottleneck_ratios (list[float]): For each stage, the ratio of the number of bottleneck - channels to the number of block input channels (or, equivalently, output channels), - if the block uses a bottleneck. 
- se_ratio (float): The ratio of the number of channels used inside the squeeze-excitation - (SE) module to it number of input channels, if SE the block uses SE. - activation_class (callable): A callable taking no arguments that returns another - callable implementing an activation function. - freeze_at (int): The number of stages at the beginning to freeze. - see :meth:`freeze` for detailed explanation. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - out_features (list[str]): name of the layers whose outputs should - be returned in forward. RegNet's use "stem" and "s1", "s2", etc for the stages after - the stem. If None, will return the output of the last layer. - """ - super().__init__() - self.stem = stem_class(3, stem_width, norm, activation_class) - - current_stride = self.stem.stride - self._out_feature_strides = {"stem": current_stride} - self._out_feature_channels = {"stem": self.stem.out_channels} - self.stages_and_names = [] - prev_w = stem_width - - for i, (d, w, s, b, g) in enumerate( - zip(depths, widths, strides, bottleneck_ratios, group_widths) - ): - params = {"bot_mul": b, "group_w": g, "se_r": se_ratio} - stage = AnyStage(prev_w, w, s, d, block_class, norm, activation_class, params) - name = "s{}".format(i + 1) - self.add_module(name, stage) - self.stages_and_names.append((stage, name)) - self._out_feature_strides[name] = current_stride = int( - current_stride * np.prod([k.stride for k in stage.children()]) - ) - self._out_feature_channels[name] = list(stage.children())[-1].out_channels - prev_w = w - - self.apply(init_weights) - - if out_features is None: - out_features = [name] - self._out_features = out_features - assert len(self._out_features) - children = [x[0] for x in self.named_children()] - for out_feature in self._out_features: - assert out_feature in children, "Available children: {} does not include {}".format( - ", ".join(children), out_feature - ) - self.freeze(freeze_at) - - def forward(self, x): - """ - Args: - x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. - - Returns: - dict[str->Tensor]: names and the corresponding features - """ - assert x.dim() == 4, f"Model takes an input of shape (N, C, H, W). Got {x.shape} instead!" - outputs = {} - x = self.stem(x) - if "stem" in self._out_features: - outputs["stem"] = x - for stage, name in self.stages_and_names: - x = stage(x) - if name in self._out_features: - outputs[name] = x - return outputs - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - def freeze(self, freeze_at=0): - """ - Freeze the first several stages of the model. Commonly used in fine-tuning. - - Layers that produce the same feature map spatial size are defined as one - "stage" by :paper:`FPN`. - - Args: - freeze_at (int): number of stages to freeze. - `1` means freezing the stem. `2` means freezing the stem and - one residual stage, etc. 
- - Returns: - nn.Module: this model itself - """ - if freeze_at >= 1: - self.stem.freeze() - for idx, (stage, _) in enumerate(self.stages_and_names, start=2): - if freeze_at >= idx: - for block in stage.children(): - block.freeze() - return self - - -def adjust_block_compatibility(ws, bs, gs): - """Adjusts the compatibility of widths, bottlenecks, and groups.""" - assert len(ws) == len(bs) == len(gs) - assert all(w > 0 and b > 0 and g > 0 for w, b, g in zip(ws, bs, gs)) - vs = [int(max(1, w * b)) for w, b in zip(ws, bs)] - gs = [int(min(g, v)) for g, v in zip(gs, vs)] - ms = [np.lcm(g, b) if b > 1 else g for g, b in zip(gs, bs)] - vs = [max(m, int(round(v / m) * m)) for v, m in zip(vs, ms)] - ws = [int(v / b) for v, b in zip(vs, bs)] - assert all(w * b % g == 0 for w, b, g in zip(ws, bs, gs)) - return ws, bs, gs - - -def generate_regnet_parameters(w_a, w_0, w_m, d, q=8): - """Generates per stage widths and depths from RegNet parameters.""" - assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0 - # Generate continuous per-block ws - ws_cont = np.arange(d) * w_a + w_0 - # Generate quantized per-block ws - ks = np.round(np.log(ws_cont / w_0) / np.log(w_m)) - ws_all = w_0 * np.power(w_m, ks) - ws_all = np.round(np.divide(ws_all, q)).astype(int) * q - # Generate per stage ws and ds (assumes ws_all are sorted) - ws, ds = np.unique(ws_all, return_counts=True) - # Compute number of actual stages and total possible stages - num_stages, total_stages = len(ws), ks.max() + 1 - # Convert numpy arrays to lists and return - ws, ds, ws_all, ws_cont = (x.tolist() for x in (ws, ds, ws_all, ws_cont)) - return ws, ds, num_stages, total_stages, ws_all, ws_cont - - -class RegNet(AnyNet): - """RegNet model. See :paper:`dds`.""" - - def __init__( - self, - *, - stem_class, - stem_width, - block_class, - depth, - w_a, - w_0, - w_m, - group_width, - stride=2, - bottleneck_ratio=1.0, - se_ratio=0.0, - activation_class=None, - freeze_at=0, - norm="BN", - out_features=None, - ): - """ - Build a RegNet from the parameterization described in :paper:`dds` Section 3.3. - - Args: - See :class:`AnyNet` for arguments that are not listed here. - depth (int): Total number of blocks in the RegNet. - w_a (float): Factor by which block width would increase prior to quantizing block widths - by stage. See :paper:`dds` Section 3.3. - w_0 (int): Initial block width. See :paper:`dds` Section 3.3. - w_m (float): Parameter controlling block width quantization. - See :paper:`dds` Section 3.3. - group_width (int): Number of channels per group in group convolution, if the block uses - group convolution. - bottleneck_ratio (float): The ratio of the number of bottleneck channels to the number - of block input channels (or, equivalently, output channels), if the block uses a - bottleneck. - stride (int): The stride that each network stage applies to its input. 
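# Hypothetical worked example (not detectron2 code) of the width generation in
# generate_regnet_parameters above, assuming w_a=8, w_0=24, w_m=2, d=4, q=8.
import numpy as np

w_a, w_0, w_m, d, q = 8.0, 24, 2.0, 4, 8
ws_cont = np.arange(d) * w_a + w_0  # [24., 32., 40., 48.] continuous per-block widths
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))  # [0., 0., 1., 1.] quantization exponents
ws_all = np.round(w_0 * np.power(w_m, ks) / q).astype(int) * q  # [24, 24, 48, 48]
ws, ds = np.unique(ws_all, return_counts=True)  # stage widths [24, 48], stage depths [2, 2]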
- """ - ws, ds = generate_regnet_parameters(w_a, w_0, w_m, depth)[0:2] - ss = [stride for _ in ws] - bs = [bottleneck_ratio for _ in ws] - gs = [group_width for _ in ws] - ws, bs, gs = adjust_block_compatibility(ws, bs, gs) - - def default_activation_class(): - return nn.ReLU(inplace=True) - - super().__init__( - stem_class=stem_class, - stem_width=stem_width, - block_class=block_class, - depths=ds, - widths=ws, - strides=ss, - group_widths=gs, - bottleneck_ratios=bs, - se_ratio=se_ratio, - activation_class=( - default_activation_class if activation_class is None else activation_class - ), - freeze_at=freeze_at, - norm=norm, - out_features=out_features, - ) diff --git a/detectron2/detectron2/modeling/backbone/resnet.py b/detectron2/detectron2/modeling/backbone/resnet.py deleted file mode 100644 index 7a2263d5e2131316975e169c4f64e1ca684c6c98..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/resnet.py +++ /dev/null @@ -1,694 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.layers import ( - CNNBlockBase, - Conv2d, - DeformConv, - ModulatedDeformConv, - ShapeSpec, - get_norm, -) - -from .backbone import Backbone -from .build import BACKBONE_REGISTRY - -__all__ = [ - "ResNetBlockBase", - "BasicBlock", - "BottleneckBlock", - "DeformBottleneckBlock", - "BasicStem", - "ResNet", - "make_stage", - "build_resnet_backbone", -] - - -class BasicBlock(CNNBlockBase): - """ - The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, - with two 3x3 conv layers and a projection shortcut if needed. - """ - - def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): - """ - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - stride (int): Stride for the first conv. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, stride) - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - self.conv1 = Conv2d( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - self.conv2 = Conv2d( - out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - out = self.conv2(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class BottleneckBlock(CNNBlockBase): - """ - The standard bottleneck residual block used by ResNet-50, 101 and 152 - defined in :paper:`ResNet`. It contains 3 conv layers with kernels - 1x1, 3x3, 1x1, and a projection shortcut if needed. 
- """ - - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - dilation=1, - ): - """ - Args: - bottleneck_channels (int): number of output channels for the 3x3 - "bottleneck" conv layers. - num_groups (int): number of groups for the 3x3 conv layer. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - stride_in_1x1 (bool): when stride>1, whether to put stride in the - first 1x1 convolution or the bottleneck 3x3 convolution. - dilation (int): the dilation rate of the 3x3 conv layer. - """ - super().__init__(in_channels, out_channels, stride) - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - # The original MSRA ResNet models have stride in the first 1x1 conv - # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have - # stride in the 3x3 conv - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv2 = Conv2d( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - bias=False, - groups=num_groups, - dilation=dilation, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - # Zero-initialize the last normalization in each residual branch, - # so that at the beginning, the residual branch starts with zeros, - # and each residual block behaves like an identity. - # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": - # "For BN layers, the learnable scaling coefficient Ξ³ is initialized - # to be 1, except for each residual block's last BN - # where Ξ³ is initialized to be 0." - - # nn.init.constant_(self.conv3.norm.weight, 0) - # TODO this somehow hurts performance when training GN models from scratch. - # Add it as an option when we need to use this code to train a backbone. - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - - out = self.conv2(out) - out = F.relu_(out) - - out = self.conv3(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class DeformBottleneckBlock(CNNBlockBase): - """ - Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` - in the 3x3 convolution. 
- """ - - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - dilation=1, - deform_modulated=False, - deform_num_groups=1, - ): - super().__init__(in_channels, out_channels, stride) - self.deform_modulated = deform_modulated - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - if deform_modulated: - deform_conv_op = ModulatedDeformConv - # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size - offset_channels = 27 - else: - deform_conv_op = DeformConv - offset_channels = 18 - - self.conv2_offset = Conv2d( - bottleneck_channels, - offset_channels * deform_num_groups, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - dilation=dilation, - ) - self.conv2 = deform_conv_op( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - bias=False, - groups=num_groups, - dilation=dilation, - deformable_groups=deform_num_groups, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - nn.init.constant_(self.conv2_offset.weight, 0) - nn.init.constant_(self.conv2_offset.bias, 0) - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - - if self.deform_modulated: - offset_mask = self.conv2_offset(out) - offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) - offset = torch.cat((offset_x, offset_y), dim=1) - mask = mask.sigmoid() - out = self.conv2(out, offset, mask) - else: - offset = self.conv2_offset(out) - out = self.conv2(out, offset) - out = F.relu_(out) - - out = self.conv3(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class BasicStem(CNNBlockBase): - """ - The standard ResNet stem (layers before the first residual block), - with a conv, relu and max_pool. - """ - - def __init__(self, in_channels=3, out_channels=64, norm="BN"): - """ - Args: - norm (str or callable): norm after the first conv layer. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, 4) - self.in_channels = in_channels - self.conv1 = Conv2d( - in_channels, - out_channels, - kernel_size=7, - stride=2, - padding=3, - bias=False, - norm=get_norm(norm, out_channels), - ) - weight_init.c2_msra_fill(self.conv1) - - def forward(self, x): - x = self.conv1(x) - x = F.relu_(x) - x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) - return x - - -class ResNet(Backbone): - """ - Implement :paper:`ResNet`. - """ - - def __init__(self, stem, stages, num_classes=None, out_features=None, freeze_at=0): - """ - Args: - stem (nn.Module): a stem module - stages (list[list[CNNBlockBase]]): several (typically 4) stages, - each contains multiple :class:`CNNBlockBase`. 
- num_classes (None or int): if None, will not perform classification. - Otherwise, will create a linear layer. - out_features (list[str]): name of the layers whose outputs should - be returned in forward. Can be anything in "stem", "linear", or "res2" ... - If None, will return the output of the last layer. - freeze_at (int): The number of stages at the beginning to freeze. - see :meth:`freeze` for detailed explanation. - """ - super().__init__() - self.stem = stem - self.num_classes = num_classes - - current_stride = self.stem.stride - self._out_feature_strides = {"stem": current_stride} - self._out_feature_channels = {"stem": self.stem.out_channels} - - self.stage_names, self.stages = [], [] - - if out_features is not None: - # Avoid keeping unused layers in this module. They consume extra memory - # and may cause allreduce to fail - num_stages = max( - [{"res2": 1, "res3": 2, "res4": 3, "res5": 4}.get(f, 0) for f in out_features] - ) - stages = stages[:num_stages] - for i, blocks in enumerate(stages): - assert len(blocks) > 0, len(blocks) - for block in blocks: - assert isinstance(block, CNNBlockBase), block - - name = "res" + str(i + 2) - stage = nn.Sequential(*blocks) - - self.add_module(name, stage) - self.stage_names.append(name) - self.stages.append(stage) - - self._out_feature_strides[name] = current_stride = int( - current_stride * np.prod([k.stride for k in blocks]) - ) - self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels - self.stage_names = tuple(self.stage_names) # Make it static for scripting - - if num_classes is not None: - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.linear = nn.Linear(curr_channels, num_classes) - - # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": - # "The 1000-way fully-connected layer is initialized by - # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." - nn.init.normal_(self.linear.weight, std=0.01) - name = "linear" - - if out_features is None: - out_features = [name] - self._out_features = out_features - assert len(self._out_features) - children = [x[0] for x in self.named_children()] - for out_feature in self._out_features: - assert out_feature in children, "Available children: {}".format(", ".join(children)) - self.freeze(freeze_at) - - def forward(self, x): - """ - Args: - x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. - - Returns: - dict[str->Tensor]: names and the corresponding features - """ - assert x.dim() == 4, f"ResNet takes an input of shape (N, C, H, W). Got {x.shape} instead!" - outputs = {} - x = self.stem(x) - if "stem" in self._out_features: - outputs["stem"] = x - for name, stage in zip(self.stage_names, self.stages): - x = stage(x) - if name in self._out_features: - outputs[name] = x - if self.num_classes is not None: - x = self.avgpool(x) - x = torch.flatten(x, 1) - x = self.linear(x) - if "linear" in self._out_features: - outputs["linear"] = x - return outputs - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - def freeze(self, freeze_at=0): - """ - Freeze the first several stages of the ResNet. Commonly used in - fine-tuning. - - Layers that produce the same feature map spatial size are defined as one - "stage" by :paper:`FPN`. - - Args: - freeze_at (int): number of stages to freeze. - `1` means freezing the stem. `2` means freezing the stem and - one residual stage, etc. 
- - Returns: - nn.Module: this ResNet itself - """ - if freeze_at >= 1: - self.stem.freeze() - for idx, stage in enumerate(self.stages, start=2): - if freeze_at >= idx: - for block in stage.children(): - block.freeze() - return self - - @staticmethod - def make_stage(block_class, num_blocks, *, in_channels, out_channels, **kwargs): - """ - Create a list of blocks of the same type that forms one ResNet stage. - - Args: - block_class (type): a subclass of CNNBlockBase that's used to create all blocks in this - stage. A module of this type must not change spatial resolution of inputs unless its - stride != 1. - num_blocks (int): number of blocks in this stage - in_channels (int): input channels of the entire stage. - out_channels (int): output channels of **every block** in the stage. - kwargs: other arguments passed to the constructor of - `block_class`. If the argument name is "xx_per_block", the - argument is a list of values to be passed to each block in the - stage. Otherwise, the same argument is passed to every block - in the stage. - - Returns: - list[CNNBlockBase]: a list of block module. - - Examples: - :: - stage = ResNet.make_stage( - BottleneckBlock, 3, in_channels=16, out_channels=64, - bottleneck_channels=16, num_groups=1, - stride_per_block=[2, 1, 1], - dilations_per_block=[1, 1, 2] - ) - - Usually, layers that produce the same feature map spatial size are defined as one - "stage" (in :paper:`FPN`). Under such definition, ``stride_per_block[1:]`` should - all be 1. - """ - blocks = [] - for i in range(num_blocks): - curr_kwargs = {} - for k, v in kwargs.items(): - if k.endswith("_per_block"): - assert len(v) == num_blocks, ( - f"Argument '{k}' of make_stage should have the " - f"same length as num_blocks={num_blocks}." - ) - newk = k[: -len("_per_block")] - assert newk not in kwargs, f"Cannot call make_stage with both {k} and {newk}!" - curr_kwargs[newk] = v[i] - else: - curr_kwargs[k] = v - - blocks.append( - block_class(in_channels=in_channels, out_channels=out_channels, **curr_kwargs) - ) - in_channels = out_channels - return blocks - - @staticmethod - def make_default_stages(depth, block_class=None, **kwargs): - """ - Created list of ResNet stages from pre-defined depth (one of 18, 34, 50, 101, 152). - If it doesn't create the ResNet variant you need, please use :meth:`make_stage` - instead for fine-grained customization. - - Args: - depth (int): depth of ResNet - block_class (type): the CNN block class. Has to accept - `bottleneck_channels` argument for depth > 50. - By default it is BasicBlock or BottleneckBlock, based on the - depth. - kwargs: - other arguments to pass to `make_stage`. Should not contain - stride and channels, as they are predefined for each depth. - - Returns: - list[list[CNNBlockBase]]: modules in all stages; see arguments of - :class:`ResNet.__init__`. 
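A minimal usage sketch of the stage helpers documented above, assuming an upstream detectron2 installation still provides these classes:

from detectron2.modeling.backbone.resnet import BasicStem, ResNet

stem = BasicStem(in_channels=3, out_channels=64, norm="BN")
# Equivalent to calling ResNet.make_stage four times with the ResNet-50 settings:
# [3, 4, 6, 3] bottleneck blocks per stage, out_channels 256/512/1024/2048.
stages = ResNet.make_default_stages(50, norm="BN")
model = ResNet(stem, stages, out_features=["res2", "res3", "res4", "res5"], freeze_at=2)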
- """ - num_blocks_per_stage = { - 18: [2, 2, 2, 2], - 34: [3, 4, 6, 3], - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], - }[depth] - if block_class is None: - block_class = BasicBlock if depth < 50 else BottleneckBlock - if depth < 50: - in_channels = [64, 64, 128, 256] - out_channels = [64, 128, 256, 512] - else: - in_channels = [64, 256, 512, 1024] - out_channels = [256, 512, 1024, 2048] - ret = [] - for n, s, i, o in zip(num_blocks_per_stage, [1, 2, 2, 2], in_channels, out_channels): - if depth >= 50: - kwargs["bottleneck_channels"] = o // 4 - ret.append( - ResNet.make_stage( - block_class=block_class, - num_blocks=n, - stride_per_block=[s] + [1] * (n - 1), - in_channels=i, - out_channels=o, - **kwargs, - ) - ) - return ret - - -ResNetBlockBase = CNNBlockBase -""" -Alias for backward compatibiltiy. -""" - - -def make_stage(*args, **kwargs): - """ - Deprecated alias for backward compatibiltiy. - """ - return ResNet.make_stage(*args, **kwargs) - - -@BACKBONE_REGISTRY.register() -def build_resnet_backbone(cfg, input_shape): - """ - Create a ResNet instance from config. - - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? - norm = cfg.MODEL.RESNETS.NORM - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - - # fmt: off - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - # fmt: on - assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) - - num_blocks_per_stage = { - 18: [2, 2, 2, 2], - 34: [3, 4, 6, 3], - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], - }[depth] - - if depth in [18, 34]: - assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" - assert not any( - deform_on_per_stage - ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" - assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" - assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" - - stages = [] - - for idx, stage_idx in enumerate(range(2, 6)): - # res5_dilation is used this way as a convention in R-FCN & Deformable Conv paper - dilation = res5_dilation if stage_idx == 5 else 1 - first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), - "in_channels": in_channels, - "out_channels": out_channels, - "norm": norm, - } - # Use BasicBlock for R18 and R34. 
- if depth in [18, 34]: - stage_kargs["block_class"] = BasicBlock - else: - stage_kargs["bottleneck_channels"] = bottleneck_channels - stage_kargs["stride_in_1x1"] = stride_in_1x1 - stage_kargs["dilation"] = dilation - stage_kargs["num_groups"] = num_groups - if deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - blocks = ResNet.make_stage(**stage_kargs) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features, freeze_at=freeze_at) diff --git a/detectron2/detectron2/modeling/backbone/swin.py b/detectron2/detectron2/modeling/backbone/swin.py deleted file mode 100644 index 780b6fc6eaab1d9a3f513b8a09cb4dc95166e5a3..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/swin.py +++ /dev/null @@ -1,695 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Implementation of Swin models from :paper:`swin`. - -This code is adapted from https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/master/mmdet/models/backbones/swin_transformer.py with minimal modifications. # noqa --------------------------------------------------------- -Swin Transformer -Copyright (c) 2021 Microsoft -Licensed under The MIT License [see LICENSE for details] -Written by Ze Liu, Yutong Lin, Yixuan Wei --------------------------------------------------------- -LICENSE: https://github.com/SwinTransformer/Swin-Transformer-Object-Detection/blob/461e003166a8083d0b620beacd4662a2df306bd6/LICENSE -""" - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as checkpoint - -from detectron2.modeling.backbone.backbone import Backbone - -_to_2tuple = nn.modules.utils._ntuple(2) - - -class Mlp(nn.Module): - """Multilayer perceptron.""" - - def __init__( - self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0 - ): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -def window_partition(x, window_size): - """ - Args: - x: (B, H, W, C) - window_size (int): window size - Returns: - windows: (num_windows*B, window_size, window_size, C) - """ - B, H, W, C = x.shape - x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) - windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) - return windows - - -def window_reverse(windows, window_size, H, W): - """ - Args: - windows: (num_windows*B, window_size, window_size, C) - window_size (int): Window size - H (int): Height of image - W (int): Width of image - Returns: - x: (B, H, W, C) - """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) - x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) - return x - - -class WindowAttention(nn.Module): - """Window based multi-head self attention (W-MSA) module with 
relative position bias. - It supports both of shifted and non-shifted window. - Args: - dim (int): Number of input channels. - window_size (tuple[int]): The height and width of the window. - num_heads (int): Number of attention heads. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. - Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set - attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 - proj_drop (float, optional): Dropout ratio of output. Default: 0.0 - """ - - def __init__( - self, - dim, - window_size, - num_heads, - qkv_bias=True, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - ): - - super().__init__() - self.dim = dim - self.window_size = window_size # Wh, Ww - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim**-0.5 - - # define a parameter table of relative position bias - self.relative_position_bias_table = nn.Parameter( - torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads) - ) # 2*Wh-1 * 2*Ww-1, nH - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(self.window_size[0]) - coords_w = torch.arange(self.window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += self.window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 - relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - self.register_buffer("relative_position_index", relative_position_index) - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - nn.init.trunc_normal_(self.relative_position_bias_table, std=0.02) - self.softmax = nn.Softmax(dim=-1) - - def forward(self, x, mask=None): - """Forward function. - Args: - x: input features with shape of (num_windows*B, N, C) - mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None - """ - B_, N, C = x.shape - qkv = ( - self.qkv(x) - .reshape(B_, N, 3, self.num_heads, C // self.num_heads) - .permute(2, 0, 3, 1, 4) - ) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) - - q = q * self.scale - attn = q @ k.transpose(-2, -1) - - relative_position_bias = self.relative_position_bias_table[ - self.relative_position_index.view(-1) - ].view( - self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1 - ) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute( - 2, 0, 1 - ).contiguous() # nH, Wh*Ww, Wh*Ww - attn = attn + relative_position_bias.unsqueeze(0) - - if mask is not None: - nW = mask.shape[0] - attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) - attn = attn.view(-1, self.num_heads, N, N) - attn = self.softmax(attn) - else: - attn = self.softmax(attn) - - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B_, N, C) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class SwinTransformerBlock(nn.Module): - """Swin Transformer Block. 
- Args: - dim (int): Number of input channels. - num_heads (int): Number of attention heads. - window_size (int): Window size. - shift_size (int): Shift size for SW-MSA. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float, optional): Stochastic depth rate. Default: 0.0 - act_layer (nn.Module, optional): Activation layer. Default: nn.GELU - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - """ - - def __init__( - self, - dim, - num_heads, - window_size=7, - shift_size=0, - mlp_ratio=4.0, - qkv_bias=True, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer=nn.GELU, - norm_layer=nn.LayerNorm, - ): - super().__init__() - self.dim = dim - self.num_heads = num_heads - self.window_size = window_size - self.shift_size = shift_size - self.mlp_ratio = mlp_ratio - assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" - - self.norm1 = norm_layer(dim) - self.attn = WindowAttention( - dim, - window_size=_to_2tuple(self.window_size), - num_heads=num_heads, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=drop, - ) - - if drop_path > 0.0: - from timm.models.layers import DropPath - - self.drop_path = DropPath(drop_path) - else: - self.drop_path = nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp( - in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop - ) - - self.H = None - self.W = None - - def forward(self, x, mask_matrix): - """Forward function. - Args: - x: Input feature, tensor size (B, H*W, C). - H, W: Spatial resolution of the input feature. - mask_matrix: Attention mask for cyclic shift. 
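A self-contained shape walk-through of the window partition/reverse round trip used in this forward pass (sizes are illustrative):

import torch

B, H, W, C, ws = 2, 14, 14, 96, 7   # batch, height, width, channels, window_size
x = torch.randn(B, H, W, C)
# Same layout math as window_partition(x, ws): (B, H, W, C) -> (nW*B, ws, ws, C)
windows = (x.view(B, H // ws, ws, W // ws, ws, C)
            .permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, ws, ws, C))
assert windows.shape == (B * (H // ws) * (W // ws), ws, ws, C)   # (8, 7, 7, 96)
# Same layout math as window_reverse(windows, ws, H, W): back to (B, H, W, C)
restored = (windows.view(B, H // ws, W // ws, ws, ws, C)
             .permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, C))
assert torch.equal(restored, x)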
- """ - B, L, C = x.shape - H, W = self.H, self.W - assert L == H * W, "input feature has wrong size" - - shortcut = x - x = self.norm1(x) - x = x.view(B, H, W, C) - - # pad feature maps to multiples of window size - pad_l = pad_t = 0 - pad_r = (self.window_size - W % self.window_size) % self.window_size - pad_b = (self.window_size - H % self.window_size) % self.window_size - x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) - _, Hp, Wp, _ = x.shape - - # cyclic shift - if self.shift_size > 0: - shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) - attn_mask = mask_matrix - else: - shifted_x = x - attn_mask = None - - # partition windows - x_windows = window_partition( - shifted_x, self.window_size - ) # nW*B, window_size, window_size, C - x_windows = x_windows.view( - -1, self.window_size * self.window_size, C - ) # nW*B, window_size*window_size, C - - # W-MSA/SW-MSA - attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C - - # merge windows - attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) - shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C - - # reverse cyclic shift - if self.shift_size > 0: - x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) - else: - x = shifted_x - - if pad_r > 0 or pad_b > 0: - x = x[:, :H, :W, :].contiguous() - - x = x.view(B, H * W, C) - - # FFN - x = shortcut + self.drop_path(x) - x = x + self.drop_path(self.mlp(self.norm2(x))) - - return x - - -class PatchMerging(nn.Module): - """Patch Merging Layer - Args: - dim (int): Number of input channels. - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - """ - - def __init__(self, dim, norm_layer=nn.LayerNorm): - super().__init__() - self.dim = dim - self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) - self.norm = norm_layer(4 * dim) - - def forward(self, x, H, W): - """Forward function. - Args: - x: Input feature, tensor size (B, H*W, C). - H, W: Spatial resolution of the input feature. - """ - B, L, C = x.shape - assert L == H * W, "input feature has wrong size" - - x = x.view(B, H, W, C) - - # padding - pad_input = (H % 2 == 1) or (W % 2 == 1) - if pad_input: - x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) - - x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C - x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C - x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C - x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C - x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C - x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C - - x = self.norm(x) - x = self.reduction(x) - - return x - - -class BasicLayer(nn.Module): - """A basic Swin Transformer layer for one stage. - Args: - dim (int): Number of feature channels - depth (int): Depths of this stage. - num_heads (int): Number of attention head. - window_size (int): Local window size. Default: 7. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. - qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. - drop (float, optional): Dropout rate. Default: 0.0 - attn_drop (float, optional): Attention dropout rate. Default: 0.0 - drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 - norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm - downsample (nn.Module | None, optional): Downsample layer at the end of the layer. 
- Default: None - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. - """ - - def __init__( - self, - dim, - depth, - num_heads, - window_size=7, - mlp_ratio=4.0, - qkv_bias=True, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - norm_layer=nn.LayerNorm, - downsample=None, - use_checkpoint=False, - ): - super().__init__() - self.window_size = window_size - self.shift_size = window_size // 2 - self.depth = depth - self.use_checkpoint = use_checkpoint - - # build blocks - self.blocks = nn.ModuleList( - [ - SwinTransformerBlock( - dim=dim, - num_heads=num_heads, - window_size=window_size, - shift_size=0 if (i % 2 == 0) else window_size // 2, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop, - attn_drop=attn_drop, - drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, - norm_layer=norm_layer, - ) - for i in range(depth) - ] - ) - - # patch merging layer - if downsample is not None: - self.downsample = downsample(dim=dim, norm_layer=norm_layer) - else: - self.downsample = None - - def forward(self, x, H, W): - """Forward function. - Args: - x: Input feature, tensor size (B, H*W, C). - H, W: Spatial resolution of the input feature. - """ - - # calculate attention mask for SW-MSA - Hp = int(np.ceil(H / self.window_size)) * self.window_size - Wp = int(np.ceil(W / self.window_size)) * self.window_size - img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 - h_slices = ( - slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None), - ) - w_slices = ( - slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None), - ) - cnt = 0 - for h in h_slices: - for w in w_slices: - img_mask[:, h, w, :] = cnt - cnt += 1 - - mask_windows = window_partition( - img_mask, self.window_size - ) # nW, window_size, window_size, 1 - mask_windows = mask_windows.view(-1, self.window_size * self.window_size) - attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill( - attn_mask == 0, float(0.0) - ) - - for blk in self.blocks: - blk.H, blk.W = H, W - if self.use_checkpoint: - x = checkpoint.checkpoint(blk, x, attn_mask) - else: - x = blk(x, attn_mask) - if self.downsample is not None: - x_down = self.downsample(x, H, W) - Wh, Ww = (H + 1) // 2, (W + 1) // 2 - return x, H, W, x_down, Wh, Ww - else: - return x, H, W, x, H, W - - -class PatchEmbed(nn.Module): - """Image to Patch Embedding - Args: - patch_size (int): Patch token size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. - embed_dim (int): Number of linear projection output channels. Default: 96. - norm_layer (nn.Module, optional): Normalization layer. 
Default: None - """ - - def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): - super().__init__() - patch_size = _to_2tuple(patch_size) - self.patch_size = patch_size - - self.in_chans = in_chans - self.embed_dim = embed_dim - - self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - if norm_layer is not None: - self.norm = norm_layer(embed_dim) - else: - self.norm = None - - def forward(self, x): - """Forward function.""" - # padding - _, _, H, W = x.size() - if W % self.patch_size[1] != 0: - x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) - if H % self.patch_size[0] != 0: - x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) - - x = self.proj(x) # B C Wh Ww - if self.norm is not None: - Wh, Ww = x.size(2), x.size(3) - x = x.flatten(2).transpose(1, 2) - x = self.norm(x) - x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) - - return x - - -class SwinTransformer(Backbone): - """Swin Transformer backbone. - A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted - Windows` - https://arxiv.org/pdf/2103.14030 - Args: - pretrain_img_size (int): Input image size for training the pretrained model, - used in absolute postion embedding. Default 224. - patch_size (int | tuple(int)): Patch size. Default: 4. - in_chans (int): Number of input image channels. Default: 3. - embed_dim (int): Number of linear projection output channels. Default: 96. - depths (tuple[int]): Depths of each Swin Transformer stage. - num_heads (tuple[int]): Number of attention head of each stage. - window_size (int): Window size. Default: 7. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. - qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True - qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. - drop_rate (float): Dropout rate. - attn_drop_rate (float): Attention dropout rate. Default: 0. - drop_path_rate (float): Stochastic depth rate. Default: 0.2. - norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. - ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. - patch_norm (bool): If True, add normalization after patch embedding. Default: True. - out_indices (Sequence[int]): Output from which stages. - frozen_stages (int): Stages to be frozen (stop grad and set eval mode). - -1 means not freezing any parameters. - use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. 
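A rough sketch of the per-stage feature bookkeeping the constructor below sets up, assuming the default embed_dim=96 and four stages:

embed_dim, num_layers = 96, 4
for i in range(num_layers):
    name = "p{}".format(i)          # output names "p0".."p3"
    channels = embed_dim * 2 ** i   # 96, 192, 384, 768
    stride = 2 ** (i + 2)           # 4, 8, 16, 32 (patch_size=4, one PatchMerging per stage)
    print(name, channels, stride)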
- """ - - def __init__( - self, - pretrain_img_size=224, - patch_size=4, - in_chans=3, - embed_dim=96, - depths=(2, 2, 6, 2), - num_heads=(3, 6, 12, 24), - window_size=7, - mlp_ratio=4.0, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.2, - norm_layer=nn.LayerNorm, - ape=False, - patch_norm=True, - out_indices=(0, 1, 2, 3), - frozen_stages=-1, - use_checkpoint=False, - ): - super().__init__() - - self.pretrain_img_size = pretrain_img_size - self.num_layers = len(depths) - self.embed_dim = embed_dim - self.ape = ape - self.patch_norm = patch_norm - self.out_indices = out_indices - self.frozen_stages = frozen_stages - - # split image into non-overlapping patches - self.patch_embed = PatchEmbed( - patch_size=patch_size, - in_chans=in_chans, - embed_dim=embed_dim, - norm_layer=norm_layer if self.patch_norm else None, - ) - - # absolute position embedding - if self.ape: - pretrain_img_size = _to_2tuple(pretrain_img_size) - patch_size = _to_2tuple(patch_size) - patches_resolution = [ - pretrain_img_size[0] // patch_size[0], - pretrain_img_size[1] // patch_size[1], - ] - - self.absolute_pos_embed = nn.Parameter( - torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1]) - ) - nn.init.trunc_normal_(self.absolute_pos_embed, std=0.02) - - self.pos_drop = nn.Dropout(p=drop_rate) - - # stochastic depth - dpr = [ - x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) - ] # stochastic depth decay rule - - # build layers - self.layers = nn.ModuleList() - for i_layer in range(self.num_layers): - layer = BasicLayer( - dim=int(embed_dim * 2**i_layer), - depth=depths[i_layer], - num_heads=num_heads[i_layer], - window_size=window_size, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - attn_drop=attn_drop_rate, - drop_path=dpr[sum(depths[:i_layer]) : sum(depths[: i_layer + 1])], - norm_layer=norm_layer, - downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, - use_checkpoint=use_checkpoint, - ) - self.layers.append(layer) - - num_features = [int(embed_dim * 2**i) for i in range(self.num_layers)] - self.num_features = num_features - - # add a norm layer for each output - for i_layer in out_indices: - layer = norm_layer(num_features[i_layer]) - layer_name = f"norm{i_layer}" - self.add_module(layer_name, layer) - - self._freeze_stages() - self._out_features = ["p{}".format(i) for i in self.out_indices] - self._out_feature_channels = { - "p{}".format(i): self.embed_dim * 2**i for i in self.out_indices - } - self._out_feature_strides = {"p{}".format(i): 2 ** (i + 2) for i in self.out_indices} - self._size_devisibility = 32 - - self.apply(self._init_weights) - - def _freeze_stages(self): - if self.frozen_stages >= 0: - self.patch_embed.eval() - for param in self.patch_embed.parameters(): - param.requires_grad = False - - if self.frozen_stages >= 1 and self.ape: - self.absolute_pos_embed.requires_grad = False - - if self.frozen_stages >= 2: - self.pos_drop.eval() - for i in range(0, self.frozen_stages - 1): - m = self.layers[i] - m.eval() - for param in m.parameters(): - param.requires_grad = False - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - nn.init.trunc_normal_(m.weight, std=0.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - @property - def size_divisibility(self): - return self._size_divisibility - - def forward(self, x): - 
"""Forward function.""" - x = self.patch_embed(x) - - Wh, Ww = x.size(2), x.size(3) - if self.ape: - # interpolate the position embedding to the corresponding size - absolute_pos_embed = F.interpolate( - self.absolute_pos_embed, size=(Wh, Ww), mode="bicubic" - ) - x = (x + absolute_pos_embed).flatten(2).transpose(1, 2) # B Wh*Ww C - else: - x = x.flatten(2).transpose(1, 2) - x = self.pos_drop(x) - - outs = {} - for i in range(self.num_layers): - layer = self.layers[i] - x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) - - if i in self.out_indices: - norm_layer = getattr(self, f"norm{i}") - x_out = norm_layer(x_out) - - out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() - outs["p{}".format(i)] = out - - return outs diff --git a/detectron2/detectron2/modeling/backbone/utils.py b/detectron2/detectron2/modeling/backbone/utils.py deleted file mode 100644 index 2b89a4c3fbe079a77fd0cef947cf9ada787fc55d..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/utils.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import torch -import torch.nn as nn -import torch.nn.functional as F - -__all__ = [ - "window_partition", - "window_unpartition", - "add_decomposed_rel_pos", - "get_abs_pos", - "PatchEmbed", -] - - -def window_partition(x, window_size): - """ - Partition into non-overlapping windows with padding if needed. - Args: - x (tensor): input tokens with [B, H, W, C]. - window_size (int): window size. - - Returns: - windows: windows after partition with [B * num_windows, window_size, window_size, C]. - (Hp, Wp): padded height and width before partition - """ - B, H, W, C = x.shape - - pad_h = (window_size - H % window_size) % window_size - pad_w = (window_size - W % window_size) % window_size - if pad_h > 0 or pad_w > 0: - x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) - Hp, Wp = H + pad_h, W + pad_w - - x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) - windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) - return windows, (Hp, Wp) - - -def window_unpartition(windows, window_size, pad_hw, hw): - """ - Window unpartition into original sequences and removing padding. - Args: - x (tensor): input tokens with [B * num_windows, window_size, window_size, C]. - window_size (int): window size. - pad_hw (Tuple): padded height and width (Hp, Wp). - hw (Tuple): original height and width (H, W) before padding. - - Returns: - x: unpartitioned sequences with [B, H, W, C]. - """ - Hp, Wp = pad_hw - H, W = hw - B = windows.shape[0] // (Hp * Wp // window_size // window_size) - x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) - - if Hp > H or Wp > W: - x = x[:, :H, :W, :].contiguous() - return x - - -def get_rel_pos(q_size, k_size, rel_pos): - """ - Get relative positional embeddings according to the relative positions of - query and key sizes. - Args: - q_size (int): size of query q. - k_size (int): size of key k. - rel_pos (Tensor): relative position embeddings (L, C). - - Returns: - Extracted positional embeddings according to relative positions. - """ - max_rel_dist = int(2 * max(q_size, k_size) - 1) - # Interpolate rel pos if needed. - if rel_pos.shape[0] != max_rel_dist: - # Interpolate rel pos. 
- rel_pos_resized = F.interpolate( - rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1), - size=max_rel_dist, - mode="linear", - ) - rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0) - else: - rel_pos_resized = rel_pos - - # Scale the coords with short length if shapes for q and k are different. - q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0) - k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0) - relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0) - - return rel_pos_resized[relative_coords.long()] - - -def add_decomposed_rel_pos(attn, q, rel_pos_h, rel_pos_w, q_size, k_size): - """ - Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`. - https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py # noqa B950 - Args: - attn (Tensor): attention map. - q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C). - rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis. - rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis. - q_size (Tuple): spatial sequence size of query q with (q_h, q_w). - k_size (Tuple): spatial sequence size of key k with (k_h, k_w). - - Returns: - attn (Tensor): attention map with added relative positional embeddings. - """ - q_h, q_w = q_size - k_h, k_w = k_size - Rh = get_rel_pos(q_h, k_h, rel_pos_h) - Rw = get_rel_pos(q_w, k_w, rel_pos_w) - - B, _, dim = q.shape - r_q = q.reshape(B, q_h, q_w, dim) - rel_h = torch.einsum("bhwc,hkc->bhwk", r_q, Rh) - rel_w = torch.einsum("bhwc,wkc->bhwk", r_q, Rw) - - attn = ( - attn.view(B, q_h, q_w, k_h, k_w) + rel_h[:, :, :, :, None] + rel_w[:, :, :, None, :] - ).view(B, q_h * q_w, k_h * k_w) - - return attn - - -def get_abs_pos(abs_pos, has_cls_token, hw): - """ - Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token - dimension for the original embeddings. - Args: - abs_pos (Tensor): absolute positional embeddings with (1, num_position, C). - has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token. - hw (Tuple): size of input image tokens. - - Returns: - Absolute positional embeddings after processing with shape (1, H, W, C) - """ - h, w = hw - if has_cls_token: - abs_pos = abs_pos[:, 1:] - xy_num = abs_pos.shape[1] - size = int(math.sqrt(xy_num)) - assert size * size == xy_num - - if size != h or size != w: - new_abs_pos = F.interpolate( - abs_pos.reshape(1, size, size, -1).permute(0, 3, 1, 2), - size=(h, w), - mode="bicubic", - align_corners=False, - ) - - return new_abs_pos.permute(0, 2, 3, 1) - else: - return abs_pos.reshape(1, h, w, -1) - - -class PatchEmbed(nn.Module): - """ - Image to Patch Embedding. - """ - - def __init__( - self, kernel_size=(16, 16), stride=(16, 16), padding=(0, 0), in_chans=3, embed_dim=768 - ): - """ - Args: - kernel_size (Tuple): kernel size of the projection layer. - stride (Tuple): stride of the projection layer. - padding (Tuple): padding size of the projection layer. - in_chans (int): Number of input image channels. - embed_dim (int): embed_dim (int): Patch embedding dimension. 
- """ - super().__init__() - - self.proj = nn.Conv2d( - in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding - ) - - def forward(self, x): - x = self.proj(x) - # B C H W -> B H W C - x = x.permute(0, 2, 3, 1) - return x diff --git a/detectron2/detectron2/modeling/backbone/vit.py b/detectron2/detectron2/modeling/backbone/vit.py deleted file mode 100644 index 31cc28ac887773dbc8aea2a663bacd5f7b63bb0c..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/backbone/vit.py +++ /dev/null @@ -1,524 +0,0 @@ -import logging -import math -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn as nn - -from detectron2.layers import CNNBlockBase, Conv2d, get_norm -from detectron2.modeling.backbone.fpn import _assert_strides_are_log2_contiguous - -from .backbone import Backbone -from .utils import ( - PatchEmbed, - add_decomposed_rel_pos, - get_abs_pos, - window_partition, - window_unpartition, -) - -logger = logging.getLogger(__name__) - - -__all__ = ["ViT", "SimpleFeaturePyramid", "get_vit_lr_decay_rate"] - - -class Attention(nn.Module): - """Multi-head Attention block with relative position embeddings.""" - - def __init__( - self, - dim, - num_heads=8, - qkv_bias=True, - use_rel_pos=False, - rel_pos_zero_init=True, - input_size=None, - ): - """ - Args: - dim (int): Number of input channels. - num_heads (int): Number of attention heads. - qkv_bias (bool: If True, add a learnable bias to query, key, value. - rel_pos (bool): If True, add relative positional embeddings to the attention map. - rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. - input_size (int or None): Input resolution for calculating the relative positional - parameter size. - """ - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = head_dim**-0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.proj = nn.Linear(dim, dim) - - self.use_rel_pos = use_rel_pos - if self.use_rel_pos: - # initialize relative positional embeddings - self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim)) - self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim)) - - if not rel_pos_zero_init: - nn.init.trunc_normal_(self.rel_pos_h, std=0.02) - nn.init.trunc_normal_(self.rel_pos_w, std=0.02) - - def forward(self, x): - B, H, W, _ = x.shape - # qkv with shape (3, B, nHead, H * W, C) - qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) - # q, k, v with shape (B * nHead, H * W, C) - q, k, v = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0) - - attn = (q * self.scale) @ k.transpose(-2, -1) - - if self.use_rel_pos: - attn = add_decomposed_rel_pos(attn, q, self.rel_pos_h, self.rel_pos_w, (H, W), (H, W)) - - attn = attn.softmax(dim=-1) - x = (attn @ v).view(B, self.num_heads, H, W, -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1) - x = self.proj(x) - - return x - - -class ResBottleneckBlock(CNNBlockBase): - """ - The standard bottleneck residual block without the last activation layer. - It contains 3 conv layers with kernels 1x1, 3x3, 1x1. - """ - - def __init__( - self, - in_channels, - out_channels, - bottleneck_channels, - norm="LN", - act_layer=nn.GELU, - ): - """ - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - bottleneck_channels (int): number of output channels for the 3x3 - "bottleneck" conv layers. - norm (str or callable): normalization for all conv layers. 
- See :func:`layers.get_norm` for supported format. - act_layer (callable): activation for all conv layers. - """ - super().__init__(in_channels, out_channels, 1) - - self.conv1 = Conv2d(in_channels, bottleneck_channels, 1, bias=False) - self.norm1 = get_norm(norm, bottleneck_channels) - self.act1 = act_layer() - - self.conv2 = Conv2d( - bottleneck_channels, - bottleneck_channels, - 3, - padding=1, - bias=False, - ) - self.norm2 = get_norm(norm, bottleneck_channels) - self.act2 = act_layer() - - self.conv3 = Conv2d(bottleneck_channels, out_channels, 1, bias=False) - self.norm3 = get_norm(norm, out_channels) - - for layer in [self.conv1, self.conv2, self.conv3]: - weight_init.c2_msra_fill(layer) - for layer in [self.norm1, self.norm2]: - layer.weight.data.fill_(1.0) - layer.bias.data.zero_() - # zero init last norm layer. - self.norm3.weight.data.zero_() - self.norm3.bias.data.zero_() - - def forward(self, x): - out = x - for layer in self.children(): - out = layer(out) - - out = x + out - return out - - -class Block(nn.Module): - """Transformer blocks with support of window attention and residual propagation blocks""" - - def __init__( - self, - dim, - num_heads, - mlp_ratio=4.0, - qkv_bias=True, - drop_path=0.0, - norm_layer=nn.LayerNorm, - act_layer=nn.GELU, - use_rel_pos=False, - rel_pos_zero_init=True, - window_size=0, - use_residual_block=False, - input_size=None, - ): - """ - Args: - dim (int): Number of input channels. - num_heads (int): Number of attention heads in each ViT block. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool): If True, add a learnable bias to query, key, value. - drop_path (float): Stochastic depth rate. - norm_layer (nn.Module): Normalization layer. - act_layer (nn.Module): Activation layer. - use_rel_pos (bool): If True, add relative positional embeddings to the attention map. - rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. - window_size (int): Window size for window attention blocks. If it equals 0, then not - use window attention. - use_residual_block (bool): If True, use a residual block after the MLP block. - input_size (int or None): Input resolution for calculating the relative positional - parameter size. 
- """ - super().__init__() - self.norm1 = norm_layer(dim) - self.attn = Attention( - dim, - num_heads=num_heads, - qkv_bias=qkv_bias, - use_rel_pos=use_rel_pos, - rel_pos_zero_init=rel_pos_zero_init, - input_size=input_size if window_size == 0 else (window_size, window_size), - ) - - from timm.models.layers import DropPath, Mlp - - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() - self.norm2 = norm_layer(dim) - self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer) - - self.window_size = window_size - - self.use_residual_block = use_residual_block - if use_residual_block: - # Use a residual block with bottleneck channel as dim // 2 - self.residual = ResBottleneckBlock( - in_channels=dim, - out_channels=dim, - bottleneck_channels=dim // 2, - norm="LN", - act_layer=act_layer, - ) - - def forward(self, x): - shortcut = x - x = self.norm1(x) - # Window partition - if self.window_size > 0: - H, W = x.shape[1], x.shape[2] - x, pad_hw = window_partition(x, self.window_size) - - x = self.attn(x) - # Reverse window partition - if self.window_size > 0: - x = window_unpartition(x, self.window_size, pad_hw, (H, W)) - - x = shortcut + self.drop_path(x) - x = x + self.drop_path(self.mlp(self.norm2(x))) - - if self.use_residual_block: - x = self.residual(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) - - return x - - -class ViT(Backbone): - """ - This module implements Vision Transformer (ViT) backbone in :paper:`vitdet`. - "Exploring Plain Vision Transformer Backbones for Object Detection", - https://arxiv.org/abs/2203.16527 - """ - - def __init__( - self, - img_size=1024, - patch_size=16, - in_chans=3, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4.0, - qkv_bias=True, - drop_path_rate=0.0, - norm_layer=nn.LayerNorm, - act_layer=nn.GELU, - use_abs_pos=True, - use_rel_pos=False, - rel_pos_zero_init=True, - window_size=0, - window_block_indexes=(), - residual_block_indexes=(), - use_act_checkpoint=False, - pretrain_img_size=224, - pretrain_use_cls_token=True, - out_feature="last_feat", - ): - """ - Args: - img_size (int): Input image size. - patch_size (int): Patch size. - in_chans (int): Number of input image channels. - embed_dim (int): Patch embedding dimension. - depth (int): Depth of ViT. - num_heads (int): Number of attention heads in each ViT block. - mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. - qkv_bias (bool): If True, add a learnable bias to query, key, value. - drop_path_rate (float): Stochastic depth rate. - norm_layer (nn.Module): Normalization layer. - act_layer (nn.Module): Activation layer. - use_abs_pos (bool): If True, use absolute positional embeddings. - use_rel_pos (bool): If True, add relative positional embeddings to the attention map. - rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. - window_size (int): Window size for window attention blocks. - window_block_indexes (list): Indexes for blocks using window attention. - residual_block_indexes (list): Indexes for blocks using conv propagation. - use_act_checkpoint (bool): If True, use activation checkpointing. - pretrain_img_size (int): input image size for pretraining models. - pretrain_use_cls_token (bool): If True, pretrainig models use class token. - out_feature (str): name of the feature from the last block. 
- """ - super().__init__() - self.pretrain_use_cls_token = pretrain_use_cls_token - - self.patch_embed = PatchEmbed( - kernel_size=(patch_size, patch_size), - stride=(patch_size, patch_size), - in_chans=in_chans, - embed_dim=embed_dim, - ) - - if use_abs_pos: - # Initialize absolute positional embedding with pretrain image size. - num_patches = (pretrain_img_size // patch_size) * (pretrain_img_size // patch_size) - num_positions = (num_patches + 1) if pretrain_use_cls_token else num_patches - self.pos_embed = nn.Parameter(torch.zeros(1, num_positions, embed_dim)) - else: - self.pos_embed = None - - # stochastic depth decay rule - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] - - self.blocks = nn.ModuleList() - for i in range(depth): - block = Block( - dim=embed_dim, - num_heads=num_heads, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - drop_path=dpr[i], - norm_layer=norm_layer, - act_layer=act_layer, - use_rel_pos=use_rel_pos, - rel_pos_zero_init=rel_pos_zero_init, - window_size=window_size if i in window_block_indexes else 0, - use_residual_block=i in residual_block_indexes, - input_size=(img_size // patch_size, img_size // patch_size), - ) - if use_act_checkpoint: - # TODO: use torch.utils.checkpoint - from fairscale.nn.checkpoint import checkpoint_wrapper - - block = checkpoint_wrapper(block) - self.blocks.append(block) - - self._out_feature_channels = {out_feature: embed_dim} - self._out_feature_strides = {out_feature: patch_size} - self._out_features = [out_feature] - - if self.pos_embed is not None: - nn.init.trunc_normal_(self.pos_embed, std=0.02) - - self.apply(self._init_weights) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - nn.init.trunc_normal_(m.weight, std=0.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - def forward(self, x): - x = self.patch_embed(x) - if self.pos_embed is not None: - x = x + get_abs_pos( - self.pos_embed, self.pretrain_use_cls_token, (x.shape[1], x.shape[2]) - ) - - for blk in self.blocks: - x = blk(x) - - outputs = {self._out_features[0]: x.permute(0, 3, 1, 2)} - return outputs - - -class SimpleFeaturePyramid(Backbone): - """ - This module implements SimpleFeaturePyramid in :paper:`vitdet`. - It creates pyramid features built on top of the input feature map. - """ - - def __init__( - self, - net, - in_feature, - out_channels, - scale_factors, - top_block=None, - norm="LN", - square_pad=0, - ): - """ - Args: - net (Backbone): module representing the subnetwork backbone. - Must be a subclass of :class:`Backbone`. - in_feature (str): names of the input feature maps coming - from the net. - out_channels (int): number of channels in the output feature maps. - scale_factors (list[float]): list of scaling factors to upsample or downsample - the input features for creating pyramid features. - top_block (nn.Module or None): if provided, an extra operation will - be performed on the output of the last (smallest resolution) - pyramid output, and the result will extend the result list. The top_block - further downsamples the feature map. It must have an attribute - "num_levels", meaning the number of extra pyramid levels added by - this block, and "in_feature", which is a string representing - its input feature (e.g., p5). - norm (str): the normalization to use. - square_pad (int): If > 0, require input images to be padded to specific square size. 
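A small arithmetic sketch of how scale_factors map to pyramid names and strides; the numbers assume a ViT feature at stride 16 with the scale factors commonly used in ViTDet configs:

import math

in_stride, scale_factors = 16, [4.0, 2.0, 1.0, 0.5]
strides = [int(in_stride / s) for s in scale_factors]        # [4, 8, 16, 32]
names = ["p{}".format(int(math.log2(s))) for s in strides]   # ['p2', 'p3', 'p4', 'p5']
print(list(zip(names, strides)))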
- """ - super(SimpleFeaturePyramid, self).__init__() - assert isinstance(net, Backbone) - - self.scale_factors = scale_factors - - input_shapes = net.output_shape() - strides = [int(input_shapes[in_feature].stride / scale) for scale in scale_factors] - _assert_strides_are_log2_contiguous(strides) - - dim = input_shapes[in_feature].channels - self.stages = [] - use_bias = norm == "" - for idx, scale in enumerate(scale_factors): - out_dim = dim - if scale == 4.0: - layers = [ - nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2), - get_norm(norm, dim // 2), - nn.GELU(), - nn.ConvTranspose2d(dim // 2, dim // 4, kernel_size=2, stride=2), - ] - out_dim = dim // 4 - elif scale == 2.0: - layers = [nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2)] - out_dim = dim // 2 - elif scale == 1.0: - layers = [] - elif scale == 0.5: - layers = [nn.MaxPool2d(kernel_size=2, stride=2)] - else: - raise NotImplementedError(f"scale_factor={scale} is not supported yet.") - - layers.extend( - [ - Conv2d( - out_dim, - out_channels, - kernel_size=1, - bias=use_bias, - norm=get_norm(norm, out_channels), - ), - Conv2d( - out_channels, - out_channels, - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, out_channels), - ), - ] - ) - layers = nn.Sequential(*layers) - - stage = int(math.log2(strides[idx])) - self.add_module(f"simfp_{stage}", layers) - self.stages.append(layers) - - self.net = net - self.in_feature = in_feature - self.top_block = top_block - # Return feature names are "p", like ["p2", "p3", ..., "p6"] - self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in strides} - # top block output feature maps. - if self.top_block is not None: - for s in range(stage, stage + self.top_block.num_levels): - self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) - - self._out_features = list(self._out_feature_strides.keys()) - self._out_feature_channels = {k: out_channels for k in self._out_features} - self._size_divisibility = strides[-1] - self._square_pad = square_pad - - @property - def padding_constraints(self): - return { - "size_divisiblity": self._size_divisibility, - "square_size": self._square_pad, - } - - def forward(self, x): - """ - Args: - x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. - - Returns: - dict[str->Tensor]: - mapping from feature map name to pyramid feature map tensor - in high to low resolution order. Returned feature names follow the FPN - convention: "p", where stage has stride = 2 ** stage e.g., - ["p2", "p3", ..., "p6"]. - """ - bottom_up_features = self.net(x) - features = bottom_up_features[self.in_feature] - results = [] - - for stage in self.stages: - results.append(stage(features)) - - if self.top_block is not None: - if self.top_block.in_feature in bottom_up_features: - top_block_in_feature = bottom_up_features[self.top_block.in_feature] - else: - top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] - results.extend(self.top_block(top_block_in_feature)) - assert len(self._out_features) == len(results) - return {f: res for f, res in zip(self._out_features, results)} - - -def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12): - """ - Calculate lr decay rate for different ViT blocks. - Args: - name (string): parameter name. - lr_decay_rate (float): base lr decay rate. - num_layers (int): number of ViT blocks. - - Returns: - lr decay rate for the given parameter. 
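A worked example of the decay schedule this function produces, assuming a 12-block ViT and lr_decay_rate=0.7:

lr_decay_rate, num_layers = 0.7, 12
# layer_id 0 -> patch/pos embed, 1..12 -> blocks, 13 -> all non-backbone parameters
multipliers = [lr_decay_rate ** (num_layers + 1 - i) for i in range(num_layers + 2)]
# multipliers[0] ~= 0.0097, multipliers[12] == 0.7, multipliers[13] == 1.0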
- """ - layer_id = num_layers + 1 - if name.startswith("backbone"): - if ".pos_embed" in name or ".patch_embed" in name: - layer_id = 0 - elif ".blocks." in name and ".residual." not in name: - layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1 - - return lr_decay_rate ** (num_layers + 1 - layer_id) diff --git a/detectron2/detectron2/modeling/box_regression.py b/detectron2/detectron2/modeling/box_regression.py deleted file mode 100644 index 290248e0685bfa332101f0454ba03688652dfd91..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/box_regression.py +++ /dev/null @@ -1,369 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -from typing import List, Tuple, Union -import torch -from fvcore.nn import giou_loss, smooth_l1_loss -from torch.nn import functional as F - -from detectron2.layers import cat, ciou_loss, diou_loss -from detectron2.structures import Boxes - -# Value for clamping large dw and dh predictions. The heuristic is that we clamp -# such that dw and dh are no larger than what would transform a 16px box into a -# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). -_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) - - -__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated", "Box2BoxTransformLinear"] - - -@torch.jit.script -class Box2BoxTransform: - """ - The box-to-box transform defined in R-CNN. The transformation is parameterized - by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height - by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). - """ - - def __init__( - self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP - ): - """ - Args: - weights (4-element tuple): Scaling factors that are applied to the - (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set - such that the deltas have unit variance; now they are treated as - hyperparameters of the system. - scale_clamp (float): When predicting deltas, the predicted box scaling - factors (dw and dh) are clamped such that they are <= scale_clamp. - """ - self.weights = weights - self.scale_clamp = scale_clamp - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx, dy, dw, dh) that can be used - to transform the `src_boxes` into the `target_boxes`. That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless - any delta is too large and is clamped). - - Args: - src_boxes (Tensor): source boxes, e.g., object proposals - target_boxes (Tensor): target of the transformation, e.g., ground-truth - boxes. 
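A self-contained numeric sketch of the (dx, dy, dw, dh) parameterization described above, using unit weights:

import math

# Source box 10x10 centered at (5, 5); target box 20x10 centered at (15, 10), both (x1, y1, x2, y2).
sw, sh, scx, scy = 10.0, 10.0, 5.0, 5.0
tw, th, tcx, tcy = 20.0, 10.0, 15.0, 10.0

dx = (tcx - scx) / sw      # 1.0
dy = (tcy - scy) / sh      # 0.5
dw = math.log(tw / sw)     # log(2)
dh = math.log(th / sh)     # 0.0

# apply_deltas inverts this: shift the center by (dx*w, dy*h) and scale the size by exp(dw), exp(dh).
assert (scx + dx * sw, scy + dy * sh) == (tcx, tcy)
assert math.isclose(sw * math.exp(dw), tw) and math.isclose(sh * math.exp(dh), th)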
- """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_widths = src_boxes[:, 2] - src_boxes[:, 0] - src_heights = src_boxes[:, 3] - src_boxes[:, 1] - src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths - src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights - - target_widths = target_boxes[:, 2] - target_boxes[:, 0] - target_heights = target_boxes[:, 3] - target_boxes[:, 1] - target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths - target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights - - wx, wy, ww, wh = self.weights - dx = wx * (target_ctr_x - src_ctr_x) / src_widths - dy = wy * (target_ctr_y - src_ctr_y) / src_heights - dw = ww * torch.log(target_widths / src_widths) - dh = wh * torch.log(target_heights / src_heights) - - deltas = torch.stack((dx, dy, dw, dh), dim=1) - assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. - deltas[i] represents k potentially different class-specific - box transformations for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 4) - """ - deltas = deltas.float() # ensure fp32 for decoding precision - boxes = boxes.to(deltas.dtype) - - widths = boxes[:, 2] - boxes[:, 0] - heights = boxes[:, 3] - boxes[:, 1] - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - wx, wy, ww, wh = self.weights - dx = deltas[:, 0::4] / wx - dy = deltas[:, 1::4] / wy - dw = deltas[:, 2::4] / ww - dh = deltas[:, 3::4] / wh - - # Prevent sending too large values into torch.exp() - dw = torch.clamp(dw, max=self.scale_clamp) - dh = torch.clamp(dh, max=self.scale_clamp) - - pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] - pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] - pred_w = torch.exp(dw) * widths[:, None] - pred_h = torch.exp(dh) * heights[:, None] - - x1 = pred_ctr_x - 0.5 * pred_w - y1 = pred_ctr_y - 0.5 * pred_h - x2 = pred_ctr_x + 0.5 * pred_w - y2 = pred_ctr_y + 0.5 * pred_h - pred_boxes = torch.stack((x1, y1, x2, y2), dim=-1) - return pred_boxes.reshape(deltas.shape) - - -@torch.jit.script -class Box2BoxTransformRotated: - """ - The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized - by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height - by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), - and rotate a box's angle by da (radians). - Note: angles of deltas are in radians while angles of boxes are in degrees. - """ - - def __init__( - self, - weights: Tuple[float, float, float, float, float], - scale_clamp: float = _DEFAULT_SCALE_CLAMP, - ): - """ - Args: - weights (5-element tuple): Scaling factors that are applied to the - (dx, dy, dw, dh, da) deltas. These are treated as - hyperparameters of the system. - scale_clamp (float): When predicting deltas, the predicted box scaling - factors (dw and dh) are clamped such that they are <= scale_clamp. - """ - self.weights = weights - self.scale_clamp = scale_clamp - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used - to transform the `src_boxes` into the `target_boxes`. 
That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless - any delta is too large and is clamped). - - Args: - src_boxes (Tensor): Nx5 source boxes, e.g., object proposals - target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth - boxes. - """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) - - target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( - target_boxes, dim=1 - ) - - wx, wy, ww, wh, wa = self.weights - dx = wx * (target_ctr_x - src_ctr_x) / src_widths - dy = wy * (target_ctr_y - src_ctr_y) / src_heights - dw = ww * torch.log(target_widths / src_widths) - dh = wh * torch.log(target_heights / src_heights) - # Angles of deltas are in radians while angles of boxes are in degrees. - # the conversion to radians serve as a way to normalize the values - da = target_angles - src_angles - da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) - da *= wa * math.pi / 180.0 - - deltas = torch.stack((dx, dy, dw, dh, da), dim=1) - assert ( - (src_widths > 0).all().item() - ), "Input boxes to Box2BoxTransformRotated are not valid!" - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, k*5). - deltas[i] represents box transformation for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 5) - """ - assert deltas.shape[1] % 5 == 0 and boxes.shape[1] == 5 - - boxes = boxes.to(deltas.dtype).unsqueeze(2) - - ctr_x = boxes[:, 0] - ctr_y = boxes[:, 1] - widths = boxes[:, 2] - heights = boxes[:, 3] - angles = boxes[:, 4] - - wx, wy, ww, wh, wa = self.weights - - dx = deltas[:, 0::5] / wx - dy = deltas[:, 1::5] / wy - dw = deltas[:, 2::5] / ww - dh = deltas[:, 3::5] / wh - da = deltas[:, 4::5] / wa - - # Prevent sending too large values into torch.exp() - dw = torch.clamp(dw, max=self.scale_clamp) - dh = torch.clamp(dh, max=self.scale_clamp) - - pred_boxes = torch.zeros_like(deltas) - pred_boxes[:, 0::5] = dx * widths + ctr_x # x_ctr - pred_boxes[:, 1::5] = dy * heights + ctr_y # y_ctr - pred_boxes[:, 2::5] = torch.exp(dw) * widths # width - pred_boxes[:, 3::5] = torch.exp(dh) * heights # height - - # Following original RRPN implementation, - # angles of deltas are in radians while angles of boxes are in degrees. - pred_angle = da * 180.0 / math.pi + angles - pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) - - pred_boxes[:, 4::5] = pred_angle - - return pred_boxes - - -class Box2BoxTransformLinear: - """ - The linear box-to-box transform defined in FCOS. The transformation is parameterized - by the distance from the center of (square) src box to 4 edges of the target box. - """ - - def __init__(self, normalize_by_size=True): - """ - Args: - normalize_by_size: normalize deltas by the size of src (anchor) boxes. - """ - self.normalize_by_size = normalize_by_size - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx1, dy1, dx2, dy2) that can be used - to transform the `src_boxes` into the `target_boxes`. That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true. - The center of src must be inside target boxes. 
- - Args: - src_boxes (Tensor): square source boxes, e.g., anchors - target_boxes (Tensor): target of the transformation, e.g., ground-truth - boxes. - """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_ctr_x = 0.5 * (src_boxes[:, 0] + src_boxes[:, 2]) - src_ctr_y = 0.5 * (src_boxes[:, 1] + src_boxes[:, 3]) - - target_l = src_ctr_x - target_boxes[:, 0] - target_t = src_ctr_y - target_boxes[:, 1] - target_r = target_boxes[:, 2] - src_ctr_x - target_b = target_boxes[:, 3] - src_ctr_y - - deltas = torch.stack((target_l, target_t, target_r, target_b), dim=1) - if self.normalize_by_size: - stride_w = src_boxes[:, 2] - src_boxes[:, 0] - stride_h = src_boxes[:, 3] - src_boxes[:, 1] - strides = torch.stack([stride_w, stride_h, stride_w, stride_h], axis=1) - deltas = deltas / strides - - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx1, dy1, dx2, dy2) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. - deltas[i] represents k potentially different class-specific - box transformations for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 4) - """ - # Ensure the output is a valid box. See Sec 2.1 of https://arxiv.org/abs/2006.09214 - deltas = F.relu(deltas) - boxes = boxes.to(deltas.dtype) - - ctr_x = 0.5 * (boxes[:, 0] + boxes[:, 2]) - ctr_y = 0.5 * (boxes[:, 1] + boxes[:, 3]) - if self.normalize_by_size: - stride_w = boxes[:, 2] - boxes[:, 0] - stride_h = boxes[:, 3] - boxes[:, 1] - strides = torch.stack([stride_w, stride_h, stride_w, stride_h], axis=1) - deltas = deltas * strides - - l = deltas[:, 0::4] - t = deltas[:, 1::4] - r = deltas[:, 2::4] - b = deltas[:, 3::4] - - pred_boxes = torch.zeros_like(deltas) - pred_boxes[:, 0::4] = ctr_x[:, None] - l # x1 - pred_boxes[:, 1::4] = ctr_y[:, None] - t # y1 - pred_boxes[:, 2::4] = ctr_x[:, None] + r # x2 - pred_boxes[:, 3::4] = ctr_y[:, None] + b # y2 - return pred_boxes - - -def _dense_box_regression_loss( - anchors: List[Union[Boxes, torch.Tensor]], - box2box_transform: Box2BoxTransform, - pred_anchor_deltas: List[torch.Tensor], - gt_boxes: List[torch.Tensor], - fg_mask: torch.Tensor, - box_reg_loss_type="smooth_l1", - smooth_l1_beta=0.0, -): - """ - Compute loss for dense multi-level box regression. - Loss is accumulated over ``fg_mask``. - - Args: - anchors: #lvl anchor boxes, each is (HixWixA, 4) - pred_anchor_deltas: #lvl predictions, each is (N, HixWixA, 4) - gt_boxes: N ground truth boxes, each has shape (R, 4) (R = sum(Hi * Wi * A)) - fg_mask: the foreground boolean mask of shape (N, R) to compute loss on - box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou", - "diou", "ciou". - smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to - use L1 loss. 
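As with the other transforms, the FCOS-style linear (l, t, r, b) parameterization defined above round-trips as long as the source centre lies inside the target box; a minimal sketch without size normalization (illustrative helper names, not the detectron2 API):

```python
import torch

def encode_ltrb(src, tgt):
    # distances from the src box centre to the four edges of the target box
    cx = 0.5 * (src[:, 0] + src[:, 2])
    cy = 0.5 * (src[:, 1] + src[:, 3])
    return torch.stack((cx - tgt[:, 0], cy - tgt[:, 1],
                        tgt[:, 2] - cx, tgt[:, 3] - cy), dim=1)

def decode_ltrb(deltas, src):
    deltas = torch.relu(deltas)  # clamp so the output is always a valid box
    cx = 0.5 * (src[:, 0] + src[:, 2])
    cy = 0.5 * (src[:, 1] + src[:, 3])
    return torch.stack((cx - deltas[:, 0], cy - deltas[:, 1],
                        cx + deltas[:, 2], cy + deltas[:, 3]), dim=1)

anchor = torch.tensor([[32., 32., 40., 40.]])  # centre (36, 36) lies inside the target
target = torch.tensor([[20., 25., 60., 50.]])
assert torch.allclose(decode_ltrb(encode_ltrb(anchor, target), anchor), target)
```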
Only used when `box_reg_loss_type` is "smooth_l1" - """ - if isinstance(anchors[0], Boxes): - anchors = type(anchors[0]).cat(anchors).tensor # (R, 4) - else: - anchors = cat(anchors) - if box_reg_loss_type == "smooth_l1": - gt_anchor_deltas = [box2box_transform.get_deltas(anchors, k) for k in gt_boxes] - gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, R, 4) - loss_box_reg = smooth_l1_loss( - cat(pred_anchor_deltas, dim=1)[fg_mask], - gt_anchor_deltas[fg_mask], - beta=smooth_l1_beta, - reduction="sum", - ) - elif box_reg_loss_type == "giou": - pred_boxes = [ - box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) - ] - loss_box_reg = giou_loss( - torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" - ) - elif box_reg_loss_type == "diou": - pred_boxes = [ - box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) - ] - loss_box_reg = diou_loss( - torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" - ) - elif box_reg_loss_type == "ciou": - pred_boxes = [ - box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1) - ] - loss_box_reg = ciou_loss( - torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum" - ) - else: - raise ValueError(f"Invalid dense box regression loss type '{box_reg_loss_type}'") - return loss_box_reg diff --git a/detectron2/detectron2/modeling/matcher.py b/detectron2/detectron2/modeling/matcher.py deleted file mode 100644 index 49660fb96a54f059e2e66bbcaec47732bea84501..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/matcher.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from typing import List -import torch - -from detectron2.layers import nonzero_tuple - - -# TODO: the name is too general -class Matcher: - """ - This class assigns to each predicted "element" (e.g., a box) a ground-truth - element. Each predicted element will have exactly zero or one matches; each - ground-truth element may be matched to zero or more predicted elements. - - The matching is determined by the MxN match_quality_matrix, that characterizes - how well each (ground-truth, prediction)-pair match each other. For example, - if the elements are boxes, this matrix may contain box intersection-over-union - overlap values. - - The matcher returns (a) a vector of length N containing the index of the - ground-truth element m in [0, M) that matches to prediction n in [0, N). - (b) a vector of length N containing the labels for each prediction. - """ - - def __init__( - self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False - ): - """ - Args: - thresholds (list): a list of thresholds used to stratify predictions - into levels. - labels (list): a list of values to label predictions belonging at - each level. A label can be one of {-1, 0, 1} signifying - {ignore, negative class, positive class}, respectively. - allow_low_quality_matches (bool): if True, produce additional matches - for predictions with maximum match quality lower than high_threshold. - See set_low_quality_matches_ for more details. - - For example, - thresholds = [0.3, 0.5] - labels = [0, -1, 1] - All predictions with iou < 0.3 will be marked with 0 and - thus will be considered as false positives while training. - All predictions with 0.3 <= iou < 0.5 will be marked with -1 and - thus will be ignored. 
- All predictions with 0.5 <= iou will be marked with 1 and - thus will be considered as true positives. - """ - # Add -inf and +inf to first and last position in thresholds - thresholds = thresholds[:] - assert thresholds[0] > 0 - thresholds.insert(0, -float("inf")) - thresholds.append(float("inf")) - # Currently torchscript does not support all + generator - assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])]) - assert all([l in [-1, 0, 1] for l in labels]) - assert len(labels) == len(thresholds) - 1 - self.thresholds = thresholds - self.labels = labels - self.allow_low_quality_matches = allow_low_quality_matches - - def __call__(self, match_quality_matrix): - """ - Args: - match_quality_matrix (Tensor[float]): an MxN tensor, containing the - pairwise quality between M ground-truth elements and N predicted - elements. All elements must be >= 0 (due to the us of `torch.nonzero` - for selecting indices in :meth:`set_low_quality_matches_`). - - Returns: - matches (Tensor[int64]): a vector of length N, where matches[i] is a matched - ground-truth index in [0, M) - match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates - whether a prediction is a true or false positive or ignored - """ - assert match_quality_matrix.dim() == 2 - if match_quality_matrix.numel() == 0: - default_matches = match_quality_matrix.new_full( - (match_quality_matrix.size(1),), 0, dtype=torch.int64 - ) - # When no gt boxes exist, we define IOU = 0 and therefore set labels - # to `self.labels[0]`, which usually defaults to background class 0 - # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds - default_match_labels = match_quality_matrix.new_full( - (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 - ) - return default_matches, default_match_labels - - assert torch.all(match_quality_matrix >= 0) - - # match_quality_matrix is M (gt) x N (predicted) - # Max over gt elements (dim 0) to find best gt candidate for each prediction - matched_vals, matches = match_quality_matrix.max(dim=0) - - match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - - for l, low, high in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): - low_high = (matched_vals >= low) & (matched_vals < high) - match_labels[low_high] = l - - if self.allow_low_quality_matches: - self.set_low_quality_matches_(match_labels, match_quality_matrix) - - return matches, match_labels - - def set_low_quality_matches_(self, match_labels, match_quality_matrix): - """ - Produce additional matches for predictions that have only low-quality matches. - Specifically, for each ground-truth G find the set of predictions that have - maximum overlap with it (including ties); for each prediction in that set, if - it is unmatched, then match it to the ground-truth G. - - This function implements the RPN assignment case (i) in Sec. 3.1.2 of - :paper:`Faster R-CNN`. - """ - # For each gt, find the prediction with which it has highest quality - highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) - # Find the highest quality match available, even if it is low, including ties. - # Note that the matches qualities must be positive due to the use of - # `torch.nonzero`. - _, pred_inds_with_highest_quality = nonzero_tuple( - match_quality_matrix == highest_quality_foreach_gt[:, None] - ) - # If an anchor was labeled positive only due to a low-quality match - # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. 
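A toy re-implementation of the thresholding rule documented above may help: for each prediction, take the best-overlapping ground truth, then bucket it by IoU using thresholds [0.3, 0.5] and labels [0, -1, 1]. This is a self-contained sketch for illustration, not the Matcher class itself.

```python
import torch

iou = torch.tensor([[0.10, 0.40, 0.80],    # gt 0 vs predictions 0..2
                    [0.25, 0.55, 0.05]])   # gt 1 vs predictions 0..2

matched_vals, matches = iou.max(dim=0)     # best ground truth per prediction
thresholds = [-float("inf"), 0.3, 0.5, float("inf")]
labels = [0, -1, 1]                        # negative / ignore / positive

match_labels = torch.empty_like(matches, dtype=torch.int8)
for lbl, low, high in zip(labels, thresholds[:-1], thresholds[1:]):
    match_labels[(matched_vals >= low) & (matched_vals < high)] = lbl

print(matches)        # tensor([1, 1, 0])
print(match_labels)   # tensor([0, 1, 1], dtype=torch.int8)
```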
- # This follows the implementation in Detectron, and is found to have no significant impact. - match_labels[pred_inds_with_highest_quality] = 1 diff --git a/detectron2/detectron2/modeling/meta_arch/__init__.py b/detectron2/detectron2/modeling/meta_arch/__init__.py deleted file mode 100644 index 6b0668157052ce7b796ef50bc7ee85361e7605b9..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -from .build import META_ARCH_REGISTRY, build_model # isort:skip - -from .panoptic_fpn import PanopticFPN - -# import all the meta_arch, so they will be registered -from .rcnn import GeneralizedRCNN, ProposalNetwork -from .dense_detector import DenseDetector -from .retinanet import RetinaNet -from .fcos import FCOS -from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head - - -__all__ = list(globals().keys()) diff --git a/detectron2/detectron2/modeling/meta_arch/build.py b/detectron2/detectron2/modeling/meta_arch/build.py deleted file mode 100644 index 3427215746c9a146bd902f22ea9b26d121c36b27..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/build.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch - -from detectron2.utils.logger import _log_api_usage -from detectron2.utils.registry import Registry - -META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip -META_ARCH_REGISTRY.__doc__ = """ -Registry for meta-architectures, i.e. the whole model. - -The registered object will be called with `obj(cfg)` -and expected to return a `nn.Module` object. -""" - - -def build_model(cfg): - """ - Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. - Note that it does not load any weights from ``cfg``. - """ - meta_arch = cfg.MODEL.META_ARCHITECTURE - model = META_ARCH_REGISTRY.get(meta_arch)(cfg) - model.to(torch.device(cfg.MODEL.DEVICE)) - _log_api_usage("modeling.meta_arch." + meta_arch) - return model diff --git a/detectron2/detectron2/modeling/meta_arch/dense_detector.py b/detectron2/detectron2/modeling/meta_arch/dense_detector.py deleted file mode 100644 index 74456f95666b89e5507627427bf1b3b8aea9c69f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/dense_detector.py +++ /dev/null @@ -1,294 +0,0 @@ -import numpy as np -from typing import Dict, List, Optional, Tuple -import torch -from torch import Tensor, nn - -from detectron2.data.detection_utils import convert_image_to_rgb -from detectron2.layers import move_device_like -from detectron2.modeling import Backbone -from detectron2.structures import Boxes, ImageList, Instances -from detectron2.utils.events import get_event_storage - -from ..postprocessing import detector_postprocess - - -def permute_to_N_HWA_K(tensor, K: int): - """ - Transpose/reshape a tensor from (N, (Ai x K), H, W) to (N, (HxWxAi), K) - """ - assert tensor.dim() == 4, tensor.shape - N, _, H, W = tensor.shape - tensor = tensor.view(N, -1, K, H, W) - tensor = tensor.permute(0, 3, 4, 1, 2) - tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) - return tensor - - -class DenseDetector(nn.Module): - """ - Base class for dense detector. We define a dense detector as a fully-convolutional model that - makes per-pixel (i.e. dense) predictions. 
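The registry defined in build.py above is typically used by registering a class and then selecting it by name in the config. A hedged usage sketch, assuming a working detectron2 installation; `MyDetector` is a hypothetical placeholder model, not part of the library:

```python
import torch
from torch import nn
from detectron2.config import get_cfg
from detectron2.modeling import META_ARCH_REGISTRY, build_model

@META_ARCH_REGISTRY.register()
class MyDetector(nn.Module):
    """Hypothetical minimal meta-architecture; real models return losses/Instances."""
    def __init__(self, cfg):
        super().__init__()
        self.dummy = nn.Conv2d(3, 1, 1)

    def forward(self, batched_inputs):
        return [{"instances": None} for _ in batched_inputs]

cfg = get_cfg()
cfg.MODEL.META_ARCHITECTURE = "MyDetector"
cfg.MODEL.DEVICE = "cpu"
model = build_model(cfg)   # looks up "MyDetector" in META_ARCH_REGISTRY and builds it
```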
- """ - - def __init__( - self, - backbone: Backbone, - head: nn.Module, - head_in_features: Optional[List[str]] = None, - *, - pixel_mean, - pixel_std, - ): - """ - Args: - backbone: backbone module - head: head module - head_in_features: backbone features to use in head. Default to all backbone features. - pixel_mean (Tuple[float]): - Values to be used for image normalization (BGR order). - To train on images of different number of channels, set different mean & std. - Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] - pixel_std (Tuple[float]): - When using pre-trained models in Detectron1 or any MSRA models, - std has been absorbed into its conv1 weights, so the std needs to be set 1. - Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) - """ - super().__init__() - - self.backbone = backbone - self.head = head - if head_in_features is None: - shapes = self.backbone.output_shape() - self.head_in_features = sorted(shapes.keys(), key=lambda x: shapes[x].stride) - else: - self.head_in_features = head_in_features - self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) - - @property - def device(self): - return self.pixel_mean.device - - def _move_to_current_device(self, x): - return move_device_like(x, self.pixel_mean) - - def forward(self, batched_inputs: List[Dict[str, Tensor]]): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances: Instances - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - In training, dict[str, Tensor]: mapping from a named loss to a tensor storing the - loss. Used during training only. In inference, the standard output format, described - in :doc:`/tutorials/models`. - """ - images = self.preprocess_image(batched_inputs) - features = self.backbone(images.tensor) - features = [features[f] for f in self.head_in_features] - predictions = self.head(features) - - if self.training: - assert not torch.jit.is_scripting(), "Not supported" - assert "instances" in batched_inputs[0], "Instance annotations are missing in training!" - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - return self.forward_training(images, features, predictions, gt_instances) - else: - results = self.forward_inference(images, features, predictions) - if torch.jit.is_scripting(): - return results - - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"instances": r}) - return processed_results - - def forward_training(self, images, features, predictions, gt_instances): - raise NotImplementedError() - - def preprocess_image(self, batched_inputs: List[Dict[str, Tensor]]): - """ - Normalize, pad and batch the input images. 
- """ - images = [self._move_to_current_device(x["image"]) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors( - images, - self.backbone.size_divisibility, - padding_constraints=self.backbone.padding_constraints, - ) - return images - - def _transpose_dense_predictions( - self, predictions: List[List[Tensor]], dims_per_anchor: List[int] - ) -> List[List[Tensor]]: - """ - Transpose the dense per-level predictions. - - Args: - predictions: a list of outputs, each is a list of per-level - predictions with shape (N, Ai x K, Hi, Wi), where N is the - number of images, Ai is the number of anchors per location on - level i, K is the dimension of predictions per anchor. - dims_per_anchor: the value of K for each predictions. e.g. 4 for - box prediction, #classes for classification prediction. - - Returns: - List[List[Tensor]]: each prediction is transposed to (N, Hi x Wi x Ai, K). - """ - assert len(predictions) == len(dims_per_anchor) - res: List[List[Tensor]] = [] - for pred, dim_per_anchor in zip(predictions, dims_per_anchor): - pred = [permute_to_N_HWA_K(x, dim_per_anchor) for x in pred] - res.append(pred) - return res - - def _ema_update(self, name: str, value: float, initial_value: float, momentum: float = 0.9): - """ - Apply EMA update to `self.name` using `value`. - - This is mainly used for loss normalizer. In Detectron1, loss is normalized by number - of foreground samples in the batch. When batch size is 1 per GPU, #foreground has a - large variance and using it lead to lower performance. Therefore we maintain an EMA of - #foreground to stabilize the normalizer. - - Args: - name: name of the normalizer - value: the new value to update - initial_value: the initial value to start with - momentum: momentum of EMA - - Returns: - float: the updated EMA value - """ - if hasattr(self, name): - old = getattr(self, name) - else: - old = initial_value - new = old * momentum + value * (1 - momentum) - setattr(self, name, new) - return new - - def _decode_per_level_predictions( - self, - anchors: Boxes, - pred_scores: Tensor, - pred_deltas: Tensor, - score_thresh: float, - topk_candidates: int, - image_size: Tuple[int, int], - ) -> Instances: - """ - Decode boxes and classification predictions of one featuer level, by - the following steps: - 1. filter the predictions based on score threshold and top K scores. - 2. transform the box regression outputs - 3. return the predicted scores, classes and boxes - - Args: - anchors: Boxes, anchor for this feature level - pred_scores: HxWxA,K - pred_deltas: HxWxA,4 - - Returns: - Instances: with field "scores", "pred_boxes", "pred_classes". - """ - # Apply two filtering to make NMS faster. - # 1. Keep boxes with confidence score higher than threshold - keep_idxs = pred_scores > score_thresh - pred_scores = pred_scores[keep_idxs] - topk_idxs = torch.nonzero(keep_idxs) # Kx2 - - # 2. 
Keep top k top scoring boxes only - topk_idxs_size = topk_idxs.shape[0] - if isinstance(topk_idxs_size, Tensor): - # It's a tensor in tracing - num_topk = torch.clamp(topk_idxs_size, max=topk_candidates) - else: - num_topk = min(topk_idxs_size, topk_candidates) - pred_scores, idxs = pred_scores.topk(num_topk) - topk_idxs = topk_idxs[idxs] - - anchor_idxs, classes_idxs = topk_idxs.unbind(dim=1) - - pred_boxes = self.box2box_transform.apply_deltas( - pred_deltas[anchor_idxs], anchors.tensor[anchor_idxs] - ) - return Instances( - image_size, pred_boxes=Boxes(pred_boxes), scores=pred_scores, pred_classes=classes_idxs - ) - - def _decode_multi_level_predictions( - self, - anchors: List[Boxes], - pred_scores: List[Tensor], - pred_deltas: List[Tensor], - score_thresh: float, - topk_candidates: int, - image_size: Tuple[int, int], - ) -> Instances: - """ - Run `_decode_per_level_predictions` for all feature levels and concat the results. - """ - predictions = [ - self._decode_per_level_predictions( - anchors_i, - box_cls_i, - box_reg_i, - score_thresh, - topk_candidates, - image_size, - ) - # Iterate over every feature level - for box_cls_i, box_reg_i, anchors_i in zip(pred_scores, pred_deltas, anchors) - ] - return predictions[0].cat(predictions) # 'Instances.cat' is not scriptale but this is - - def visualize_training(self, batched_inputs, results): - """ - A function used to visualize ground truth images and final network predictions. - It shows ground truth bounding boxes on the original image and up to 20 - predicted object bounding boxes on the original image. - - Args: - batched_inputs (list): a list that contains input to the model. - results (List[Instances]): a list of #images elements returned by forward_inference(). - """ - from detectron2.utils.visualizer import Visualizer - - assert len(batched_inputs) == len( - results - ), "Cannot visualize inputs and results of different sizes" - storage = get_event_storage() - max_boxes = 20 - - image_index = 0 # only visualize a single image - img = batched_inputs[image_index]["image"] - img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) - v_gt = Visualizer(img, None) - v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) - anno_img = v_gt.get_image() - processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) - predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() - - v_pred = Visualizer(img, None) - v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) - prop_img = v_pred.get_image() - vis_img = np.vstack((anno_img, prop_img)) - vis_img = vis_img.transpose(2, 0, 1) - vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" - storage.put_image(vis_name, vis_img) diff --git a/detectron2/detectron2/modeling/meta_arch/fcos.py b/detectron2/detectron2/modeling/meta_arch/fcos.py deleted file mode 100644 index 7e7140bfa04a8e8bb199a800805cbaf22fdd8f32..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/fcos.py +++ /dev/null @@ -1,328 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
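The two-stage filtering in `_decode_per_level_predictions` above (score threshold, then top-k) can be traced on toy numbers; the sketch below uses assumed scores for a fabricated shape of 3 anchors x 2 classes.

```python
import torch

pred_scores = torch.tensor([[0.90, 0.02],
                            [0.10, 0.60],
                            [0.30, 0.70]])      # (HWA = 3 anchors, K = 2 classes)
score_thresh, topk_candidates = 0.5, 2

keep_idxs = pred_scores > score_thresh
scores = pred_scores[keep_idxs]                  # flattened surviving scores
topk_idxs = torch.nonzero(keep_idxs)             # (num_kept, 2): [anchor, class]

num_topk = min(topk_idxs.shape[0], topk_candidates)
scores, idxs = scores.topk(num_topk)             # keep only the best-scoring survivors
topk_idxs = topk_idxs[idxs]
anchor_idxs, class_idxs = topk_idxs.unbind(dim=1)

print(scores)       # tensor([0.9000, 0.7000])
print(anchor_idxs)  # tensor([0, 2])
print(class_idxs)   # tensor([0, 1])
```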
- -import logging -from typing import List, Optional, Tuple -import torch -from fvcore.nn import sigmoid_focal_loss_jit -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import ShapeSpec, batched_nms -from detectron2.structures import Boxes, ImageList, Instances, pairwise_point_box_distance -from detectron2.utils.events import get_event_storage - -from ..anchor_generator import DefaultAnchorGenerator -from ..backbone import Backbone -from ..box_regression import Box2BoxTransformLinear, _dense_box_regression_loss -from .dense_detector import DenseDetector -from .retinanet import RetinaNetHead - -__all__ = ["FCOS"] - -logger = logging.getLogger(__name__) - - -class FCOS(DenseDetector): - """ - Implement FCOS in :paper:`fcos`. - """ - - def __init__( - self, - *, - backbone: Backbone, - head: nn.Module, - head_in_features: Optional[List[str]] = None, - box2box_transform=None, - num_classes, - center_sampling_radius: float = 1.5, - focal_loss_alpha=0.25, - focal_loss_gamma=2.0, - test_score_thresh=0.2, - test_topk_candidates=1000, - test_nms_thresh=0.6, - max_detections_per_image=100, - pixel_mean, - pixel_std, - ): - """ - Args: - center_sampling_radius: radius of the "center" of a groundtruth box, - within which all anchor points are labeled positive. - Other arguments mean the same as in :class:`RetinaNet`. - """ - super().__init__( - backbone, head, head_in_features, pixel_mean=pixel_mean, pixel_std=pixel_std - ) - - self.num_classes = num_classes - - # FCOS uses one anchor point per location. - # We represent the anchor point by a box whose size equals the anchor stride. - feature_shapes = backbone.output_shape() - fpn_strides = [feature_shapes[k].stride for k in self.head_in_features] - self.anchor_generator = DefaultAnchorGenerator( - sizes=[[k] for k in fpn_strides], aspect_ratios=[1.0], strides=fpn_strides - ) - - # FCOS parameterizes box regression by a linear transform, - # where predictions are normalized by anchor stride (equal to anchor size). - if box2box_transform is None: - box2box_transform = Box2BoxTransformLinear(normalize_by_size=True) - self.box2box_transform = box2box_transform - - self.center_sampling_radius = float(center_sampling_radius) - - # Loss parameters: - self.focal_loss_alpha = focal_loss_alpha - self.focal_loss_gamma = focal_loss_gamma - - # Inference parameters: - self.test_score_thresh = test_score_thresh - self.test_topk_candidates = test_topk_candidates - self.test_nms_thresh = test_nms_thresh - self.max_detections_per_image = max_detections_per_image - - def forward_training(self, images, features, predictions, gt_instances): - # Transpose the Hi*Wi*A dimension to the middle: - pred_logits, pred_anchor_deltas, pred_centerness = self._transpose_dense_predictions( - predictions, [self.num_classes, 4, 1] - ) - anchors = self.anchor_generator(features) - gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) - return self.losses( - anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes, pred_centerness - ) - - @torch.no_grad() - def _match_anchors(self, gt_boxes: Boxes, anchors: List[Boxes]): - """ - Match ground-truth boxes to a set of multi-level anchors. - - Args: - gt_boxes: Ground-truth boxes from instances of an image. - anchors: List of anchors for each feature map (of different scales). - - Returns: - torch.Tensor - A tensor of shape `(M, R)`, given `M` ground-truth boxes and total - `R` anchor points from all feature levels, indicating the quality - of match between m-th box and r-th anchor. 
Higher value indicates - better match. - """ - # Naming convention: (M = ground-truth boxes, R = anchor points) - # Anchor points are represented as square boxes of size = stride. - num_anchors_per_level = [len(x) for x in anchors] - anchors = Boxes.cat(anchors) # (R, 4) - anchor_centers = anchors.get_centers() # (R, 2) - anchor_sizes = anchors.tensor[:, 2] - anchors.tensor[:, 0] # (R, ) - - lower_bound = anchor_sizes * 4 - lower_bound[: num_anchors_per_level[0]] = 0 - upper_bound = anchor_sizes * 8 - upper_bound[-num_anchors_per_level[-1] :] = float("inf") - - gt_centers = gt_boxes.get_centers() - - # FCOS with center sampling: anchor point must be close enough to - # ground-truth box center. - center_dists = (anchor_centers[None, :, :] - gt_centers[:, None, :]).abs_() - sampling_regions = self.center_sampling_radius * anchor_sizes[None, :] - - match_quality_matrix = center_dists.max(dim=2).values < sampling_regions - - pairwise_dist = pairwise_point_box_distance(anchor_centers, gt_boxes) - pairwise_dist = pairwise_dist.permute(1, 0, 2) # (M, R, 4) - - # The original FCOS anchor matching rule: anchor point must be inside GT. - match_quality_matrix &= pairwise_dist.min(dim=2).values > 0 - - # Multilevel anchor matching in FCOS: each anchor is only responsible - # for certain scale range. - pairwise_dist = pairwise_dist.max(dim=2).values - match_quality_matrix &= (pairwise_dist > lower_bound[None, :]) & ( - pairwise_dist < upper_bound[None, :] - ) - # Match the GT box with minimum area, if there are multiple GT matches. - gt_areas = gt_boxes.area() # (M, ) - - match_quality_matrix = match_quality_matrix.to(torch.float32) - match_quality_matrix *= 1e8 - gt_areas[:, None] - return match_quality_matrix # (M, R) - - @torch.no_grad() - def label_anchors(self, anchors: List[Boxes], gt_instances: List[Instances]): - """ - Same interface as :meth:`RetinaNet.label_anchors`, but implemented with FCOS - anchor matching rule. - - Unlike RetinaNet, there are no ignored anchors. - """ - - gt_labels, matched_gt_boxes = [], [] - - for inst in gt_instances: - if len(inst) > 0: - match_quality_matrix = self._match_anchors(inst.gt_boxes, anchors) - - # Find matched ground-truth box per anchor. Un-matched anchors are - # assigned -1. This is equivalent to using an anchor matcher as used - # in R-CNN/RetinaNet: `Matcher(thresholds=[1e-5], labels=[0, 1])` - match_quality, matched_idxs = match_quality_matrix.max(dim=0) - matched_idxs[match_quality < 1e-5] = -1 - - matched_gt_boxes_i = inst.gt_boxes.tensor[matched_idxs.clip(min=0)] - gt_labels_i = inst.gt_classes[matched_idxs.clip(min=0)] - - # Anchors with matched_idxs = -1 are labeled background. - gt_labels_i[matched_idxs < 0] = self.num_classes - else: - matched_gt_boxes_i = torch.zeros_like(Boxes.cat(anchors).tensor) - gt_labels_i = torch.full( - (len(matched_gt_boxes_i),), - fill_value=self.num_classes, - dtype=torch.long, - device=matched_gt_boxes_i.device, - ) - - gt_labels.append(gt_labels_i) - matched_gt_boxes.append(matched_gt_boxes_i) - - return gt_labels, matched_gt_boxes - - def losses( - self, anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes, pred_centerness - ): - """ - This method is almost identical to :meth:`RetinaNet.losses`, with an extra - "loss_centerness" in the returned dict. 
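When several ground-truth boxes pass all of the matching tests above, the `1e8 - area` trick makes the smallest box win the subsequent max over ground truths. A toy sketch with assumed values:

```python
import torch

# anchors 1 and 2 each match at least one GT; GT 1 (area 100) is smaller than GT 0 (area 400)
match = torch.tensor([[True, True, False, False],
                      [False, True, True, False]])
areas = torch.tensor([400.0, 100.0])

quality = match.to(torch.float32) * (1e8 - areas[:, None])
match_quality, matched_idxs = quality.max(dim=0)
matched_idxs[match_quality < 1e-5] = -1   # anchors matched to nothing become background

print(matched_idxs)  # tensor([ 0,  1,  1, -1])  -- anchor 1 prefers the smaller GT 1
```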
- """ - num_images = len(gt_labels) - gt_labels = torch.stack(gt_labels) # (M, R) - - pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) - num_pos_anchors = pos_mask.sum().item() - get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) - normalizer = self._ema_update("loss_normalizer", max(num_pos_anchors, 1), 300) - - # classification and regression loss - gt_labels_target = F.one_hot(gt_labels, num_classes=self.num_classes + 1)[ - :, :, :-1 - ] # no loss for the last (background) class - loss_cls = sigmoid_focal_loss_jit( - torch.cat(pred_logits, dim=1), - gt_labels_target.to(pred_logits[0].dtype), - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) - - loss_box_reg = _dense_box_regression_loss( - anchors, - self.box2box_transform, - pred_anchor_deltas, - gt_boxes, - pos_mask, - box_reg_loss_type="giou", - ) - - ctrness_targets = self.compute_ctrness_targets(anchors, gt_boxes) # (M, R) - pred_centerness = torch.cat(pred_centerness, dim=1).squeeze(dim=2) # (M, R) - ctrness_loss = F.binary_cross_entropy_with_logits( - pred_centerness[pos_mask], ctrness_targets[pos_mask], reduction="sum" - ) - return { - "loss_fcos_cls": loss_cls / normalizer, - "loss_fcos_loc": loss_box_reg / normalizer, - "loss_fcos_ctr": ctrness_loss / normalizer, - } - - def compute_ctrness_targets(self, anchors: List[Boxes], gt_boxes: List[torch.Tensor]): - anchors = Boxes.cat(anchors).tensor # Rx4 - reg_targets = [self.box2box_transform.get_deltas(anchors, m) for m in gt_boxes] - reg_targets = torch.stack(reg_targets, dim=0) # NxRx4 - if len(reg_targets) == 0: - return reg_targets.new_zeros(len(reg_targets)) - left_right = reg_targets[:, :, [0, 2]] - top_bottom = reg_targets[:, :, [1, 3]] - ctrness = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * ( - top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0] - ) - return torch.sqrt(ctrness) - - def forward_inference( - self, - images: ImageList, - features: List[torch.Tensor], - predictions: List[List[torch.Tensor]], - ): - pred_logits, pred_anchor_deltas, pred_centerness = self._transpose_dense_predictions( - predictions, [self.num_classes, 4, 1] - ) - anchors = self.anchor_generator(features) - - results: List[Instances] = [] - for img_idx, image_size in enumerate(images.image_sizes): - scores_per_image = [ - # Multiply and sqrt centerness & classification scores - # (See eqn. 4 in https://arxiv.org/abs/2006.09214) - torch.sqrt(x[img_idx].sigmoid_() * y[img_idx].sigmoid_()) - for x, y in zip(pred_logits, pred_centerness) - ] - deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] - results_per_image = self.inference_single_image( - anchors, scores_per_image, deltas_per_image, image_size - ) - results.append(results_per_image) - return results - - def inference_single_image( - self, - anchors: List[Boxes], - box_cls: List[torch.Tensor], - box_delta: List[torch.Tensor], - image_size: Tuple[int, int], - ): - """ - Identical to :meth:`RetinaNet.inference_single_image. - """ - pred = self._decode_multi_level_predictions( - anchors, - box_cls, - box_delta, - self.test_score_thresh, - self.test_topk_candidates, - image_size, - ) - keep = batched_nms( - pred.pred_boxes.tensor, pred.scores, pred.pred_classes, self.test_nms_thresh - ) - return pred[keep[: self.max_detections_per_image]] - - -class FCOSHead(RetinaNetHead): - """ - The head used in :paper:`fcos`. It adds an additional centerness - prediction branch on top of :class:`RetinaNetHead`. 
- """ - - def __init__(self, *, input_shape: List[ShapeSpec], conv_dims: List[int], **kwargs): - super().__init__(input_shape=input_shape, conv_dims=conv_dims, num_anchors=1, **kwargs) - # Unlike original FCOS, we do not add an additional learnable scale layer - # because it's found to have no benefits after normalizing regression targets by stride. - self._num_features = len(input_shape) - self.ctrness = nn.Conv2d(conv_dims[-1], 1, kernel_size=3, stride=1, padding=1) - torch.nn.init.normal_(self.ctrness.weight, std=0.01) - torch.nn.init.constant_(self.ctrness.bias, 0) - - def forward(self, features): - assert len(features) == self._num_features - logits = [] - bbox_reg = [] - ctrness = [] - for feature in features: - logits.append(self.cls_score(self.cls_subnet(feature))) - bbox_feature = self.bbox_subnet(feature) - bbox_reg.append(self.bbox_pred(bbox_feature)) - ctrness.append(self.ctrness(bbox_feature)) - return logits, bbox_reg, ctrness diff --git a/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py b/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py deleted file mode 100644 index b31e1c8dc06913d413ae829426e0625fdd5c2f38..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py +++ /dev/null @@ -1,269 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import logging -from typing import Dict, List -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.structures import ImageList - -from ..postprocessing import detector_postprocess, sem_seg_postprocess -from .build import META_ARCH_REGISTRY -from .rcnn import GeneralizedRCNN -from .semantic_seg import build_sem_seg_head - -__all__ = ["PanopticFPN"] - - -@META_ARCH_REGISTRY.register() -class PanopticFPN(GeneralizedRCNN): - """ - Implement the paper :paper:`PanopticFPN`. - """ - - @configurable - def __init__( - self, - *, - sem_seg_head: nn.Module, - combine_overlap_thresh: float = 0.5, - combine_stuff_area_thresh: float = 4096, - combine_instances_score_thresh: float = 0.5, - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - sem_seg_head: a module for the semantic segmentation head. - combine_overlap_thresh: combine masks into one instances if - they have enough overlap - combine_stuff_area_thresh: ignore stuff areas smaller than this threshold - combine_instances_score_thresh: ignore instances whose score is - smaller than this threshold - - Other arguments are the same as :class:`GeneralizedRCNN`. - """ - super().__init__(**kwargs) - self.sem_seg_head = sem_seg_head - # options when combining instance & semantic outputs - self.combine_overlap_thresh = combine_overlap_thresh - self.combine_stuff_area_thresh = combine_stuff_area_thresh - self.combine_instances_score_thresh = combine_instances_score_thresh - - @classmethod - def from_config(cls, cfg): - ret = super().from_config(cfg) - ret.update( - { - "combine_overlap_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH, - "combine_stuff_area_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT, - "combine_instances_score_thresh": cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH, # noqa - } - ) - ret["sem_seg_head"] = build_sem_seg_head(cfg, ret["backbone"].output_shape()) - logger = logging.getLogger(__name__) - if not cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED: - logger.warning( - "PANOPTIC_FPN.COMBINED.ENABLED is no longer used. " - " model.inference(do_postprocess=) should be used to toggle postprocessing." 
- ) - if cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT != 1.0: - w = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT - logger.warning( - "PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT should be replaced by weights on each ROI head." - ) - - def update_weight(x): - if isinstance(x, dict): - return {k: v * w for k, v in x.items()} - else: - return x * w - - roi_heads = ret["roi_heads"] - roi_heads.box_predictor.loss_weight = update_weight(roi_heads.box_predictor.loss_weight) - roi_heads.mask_head.loss_weight = update_weight(roi_heads.mask_head.loss_weight) - return ret - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - - For now, each item in the list is a dict that contains: - - * "image": Tensor, image in (C, H, W) format. - * "instances": Instances - * "sem_seg": semantic segmentation ground truth. - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - each dict has the results for one image. The dict contains the following keys: - - * "instances": see :meth:`GeneralizedRCNN.forward` for its format. - * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. - * "panoptic_seg": See the return value of - :func:`combine_semantic_and_instance_outputs` for its format. - """ - if not self.training: - return self.inference(batched_inputs) - images = self.preprocess_image(batched_inputs) - features = self.backbone(images.tensor) - - assert "sem_seg" in batched_inputs[0] - gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] - gt_sem_seg = ImageList.from_tensors( - gt_sem_seg, - self.backbone.size_divisibility, - self.sem_seg_head.ignore_value, - self.backbone.padding_constraints, - ).tensor - sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) - - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - detector_results, detector_losses = self.roi_heads( - images, features, proposals, gt_instances - ) - - losses = sem_seg_losses - losses.update(proposal_losses) - losses.update(detector_losses) - return losses - - def inference(self, batched_inputs: List[Dict[str, torch.Tensor]], do_postprocess: bool = True): - """ - Run inference on the given inputs. - - Args: - batched_inputs (list[dict]): same as in :meth:`forward` - do_postprocess (bool): whether to apply post-processing on the outputs. - - Returns: - When do_postprocess=True, see docs in :meth:`forward`. - Otherwise, returns a (list[Instances], list[Tensor]) that contains - the raw detector outputs, and raw semantic segmentation outputs. 
- """ - images = self.preprocess_image(batched_inputs) - features = self.backbone(images.tensor) - sem_seg_results, sem_seg_losses = self.sem_seg_head(features, None) - proposals, _ = self.proposal_generator(images, features, None) - detector_results, _ = self.roi_heads(images, features, proposals, None) - - if do_postprocess: - processed_results = [] - for sem_seg_result, detector_result, input_per_image, image_size in zip( - sem_seg_results, detector_results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) - detector_r = detector_postprocess(detector_result, height, width) - - processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) - - panoptic_r = combine_semantic_and_instance_outputs( - detector_r, - sem_seg_r.argmax(dim=0), - self.combine_overlap_thresh, - self.combine_stuff_area_thresh, - self.combine_instances_score_thresh, - ) - processed_results[-1]["panoptic_seg"] = panoptic_r - return processed_results - else: - return detector_results, sem_seg_results - - -def combine_semantic_and_instance_outputs( - instance_results, - semantic_results, - overlap_threshold, - stuff_area_thresh, - instances_score_thresh, -): - """ - Implement a simple combining logic following - "combine_semantic_and_instance_predictions.py" in panopticapi - to produce panoptic segmentation outputs. - - Args: - instance_results: output of :func:`detector_postprocess`. - semantic_results: an (H, W) tensor, each element is the contiguous semantic - category id - - Returns: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. - segments_info (list[dict]): Describe each segment in `panoptic_seg`. - Each dict contains keys "id", "category_id", "isthing". 
- """ - panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) - - # sort instance outputs by scores - sorted_inds = torch.argsort(-instance_results.scores) - - current_segment_id = 0 - segments_info = [] - - instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) - - # Add instances one-by-one, check for overlaps with existing ones - for inst_id in sorted_inds: - score = instance_results.scores[inst_id].item() - if score < instances_score_thresh: - break - mask = instance_masks[inst_id] # H,W - mask_area = mask.sum().item() - - if mask_area == 0: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum().item() - - if intersect_area * 1.0 / mask_area > overlap_threshold: - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - current_segment_id += 1 - panoptic_seg[mask] = current_segment_id - segments_info.append( - { - "id": current_segment_id, - "isthing": True, - "score": score, - "category_id": instance_results.pred_classes[inst_id].item(), - "instance_id": inst_id.item(), - } - ) - - # Add semantic results to remaining empty areas - semantic_labels = torch.unique(semantic_results).cpu().tolist() - for semantic_label in semantic_labels: - if semantic_label == 0: # 0 is a special "thing" class - continue - mask = (semantic_results == semantic_label) & (panoptic_seg == 0) - mask_area = mask.sum().item() - if mask_area < stuff_area_thresh: - continue - - current_segment_id += 1 - panoptic_seg[mask] = current_segment_id - segments_info.append( - { - "id": current_segment_id, - "isthing": False, - "category_id": semantic_label, - "area": mask_area, - } - ) - - return panoptic_seg, segments_info diff --git a/detectron2/detectron2/modeling/meta_arch/rcnn.py b/detectron2/detectron2/modeling/meta_arch/rcnn.py deleted file mode 100644 index edcbda553a619c314d6175638b485ee5c791a176..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/rcnn.py +++ /dev/null @@ -1,341 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import numpy as np -from typing import Dict, List, Optional, Tuple -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.data.detection_utils import convert_image_to_rgb -from detectron2.layers import move_device_like -from detectron2.structures import ImageList, Instances -from detectron2.utils.events import get_event_storage -from detectron2.utils.logger import log_first_n - -from ..backbone import Backbone, build_backbone -from ..postprocessing import detector_postprocess -from ..proposal_generator import build_proposal_generator -from ..roi_heads import build_roi_heads -from .build import META_ARCH_REGISTRY - -__all__ = ["GeneralizedRCNN", "ProposalNetwork"] - - -@META_ARCH_REGISTRY.register() -class GeneralizedRCNN(nn.Module): - """ - Generalized R-CNN. Any models that contains the following three components: - 1. Per-image feature extraction (aka backbone) - 2. Region proposal generation - 3. 
Per-region feature extraction and prediction - """ - - @configurable - def __init__( - self, - *, - backbone: Backbone, - proposal_generator: nn.Module, - roi_heads: nn.Module, - pixel_mean: Tuple[float], - pixel_std: Tuple[float], - input_format: Optional[str] = None, - vis_period: int = 0, - ): - """ - Args: - backbone: a backbone module, must follow detectron2's backbone interface - proposal_generator: a module that generates proposals using backbone features - roi_heads: a ROI head that performs per-region computation - pixel_mean, pixel_std: list or tuple with #channels element, representing - the per-channel mean and std to be used to normalize the input image - input_format: describe the meaning of channels of input. Needed by visualization - vis_period: the period to run visualization. Set to 0 to disable. - """ - super().__init__() - self.backbone = backbone - self.proposal_generator = proposal_generator - self.roi_heads = roi_heads - - self.input_format = input_format - self.vis_period = vis_period - if vis_period > 0: - assert input_format is not None, "input_format is required for visualization!" - - self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) - assert ( - self.pixel_mean.shape == self.pixel_std.shape - ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" - - @classmethod - def from_config(cls, cfg): - backbone = build_backbone(cfg) - return { - "backbone": backbone, - "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), - "roi_heads": build_roi_heads(cfg, backbone.output_shape()), - "input_format": cfg.INPUT.FORMAT, - "vis_period": cfg.VIS_PERIOD, - "pixel_mean": cfg.MODEL.PIXEL_MEAN, - "pixel_std": cfg.MODEL.PIXEL_STD, - } - - @property - def device(self): - return self.pixel_mean.device - - def _move_to_current_device(self, x): - return move_device_like(x, self.pixel_mean) - - def visualize_training(self, batched_inputs, proposals): - """ - A function used to visualize images and proposals. It shows ground truth - bounding boxes on the original image and up to 20 top-scoring predicted - object proposals on the original image. Users can implement different - visualization functions for different models. - - Args: - batched_inputs (list): a list that contains input to the model. - proposals (list): a list that contains predicted proposals. Both - batched_inputs and proposals should have the same length. - """ - from detectron2.utils.visualizer import Visualizer - - storage = get_event_storage() - max_vis_prop = 20 - - for input, prop in zip(batched_inputs, proposals): - img = input["image"] - img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) - v_gt = Visualizer(img, None) - v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) - anno_img = v_gt.get_image() - box_size = min(len(prop.proposal_boxes), max_vis_prop) - v_pred = Visualizer(img, None) - v_pred = v_pred.overlay_instances( - boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() - ) - prop_img = v_pred.get_image() - vis_img = np.concatenate((anno_img, prop_img), axis=1) - vis_img = vis_img.transpose(2, 0, 1) - vis_name = "Left: GT bounding boxes; Right: Predicted proposals" - storage.put_image(vis_name, vis_img) - break # only visualize one image in a batch - - def forward(self, batched_inputs: List[Dict[str, torch.Tensor]]): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 
- Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances (optional): groundtruth :class:`Instances` - * proposals (optional): :class:`Instances`, precomputed proposals. - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "instances" whose value is a :class:`Instances`. - The :class:`Instances` object has the following keys: - "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" - """ - if not self.training: - return self.inference(batched_inputs) - - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - - if self.proposal_generator is not None: - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - proposal_losses = {} - - _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) - if self.vis_period > 0: - storage = get_event_storage() - if storage.iter % self.vis_period == 0: - self.visualize_training(batched_inputs, proposals) - - losses = {} - losses.update(detector_losses) - losses.update(proposal_losses) - return losses - - def inference( - self, - batched_inputs: List[Dict[str, torch.Tensor]], - detected_instances: Optional[List[Instances]] = None, - do_postprocess: bool = True, - ): - """ - Run inference on the given inputs. - - Args: - batched_inputs (list[dict]): same as in :meth:`forward` - detected_instances (None or list[Instances]): if not None, it - contains an `Instances` object per image. The `Instances` - object contains "pred_boxes" and "pred_classes" which are - known boxes in the image. - The inference will then skip the detection of bounding boxes, - and only predict other per-ROI outputs. - do_postprocess (bool): whether to apply post-processing on the outputs. - - Returns: - When do_postprocess=True, same as in :meth:`forward`. - Otherwise, a list[Instances] containing raw network outputs. - """ - assert not self.training - - images = self.preprocess_image(batched_inputs) - features = self.backbone(images.tensor) - - if detected_instances is None: - if self.proposal_generator is not None: - proposals, _ = self.proposal_generator(images, features, None) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - - results, _ = self.roi_heads(images, features, proposals, None) - else: - detected_instances = [x.to(self.device) for x in detected_instances] - results = self.roi_heads.forward_with_given_boxes(features, detected_instances) - - if do_postprocess: - assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess." - return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) - return results - - def preprocess_image(self, batched_inputs: List[Dict[str, torch.Tensor]]): - """ - Normalize, pad and batch the input images. 
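A hedged end-to-end usage sketch of the batched-inputs contract described above, assuming a detectron2 installation and the stock Faster R-CNN config from the model zoo; weights are left random here, so the predictions are meaningless unless a checkpoint is loaded (e.g. with DetectionCheckpointer).

```python
import torch
from detectron2 import model_zoo
from detectron2.modeling import build_model

cfg = model_zoo.get_config("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"
model = build_model(cfg)      # architecture only; no checkpoint is loaded here
model.eval()

# one dict per image: (C, H, W) tensor plus the desired output resolution
image = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8).float()
inputs = [{"image": image, "height": 480, "width": 640}]

with torch.no_grad():
    outputs = model(inputs)   # list with one dict per input image
print(outputs[0]["instances"])
```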
- """ - images = [self._move_to_current_device(x["image"]) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors( - images, - self.backbone.size_divisibility, - padding_constraints=self.backbone.padding_constraints, - ) - return images - - @staticmethod - def _postprocess(instances, batched_inputs: List[Dict[str, torch.Tensor]], image_sizes): - """ - Rescale the output instances to the target size. - """ - # note: private function; subject to changes - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - instances, batched_inputs, image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"instances": r}) - return processed_results - - -@META_ARCH_REGISTRY.register() -class ProposalNetwork(nn.Module): - """ - A meta architecture that only predicts object proposals. - """ - - @configurable - def __init__( - self, - *, - backbone: Backbone, - proposal_generator: nn.Module, - pixel_mean: Tuple[float], - pixel_std: Tuple[float], - ): - """ - Args: - backbone: a backbone module, must follow detectron2's backbone interface - proposal_generator: a module that generates proposals using backbone features - pixel_mean, pixel_std: list or tuple with #channels element, representing - the per-channel mean and std to be used to normalize the input image - """ - super().__init__() - self.backbone = backbone - self.proposal_generator = proposal_generator - self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) - - @classmethod - def from_config(cls, cfg): - backbone = build_backbone(cfg) - return { - "backbone": backbone, - "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()), - "pixel_mean": cfg.MODEL.PIXEL_MEAN, - "pixel_std": cfg.MODEL.PIXEL_STD, - } - - @property - def device(self): - return self.pixel_mean.device - - def _move_to_current_device(self, x): - return move_device_like(x, self.pixel_mean) - - def forward(self, batched_inputs): - """ - Args: - Same as in :class:`GeneralizedRCNN.forward` - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "proposals" whose value is a - :class:`Instances` with keys "proposal_boxes" and "objectness_logits". - """ - images = [self._move_to_current_device(x["image"]) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors( - images, - self.backbone.size_divisibility, - padding_constraints=self.backbone.padding_constraints, - ) - features = self.backbone(images.tensor) - - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - # In training, the proposals are not useful at all but we generate them anyway. - # This makes RPN-only models about 5% slower. 
- if self.training: - return proposal_losses - - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - proposals, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"proposals": r}) - return processed_results diff --git a/detectron2/detectron2/modeling/meta_arch/retinanet.py b/detectron2/detectron2/modeling/meta_arch/retinanet.py deleted file mode 100644 index bd72a8e7fb57bebcdca64c7bc43b8f0f03118bed..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/retinanet.py +++ /dev/null @@ -1,439 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import math -from typing import List, Tuple -import torch -from fvcore.nn import sigmoid_focal_loss_jit -from torch import Tensor, nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import CycleBatchNormList, ShapeSpec, batched_nms, cat, get_norm -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage - -from ..anchor_generator import build_anchor_generator -from ..backbone import Backbone, build_backbone -from ..box_regression import Box2BoxTransform, _dense_box_regression_loss -from ..matcher import Matcher -from .build import META_ARCH_REGISTRY -from .dense_detector import DenseDetector, permute_to_N_HWA_K # noqa - -__all__ = ["RetinaNet"] - - -logger = logging.getLogger(__name__) - - -@META_ARCH_REGISTRY.register() -class RetinaNet(DenseDetector): - """ - Implement RetinaNet in :paper:`RetinaNet`. - """ - - @configurable - def __init__( - self, - *, - backbone: Backbone, - head: nn.Module, - head_in_features, - anchor_generator, - box2box_transform, - anchor_matcher, - num_classes, - focal_loss_alpha=0.25, - focal_loss_gamma=2.0, - smooth_l1_beta=0.0, - box_reg_loss_type="smooth_l1", - test_score_thresh=0.05, - test_topk_candidates=1000, - test_nms_thresh=0.5, - max_detections_per_image=100, - pixel_mean, - pixel_std, - vis_period=0, - input_format="BGR", - ): - """ - NOTE: this interface is experimental. - - Args: - backbone: a backbone module, must follow detectron2's backbone interface - head (nn.Module): a module that predicts logits and regression deltas - for each level from a list of per-level features - head_in_features (Tuple[str]): Names of the input feature maps to be used in head - anchor_generator (nn.Module): a module that creates anchors from a - list of features. Usually an instance of :class:`AnchorGenerator` - box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to - instance boxes - anchor_matcher (Matcher): label the anchors by matching them with ground truth. - num_classes (int): number of classes. Used to label background proposals. 
- - # Loss parameters: - focal_loss_alpha (float): focal_loss_alpha - focal_loss_gamma (float): focal_loss_gamma - smooth_l1_beta (float): smooth_l1_beta - box_reg_loss_type (str): Options are "smooth_l1", "giou", "diou", "ciou" - - # Inference parameters: - test_score_thresh (float): Inference cls score threshold, only anchors with - score > INFERENCE_TH are considered for inference (to improve speed) - test_topk_candidates (int): Select topk candidates before NMS - test_nms_thresh (float): Overlap threshold used for non-maximum suppression - (suppress boxes with IoU >= this threshold) - max_detections_per_image (int): - Maximum number of detections to return per image during inference - (100 is based on the limit established for the COCO dataset). - - pixel_mean, pixel_std: see :class:`DenseDetector`. - """ - super().__init__( - backbone, head, head_in_features, pixel_mean=pixel_mean, pixel_std=pixel_std - ) - self.num_classes = num_classes - - # Anchors - self.anchor_generator = anchor_generator - self.box2box_transform = box2box_transform - self.anchor_matcher = anchor_matcher - - # Loss parameters: - self.focal_loss_alpha = focal_loss_alpha - self.focal_loss_gamma = focal_loss_gamma - self.smooth_l1_beta = smooth_l1_beta - self.box_reg_loss_type = box_reg_loss_type - # Inference parameters: - self.test_score_thresh = test_score_thresh - self.test_topk_candidates = test_topk_candidates - self.test_nms_thresh = test_nms_thresh - self.max_detections_per_image = max_detections_per_image - # Vis parameters - self.vis_period = vis_period - self.input_format = input_format - - @classmethod - def from_config(cls, cfg): - backbone = build_backbone(cfg) - backbone_shape = backbone.output_shape() - feature_shapes = [backbone_shape[f] for f in cfg.MODEL.RETINANET.IN_FEATURES] - head = RetinaNetHead(cfg, feature_shapes) - anchor_generator = build_anchor_generator(cfg, feature_shapes) - return { - "backbone": backbone, - "head": head, - "anchor_generator": anchor_generator, - "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS), - "anchor_matcher": Matcher( - cfg.MODEL.RETINANET.IOU_THRESHOLDS, - cfg.MODEL.RETINANET.IOU_LABELS, - allow_low_quality_matches=True, - ), - "pixel_mean": cfg.MODEL.PIXEL_MEAN, - "pixel_std": cfg.MODEL.PIXEL_STD, - "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, - "head_in_features": cfg.MODEL.RETINANET.IN_FEATURES, - # Loss parameters: - "focal_loss_alpha": cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA, - "focal_loss_gamma": cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA, - "smooth_l1_beta": cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA, - "box_reg_loss_type": cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE, - # Inference parameters: - "test_score_thresh": cfg.MODEL.RETINANET.SCORE_THRESH_TEST, - "test_topk_candidates": cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST, - "test_nms_thresh": cfg.MODEL.RETINANET.NMS_THRESH_TEST, - "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, - # Vis parameters - "vis_period": cfg.VIS_PERIOD, - "input_format": cfg.INPUT.FORMAT, - } - - def forward_training(self, images, features, predictions, gt_instances): - # Transpose the Hi*Wi*A dimension to the middle: - pred_logits, pred_anchor_deltas = self._transpose_dense_predictions( - predictions, [self.num_classes, 4] - ) - anchors = self.anchor_generator(features) - gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances) - return self.losses(anchors, pred_logits, gt_labels, pred_anchor_deltas, gt_boxes) - - def losses(self, anchors, pred_logits, gt_labels, pred_anchor_deltas, 
gt_boxes): - """ - Args: - anchors (list[Boxes]): a list of #feature level Boxes - gt_labels, gt_boxes: see output of :meth:`RetinaNet.label_anchors`. - Their shapes are (N, R) and (N, R, 4), respectively, where R is - the total number of anchors across levels, i.e. sum(Hi x Wi x Ai) - pred_logits, pred_anchor_deltas: both are list[Tensor]. Each element in the - list corresponds to one level and has shape (N, Hi * Wi * Ai, K or 4). - Where K is the number of classes used in `pred_logits`. - - Returns: - dict[str, Tensor]: - mapping from a named loss to a scalar tensor storing the loss. - Used during training only. The dict keys are: "loss_cls" and "loss_box_reg" - """ - num_images = len(gt_labels) - gt_labels = torch.stack(gt_labels) # (N, R) - - valid_mask = gt_labels >= 0 - pos_mask = (gt_labels >= 0) & (gt_labels != self.num_classes) - num_pos_anchors = pos_mask.sum().item() - get_event_storage().put_scalar("num_pos_anchors", num_pos_anchors / num_images) - normalizer = self._ema_update("loss_normalizer", max(num_pos_anchors, 1), 100) - - # classification and regression loss - gt_labels_target = F.one_hot(gt_labels[valid_mask], num_classes=self.num_classes + 1)[ - :, :-1 - ] # no loss for the last (background) class - loss_cls = sigmoid_focal_loss_jit( - cat(pred_logits, dim=1)[valid_mask], - gt_labels_target.to(pred_logits[0].dtype), - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) - - loss_box_reg = _dense_box_regression_loss( - anchors, - self.box2box_transform, - pred_anchor_deltas, - gt_boxes, - pos_mask, - box_reg_loss_type=self.box_reg_loss_type, - smooth_l1_beta=self.smooth_l1_beta, - ) - - return { - "loss_cls": loss_cls / normalizer, - "loss_box_reg": loss_box_reg / normalizer, - } - - @torch.no_grad() - def label_anchors(self, anchors, gt_instances): - """ - Args: - anchors (list[Boxes]): A list of #feature level Boxes. - The Boxes contains anchors of this image on the specific feature level. - gt_instances (list[Instances]): a list of N `Instances`s. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. - - Returns: - list[Tensor]: List of #img tensors. i-th element is a vector of labels whose length is - the total number of anchors across all feature maps (sum(Hi * Wi * A)). - Label values are in {-1, 0, ..., K}, with -1 means ignore, and K means background. - - list[Tensor]: i-th element is a Rx4 tensor, where R is the total number of anchors - across feature maps. The values are the matched gt boxes for each anchor. - Values are undefined for those anchors not labeled as foreground. - """ - anchors = Boxes.cat(anchors) # Rx4 - - gt_labels = [] - matched_gt_boxes = [] - for gt_per_image in gt_instances: - match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors) - matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix) - del match_quality_matrix - - if len(gt_per_image) > 0: - matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs] - - gt_labels_i = gt_per_image.gt_classes[matched_idxs] - # Anchors with label 0 are treated as background. - gt_labels_i[anchor_labels == 0] = self.num_classes - # Anchors with label -1 are ignored. 
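For reference, the classification term in the `losses()` method above is the sigmoid focal loss from the RetinaNet paper, applied to one-hot targets with the background column dropped. Below is a minimal plain-PyTorch sketch of that formulation; it is not the fvcore `sigmoid_focal_loss_jit` implementation itself, and the anchor/class counts are arbitrary example values.

```python
import torch
import torch.nn.functional as F

def sigmoid_focal_loss(logits, targets, alpha=0.25, gamma=2.0):
    """Focal loss for dense classification.
    logits, targets: (num_anchors, num_classes); targets are 0/1."""
    p = torch.sigmoid(logits)
    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)   # probability of the true label
    loss = ce * (1 - p_t) ** gamma                # down-weight easy examples
    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss
    return loss.sum()

logits = torch.randn(8, 80)   # 8 anchors, 80 classes (example sizes)
# One-hot over K+1 classes, then drop the last (background) column, as above.
targets = F.one_hot(torch.randint(0, 81, (8,)), 81)[:, :-1].float()
print(sigmoid_focal_loss(logits, targets))  # scalar; divided by a normalizer in the code above
```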
- gt_labels_i[anchor_labels == -1] = -1 - else: - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes - - gt_labels.append(gt_labels_i) - matched_gt_boxes.append(matched_gt_boxes_i) - - return gt_labels, matched_gt_boxes - - def forward_inference( - self, images: ImageList, features: List[Tensor], predictions: List[List[Tensor]] - ): - pred_logits, pred_anchor_deltas = self._transpose_dense_predictions( - predictions, [self.num_classes, 4] - ) - anchors = self.anchor_generator(features) - - results: List[Instances] = [] - for img_idx, image_size in enumerate(images.image_sizes): - scores_per_image = [x[img_idx].sigmoid_() for x in pred_logits] - deltas_per_image = [x[img_idx] for x in pred_anchor_deltas] - results_per_image = self.inference_single_image( - anchors, scores_per_image, deltas_per_image, image_size - ) - results.append(results_per_image) - return results - - def inference_single_image( - self, - anchors: List[Boxes], - box_cls: List[Tensor], - box_delta: List[Tensor], - image_size: Tuple[int, int], - ): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Arguments: - anchors (list[Boxes]): list of #feature levels. Each entry contains - a Boxes object, which contains all the anchors in that feature level. - box_cls (list[Tensor]): list of #feature levels. Each entry contains - tensor of size (H x W x A, K) - box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. - image_size (tuple(H, W)): a tuple of the image height and width. - - Returns: - Same as `inference`, but for only one image. - """ - pred = self._decode_multi_level_predictions( - anchors, - box_cls, - box_delta, - self.test_score_thresh, - self.test_topk_candidates, - image_size, - ) - keep = batched_nms( # per-class NMS - pred.pred_boxes.tensor, pred.scores, pred.pred_classes, self.test_nms_thresh - ) - return pred[keep[: self.max_detections_per_image]] - - -class RetinaNetHead(nn.Module): - """ - The head used in RetinaNet for object classification and box regression. - It has two subnets for the two tasks, with a common structure but separate parameters. - """ - - @configurable - def __init__( - self, - *, - input_shape: List[ShapeSpec], - num_classes, - num_anchors, - conv_dims: List[int], - norm="", - prior_prob=0.01, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (List[ShapeSpec]): input shape - num_classes (int): number of classes. Used to label background proposals. - num_anchors (int): number of generated anchors - conv_dims (List[int]): dimensions for each convolution layer - norm (str or callable): - Normalization for conv layers except for the two output layers. - See :func:`detectron2.layers.get_norm` for supported types. - prior_prob (float): Prior weight for computing bias - """ - super().__init__() - - self._num_features = len(input_shape) - if norm == "BN" or norm == "SyncBN": - logger.info( - f"Using domain-specific {norm} in RetinaNetHead with len={self._num_features}." - ) - bn_class = nn.BatchNorm2d if norm == "BN" else nn.SyncBatchNorm - - def norm(c): - return CycleBatchNormList( - length=self._num_features, bn_class=bn_class, num_features=c - ) - - else: - norm_name = str(type(get_norm(norm, 32))) - if "BN" in norm_name: - logger.warning( - f"Shared BatchNorm (type={norm_name}) may not work well in RetinaNetHead." 
- ) - - cls_subnet = [] - bbox_subnet = [] - for in_channels, out_channels in zip( - [input_shape[0].channels] + list(conv_dims), conv_dims - ): - cls_subnet.append( - nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) - ) - if norm: - cls_subnet.append(get_norm(norm, out_channels)) - cls_subnet.append(nn.ReLU()) - bbox_subnet.append( - nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) - ) - if norm: - bbox_subnet.append(get_norm(norm, out_channels)) - bbox_subnet.append(nn.ReLU()) - - self.cls_subnet = nn.Sequential(*cls_subnet) - self.bbox_subnet = nn.Sequential(*bbox_subnet) - self.cls_score = nn.Conv2d( - conv_dims[-1], num_anchors * num_classes, kernel_size=3, stride=1, padding=1 - ) - self.bbox_pred = nn.Conv2d( - conv_dims[-1], num_anchors * 4, kernel_size=3, stride=1, padding=1 - ) - - # Initialization - for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: - for layer in modules.modules(): - if isinstance(layer, nn.Conv2d): - torch.nn.init.normal_(layer.weight, mean=0, std=0.01) - torch.nn.init.constant_(layer.bias, 0) - - # Use prior in model initialization to improve stability - bias_value = -(math.log((1 - prior_prob) / prior_prob)) - torch.nn.init.constant_(self.cls_score.bias, bias_value) - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors - assert ( - len(set(num_anchors)) == 1 - ), "Using different number of anchors between levels is not currently supported!" - num_anchors = num_anchors[0] - - return { - "input_shape": input_shape, - "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES, - "conv_dims": [input_shape[0].channels] * cfg.MODEL.RETINANET.NUM_CONVS, - "prior_prob": cfg.MODEL.RETINANET.PRIOR_PROB, - "norm": cfg.MODEL.RETINANET.NORM, - "num_anchors": num_anchors, - } - - def forward(self, features: List[Tensor]): - """ - Arguments: - features (list[Tensor]): FPN feature map tensors in high to low resolution. - Each tensor in the list correspond to different feature levels. - - Returns: - logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). - The tensor predicts the classification probability - at each spatial position for each of the A anchors and K object - classes. - bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). - The tensor predicts 4-vector (dx,dy,dw,dh) box - regression values for every anchor. These values are the - relative offset between the anchor and the ground truth box. - """ - assert len(features) == self._num_features - logits = [] - bbox_reg = [] - for feature in features: - logits.append(self.cls_score(self.cls_subnet(feature))) - bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) - return logits, bbox_reg diff --git a/detectron2/detectron2/modeling/meta_arch/semantic_seg.py b/detectron2/detectron2/modeling/meta_arch/semantic_seg.py deleted file mode 100644 index fefbecfb4f9ca84c4cf62c246cdcbf946016f0e6..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/meta_arch/semantic_seg.py +++ /dev/null @@ -1,267 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
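The RetinaNet head deleted above initializes the classification bias to `-log((1 - prior_prob) / prior_prob)`, so that at the start of training every anchor predicts foreground with probability roughly `prior_prob` (0.01 by default), which keeps the focal loss numerically stable. A quick sanity-check sketch of that arithmetic:

```python
import math
import torch

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(bias_value)                                # ≈ -4.595
print(torch.sigmoid(torch.tensor(bias_value)))   # ≈ 0.0100, i.e. ~prior_prob
```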
-import numpy as np -from typing import Callable, Dict, Optional, Tuple, Union -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ShapeSpec, get_norm -from detectron2.structures import ImageList -from detectron2.utils.registry import Registry - -from ..backbone import Backbone, build_backbone -from ..postprocessing import sem_seg_postprocess -from .build import META_ARCH_REGISTRY - -__all__ = [ - "SemanticSegmentor", - "SEM_SEG_HEADS_REGISTRY", - "SemSegFPNHead", - "build_sem_seg_head", -] - - -SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") -SEM_SEG_HEADS_REGISTRY.__doc__ = """ -Registry for semantic segmentation heads, which make semantic segmentation predictions -from feature maps. -""" - - -@META_ARCH_REGISTRY.register() -class SemanticSegmentor(nn.Module): - """ - Main class for semantic segmentation architectures. - """ - - @configurable - def __init__( - self, - *, - backbone: Backbone, - sem_seg_head: nn.Module, - pixel_mean: Tuple[float], - pixel_std: Tuple[float], - ): - """ - Args: - backbone: a backbone module, must follow detectron2's backbone interface - sem_seg_head: a module that predicts semantic segmentation from backbone features - pixel_mean, pixel_std: list or tuple with #channels element, representing - the per-channel mean and std to be used to normalize the input image - """ - super().__init__() - self.backbone = backbone - self.sem_seg_head = sem_seg_head - self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) - - @classmethod - def from_config(cls, cfg): - backbone = build_backbone(cfg) - sem_seg_head = build_sem_seg_head(cfg, backbone.output_shape()) - return { - "backbone": backbone, - "sem_seg_head": sem_seg_head, - "pixel_mean": cfg.MODEL.PIXEL_MEAN, - "pixel_std": cfg.MODEL.PIXEL_STD, - } - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - - For now, each item in the list is a dict that contains: - - * "image": Tensor, image in (C, H, W) format. - * "sem_seg": semantic segmentation ground truth - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model (may be different - from input resolution), used in inference. - - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "sem_seg" whose value is a - Tensor that represents the - per-pixel segmentation prediced by the head. - The prediction has shape KxHxW that represents the logits of - each class for each pixel. 
- """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors( - images, - self.backbone.size_divisibility, - padding_constraints=self.backbone.padding_constraints, - ) - - features = self.backbone(images.tensor) - - if "sem_seg" in batched_inputs[0]: - targets = [x["sem_seg"].to(self.device) for x in batched_inputs] - targets = ImageList.from_tensors( - targets, - self.backbone.size_divisibility, - self.sem_seg_head.ignore_value, - self.backbone.padding_constraints, - ).tensor - else: - targets = None - results, losses = self.sem_seg_head(features, targets) - - if self.training: - return losses - - processed_results = [] - for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = sem_seg_postprocess(result, image_size, height, width) - processed_results.append({"sem_seg": r}) - return processed_results - - -def build_sem_seg_head(cfg, input_shape): - """ - Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. - """ - name = cfg.MODEL.SEM_SEG_HEAD.NAME - return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) - - -@SEM_SEG_HEADS_REGISTRY.register() -class SemSegFPNHead(nn.Module): - """ - A semantic segmentation head described in :paper:`PanopticFPN`. - It takes a list of FPN features as input, and applies a sequence of - 3x3 convs and upsampling to scale all of them to the stride defined by - ``common_stride``. Then these features are added and used to make final - predictions by another 1x1 conv layer. - """ - - @configurable - def __init__( - self, - input_shape: Dict[str, ShapeSpec], - *, - num_classes: int, - conv_dims: int, - common_stride: int, - loss_weight: float = 1.0, - norm: Optional[Union[str, Callable]] = None, - ignore_value: int = -1, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape: shapes (channels and stride) of the input features - num_classes: number of classes to predict - conv_dims: number of output channels for the intermediate conv layers. - common_stride: the common stride that all features will be upscaled to - loss_weight: loss weight - norm (str or callable): normalization for all conv layers - ignore_value: category id to be ignored during training. 
- """ - super().__init__() - input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) - if not len(input_shape): - raise ValueError("SemSegFPNHead(input_shape=) cannot be empty!") - self.in_features = [k for k, v in input_shape] - feature_strides = [v.stride for k, v in input_shape] - feature_channels = [v.channels for k, v in input_shape] - - self.ignore_value = ignore_value - self.common_stride = common_stride - self.loss_weight = loss_weight - - self.scale_heads = [] - for in_feature, stride, channels in zip( - self.in_features, feature_strides, feature_channels - ): - head_ops = [] - head_length = max(1, int(np.log2(stride) - np.log2(self.common_stride))) - for k in range(head_length): - norm_module = get_norm(norm, conv_dims) - conv = Conv2d( - channels if k == 0 else conv_dims, - conv_dims, - kernel_size=3, - stride=1, - padding=1, - bias=not norm, - norm=norm_module, - activation=F.relu, - ) - weight_init.c2_msra_fill(conv) - head_ops.append(conv) - if stride != self.common_stride: - head_ops.append( - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) - ) - self.scale_heads.append(nn.Sequential(*head_ops)) - self.add_module(in_feature, self.scale_heads[-1]) - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - weight_init.c2_msra_fill(self.predictor) - - @classmethod - def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): - return { - "input_shape": { - k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES - }, - "ignore_value": cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - "num_classes": cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - "conv_dims": cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM, - "common_stride": cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE, - "norm": cfg.MODEL.SEM_SEG_HEAD.NORM, - "loss_weight": cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, - } - - def forward(self, features, targets=None): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - x = self.layers(features) - if self.training: - return None, self.losses(x, targets) - else: - x = F.interpolate( - x, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - return x, {} - - def layers(self, features): - for i, f in enumerate(self.in_features): - if i == 0: - x = self.scale_heads[i](features[f]) - else: - x = x + self.scale_heads[i](features[f]) - x = self.predictor(x) - return x - - def losses(self, predictions, targets): - predictions = predictions.float() # https://github.com/pytorch/pytorch/issues/48163 - predictions = F.interpolate( - predictions, - scale_factor=self.common_stride, - mode="bilinear", - align_corners=False, - ) - loss = F.cross_entropy( - predictions, targets, reduction="mean", ignore_index=self.ignore_value - ) - losses = {"loss_sem_seg": loss * self.loss_weight} - return losses diff --git a/detectron2/detectron2/modeling/mmdet_wrapper.py b/detectron2/detectron2/modeling/mmdet_wrapper.py deleted file mode 100644 index 293b3e9faf34c48456cd3fff37b966af9042fe4e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/mmdet_wrapper.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import itertools -import logging -import numpy as np -from collections import OrderedDict -from collections.abc import Mapping -from typing import Dict, List, Optional, Tuple, Union -import torch -from omegaconf import DictConfig, OmegaConf -from torch import Tensor, nn - -from detectron2.layers import ShapeSpec -from detectron2.structures import BitMasks, Boxes, ImageList, Instances -from detectron2.utils.events import get_event_storage - -from .backbone import Backbone - -logger = logging.getLogger(__name__) - - -def _to_container(cfg): - """ - mmdet will assert the type of dict/list. - So convert omegaconf objects to dict/list. - """ - if isinstance(cfg, DictConfig): - cfg = OmegaConf.to_container(cfg, resolve=True) - from mmcv.utils import ConfigDict - - return ConfigDict(cfg) - - -class MMDetBackbone(Backbone): - """ - Wrapper of mmdetection backbones to use in detectron2. - - mmdet backbones produce list/tuple of tensors, while detectron2 backbones - produce a dict of tensors. This class wraps the given backbone to produce - output in detectron2's convention, so it can be used in place of detectron2 - backbones. - """ - - def __init__( - self, - backbone: Union[nn.Module, Mapping], - neck: Union[nn.Module, Mapping, None] = None, - *, - output_shapes: List[ShapeSpec], - output_names: Optional[List[str]] = None, - ): - """ - Args: - backbone: either a backbone module or a mmdet config dict that defines a - backbone. The backbone takes a 4D image tensor and returns a - sequence of tensors. - neck: either a backbone module or a mmdet config dict that defines a - neck. The neck takes outputs of backbone and returns a - sequence of tensors. If None, no neck is used. - output_shapes: shape for every output of the backbone (or neck, if given). - stride and channels are often needed. - output_names: names for every output of the backbone (or neck, if given). - By default, will use "out0", "out1", ... - """ - super().__init__() - if isinstance(backbone, Mapping): - from mmdet.models import build_backbone - - backbone = build_backbone(_to_container(backbone)) - self.backbone = backbone - - if isinstance(neck, Mapping): - from mmdet.models import build_neck - - neck = build_neck(_to_container(neck)) - self.neck = neck - - # "Neck" weights, if any, are part of neck itself. This is the interface - # of mmdet so we follow it. Reference: - # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py - logger.info("Initializing mmdet backbone weights...") - self.backbone.init_weights() - # train() in mmdet modules is non-trivial, and has to be explicitly - # called. Reference: - # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/backbones/resnet.py - self.backbone.train() - if self.neck is not None: - logger.info("Initializing mmdet neck weights ...") - if isinstance(self.neck, nn.Sequential): - for m in self.neck: - m.init_weights() - else: - self.neck.init_weights() - self.neck.train() - - self._output_shapes = output_shapes - if not output_names: - output_names = [f"out{i}" for i in range(len(output_shapes))] - self._output_names = output_names - - def forward(self, x) -> Dict[str, Tensor]: - outs = self.backbone(x) - if self.neck is not None: - outs = self.neck(outs) - assert isinstance( - outs, (list, tuple) - ), "mmdet backbone should return a list/tuple of tensors!" 
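The backbone wrapper above converts mmdet's list/tuple outputs into detectron2's dict convention, falling back to names `out0`, `out1`, ... when no `output_names` are given. A stripped-down illustration of that mapping (plain PyTorch, no mmdet or detectron2 required; the feature shapes are arbitrary):

```python
import torch

def to_named_outputs(outs, output_names=None):
    """Turn a list/tuple of feature tensors into a {name: tensor} dict."""
    assert isinstance(outs, (list, tuple)), "backbone should return a list/tuple of tensors!"
    if not output_names:
        output_names = [f"out{i}" for i in range(len(outs))]
    if len(outs) != len(output_names):
        raise ValueError(f"{len(outs)} outputs but {len(output_names)} names")
    return dict(zip(output_names, outs))

feats = [torch.rand(1, 256, 64, 64), torch.rand(1, 256, 32, 32)]
print(list(to_named_outputs(feats)))                 # ['out0', 'out1']
print(list(to_named_outputs(feats, ["p3", "p4"])))   # ['p3', 'p4']
```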
- if len(outs) != len(self._output_shapes): - raise ValueError( - "Length of output_shapes does not match outputs from the mmdet backbone: " - f"{len(outs)} != {len(self._output_shapes)}" - ) - return {k: v for k, v in zip(self._output_names, outs)} - - def output_shape(self) -> Dict[str, ShapeSpec]: - return {k: v for k, v in zip(self._output_names, self._output_shapes)} - - -class MMDetDetector(nn.Module): - """ - Wrapper of a mmdetection detector model, for detection and instance segmentation. - Input/output formats of this class follow detectron2's convention, so a - mmdetection model can be trained and evaluated in detectron2. - """ - - def __init__( - self, - detector: Union[nn.Module, Mapping], - *, - # Default is 32 regardless of model: - # https://github.com/open-mmlab/mmdetection/tree/master/configs/_base_/datasets - size_divisibility=32, - pixel_mean: Tuple[float], - pixel_std: Tuple[float], - ): - """ - Args: - detector: a mmdet detector, or a mmdet config dict that defines a detector. - size_divisibility: pad input images to multiple of this number - pixel_mean: per-channel mean to normalize input image - pixel_std: per-channel stddev to normalize input image - """ - super().__init__() - if isinstance(detector, Mapping): - from mmdet.models import build_detector - - detector = build_detector(_to_container(detector)) - self.detector = detector - self.detector.init_weights() - self.size_divisibility = size_divisibility - - self.register_buffer("pixel_mean", torch.tensor(pixel_mean).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(pixel_std).view(-1, 1, 1), False) - assert ( - self.pixel_mean.shape == self.pixel_std.shape - ), f"{self.pixel_mean} and {self.pixel_std} have different shapes!" - - def forward(self, batched_inputs: List[Dict[str, torch.Tensor]]): - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, size_divisibility=self.size_divisibility).tensor - metas = [] - rescale = {"height" in x for x in batched_inputs} - if len(rescale) != 1: - raise ValueError("Some inputs have original height/width, but some don't!") - rescale = list(rescale)[0] - output_shapes = [] - for input in batched_inputs: - meta = {} - c, h, w = input["image"].shape - meta["img_shape"] = meta["ori_shape"] = (h, w, c) - if rescale: - scale_factor = np.array( - [w / input["width"], h / input["height"]] * 2, dtype="float32" - ) - ori_shape = (input["height"], input["width"]) - output_shapes.append(ori_shape) - meta["ori_shape"] = ori_shape + (c,) - else: - scale_factor = 1.0 - output_shapes.append((h, w)) - meta["scale_factor"] = scale_factor - meta["flip"] = False - padh, padw = images.shape[-2:] - meta["pad_shape"] = (padh, padw, c) - metas.append(meta) - - if self.training: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - if gt_instances[0].has("gt_masks"): - from mmdet.core import PolygonMasks as mm_PolygonMasks, BitmapMasks as mm_BitMasks - - def convert_mask(m, shape): - # mmdet mask format - if isinstance(m, BitMasks): - return mm_BitMasks(m.tensor.cpu().numpy(), shape[0], shape[1]) - else: - return mm_PolygonMasks(m.polygons, shape[0], shape[1]) - - gt_masks = [convert_mask(x.gt_masks, x.image_size) for x in gt_instances] - losses_and_metrics = self.detector.forward_train( - images, - metas, - [x.gt_boxes.tensor for x in gt_instances], - [x.gt_classes for x in gt_instances], - gt_masks=gt_masks, - ) - else: - 
losses_and_metrics = self.detector.forward_train( - images, - metas, - [x.gt_boxes.tensor for x in gt_instances], - [x.gt_classes for x in gt_instances], - ) - return _parse_losses(losses_and_metrics) - else: - results = self.detector.simple_test(images, metas, rescale=rescale) - results = [ - {"instances": _convert_mmdet_result(r, shape)} - for r, shape in zip(results, output_shapes) - ] - return results - - @property - def device(self): - return self.pixel_mean.device - - -# Reference: show_result() in -# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py -def _convert_mmdet_result(result, shape: Tuple[int, int]) -> Instances: - if isinstance(result, tuple): - bbox_result, segm_result = result - if isinstance(segm_result, tuple): - segm_result = segm_result[0] - else: - bbox_result, segm_result = result, None - - bboxes = torch.from_numpy(np.vstack(bbox_result)) # Nx5 - bboxes, scores = bboxes[:, :4], bboxes[:, -1] - labels = [ - torch.full((bbox.shape[0],), i, dtype=torch.int32) for i, bbox in enumerate(bbox_result) - ] - labels = torch.cat(labels) - inst = Instances(shape) - inst.pred_boxes = Boxes(bboxes) - inst.scores = scores - inst.pred_classes = labels - - if segm_result is not None and len(labels) > 0: - segm_result = list(itertools.chain(*segm_result)) - segm_result = [torch.from_numpy(x) if isinstance(x, np.ndarray) else x for x in segm_result] - segm_result = torch.stack(segm_result, dim=0) - inst.pred_masks = segm_result - return inst - - -# reference: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/base.py -def _parse_losses(losses: Dict[str, Tensor]) -> Dict[str, Tensor]: - log_vars = OrderedDict() - for loss_name, loss_value in losses.items(): - if isinstance(loss_value, torch.Tensor): - log_vars[loss_name] = loss_value.mean() - elif isinstance(loss_value, list): - log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) - else: - raise TypeError(f"{loss_name} is not a tensor or list of tensors") - - if "loss" not in loss_name: - # put metrics to storage; don't return them - storage = get_event_storage() - value = log_vars.pop(loss_name).cpu().item() - storage.put_scalar(loss_name, value) - return log_vars diff --git a/detectron2/detectron2/modeling/poolers.py b/detectron2/detectron2/modeling/poolers.py deleted file mode 100644 index 3393794507c6504bf6ac1bfae7a1c80a0d81725e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/poolers.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -from typing import List, Optional -import torch -from torch import nn -from torchvision.ops import RoIPool - -from detectron2.layers import ROIAlign, ROIAlignRotated, cat, nonzero_tuple, shapes_to_tensor -from detectron2.structures import Boxes -from detectron2.utils.tracing import assert_fx_safe, is_fx_tracing - -""" -To export ROIPooler to torchscript, in this file, variables that should be annotated with -`Union[List[Boxes], List[RotatedBoxes]]` are only annotated with `List[Boxes]`. - -TODO: Correct these annotations when torchscript support `Union`. -https://github.com/pytorch/pytorch/issues/41412 -""" - -__all__ = ["ROIPooler"] - - -def assign_boxes_to_levels( - box_lists: List[Boxes], - min_level: int, - max_level: int, - canonical_box_size: int, - canonical_level: int, -): - """ - Map each box in `box_lists` to a feature map level index and return the assignment - vector. 
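The body of `assign_boxes_to_levels`, deleted just below, implements Eqn. (1) of the FPN paper: a box of area A is routed to level `floor(canonical_level + log2(sqrt(A) / canonical_box_size))`, clamped to the available levels. The sketch below reproduces only that arithmetic (it returns the absolute level rather than an offset); `canonical_box_size=224` and `canonical_level=4` come from the code, while the level range 2..5 is an example assumption for a stride-4..32 pyramid.

```python
import torch

def level_for_boxes(box_areas, min_level=2, max_level=5,
                    canonical_box_size=224, canonical_level=4):
    """Eqn. (1) from the FPN paper: route larger boxes to coarser levels."""
    box_sizes = torch.sqrt(box_areas)
    levels = torch.floor(canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8))
    return torch.clamp(levels, min=min_level, max=max_level).to(torch.int64)

areas = torch.tensor([112.0**2, 224.0**2, 448.0**2, 896.0**2])
print(level_for_boxes(areas))  # tensor([3, 4, 5, 5])
```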
- - Args: - box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, - where N is the number of images in the batch. - min_level (int): Smallest feature map level index. The input is considered index 0, - the output of stage 1 is index 1, and so. - max_level (int): Largest feature map level index. - canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). - canonical_level (int): The feature map level index on which a canonically-sized box - should be placed. - - Returns: - A tensor of length M, where M is the total number of boxes aggregated over all - N batch images. The memory layout corresponds to the concatenation of boxes - from all images. Each element is the feature map index, as an offset from - `self.min_level`, for the corresponding box (so value i means the box is at - `self.min_level + i`). - """ - box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) - # Eqn.(1) in FPN paper - level_assignments = torch.floor( - canonical_level + torch.log2(box_sizes / canonical_box_size + 1e-8) - ) - # clamp level to (min, max), in case the box size is too large or too small - # for the available feature maps - level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) - return level_assignments.to(torch.int64) - min_level - - -# script the module to avoid hardcoded device type -@torch.jit.script_if_tracing -def _convert_boxes_to_pooler_format(boxes: torch.Tensor, sizes: torch.Tensor) -> torch.Tensor: - sizes = sizes.to(device=boxes.device) - indices = torch.repeat_interleave( - torch.arange(len(sizes), dtype=boxes.dtype, device=boxes.device), sizes - ) - return cat([indices[:, None], boxes], dim=1) - - -def convert_boxes_to_pooler_format(box_lists: List[Boxes]): - """ - Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops - (see description under Returns). - - Args: - box_lists (list[Boxes] | list[RotatedBoxes]): - A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. - - Returns: - When input is list[Boxes]: - A tensor of shape (M, 5), where M is the total number of boxes aggregated over all - N batch images. - The 5 columns are (batch index, x0, y0, x1, y1), where batch index - is the index in [0, N) identifying which batch image the box with corners at - (x0, y0, x1, y1) comes from. - When input is list[RotatedBoxes]: - A tensor of shape (M, 6), where M is the total number of boxes aggregated over all - N batch images. - The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), - where batch index is the index in [0, N) identifying which batch image the - rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. - """ - boxes = torch.cat([x.tensor for x in box_lists], dim=0) - # __len__ returns Tensor in tracing. - sizes = shapes_to_tensor([x.__len__() for x in box_lists]) - return _convert_boxes_to_pooler_format(boxes, sizes) - - -@torch.jit.script_if_tracing -def _create_zeros( - batch_target: Optional[torch.Tensor], - channels: int, - height: int, - width: int, - like_tensor: torch.Tensor, -) -> torch.Tensor: - batches = batch_target.shape[0] if batch_target is not None else 0 - sizes = (batches, channels, height, width) - return torch.zeros(sizes, dtype=like_tensor.dtype, device=like_tensor.device) - - -class ROIPooler(nn.Module): - """ - Region of interest feature map pooler that supports pooling from one or more - feature maps. 
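The `convert_boxes_to_pooler_format` helper above flattens per-image box lists into the (M, 5) layout expected by the ROI ops, where column 0 is the batch index. A minimal equivalent with plain tensors (the box coordinates are arbitrary example values):

```python
import torch

def to_pooler_format(box_lists):
    """box_lists: list of (Ni, 4) tensors, one per image -> (sum Ni, 5) tensor."""
    sizes = torch.tensor([len(b) for b in box_lists])
    boxes = torch.cat(box_lists, dim=0)
    batch_idx = torch.repeat_interleave(torch.arange(len(sizes), dtype=boxes.dtype), sizes)
    return torch.cat([batch_idx[:, None], boxes], dim=1)

img0 = torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 20.0, 20.0]])
img1 = torch.tensor([[2.0, 3.0, 8.0, 9.0]])
print(to_pooler_format([img0, img1]))
# tensor([[ 0.,  0.,  0., 10., 10.],
#         [ 0.,  5.,  5., 20., 20.],
#         [ 1.,  2.,  3.,  8.,  9.]])
```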
- """ - - def __init__( - self, - output_size, - scales, - sampling_ratio, - pooler_type, - canonical_box_size=224, - canonical_level=4, - ): - """ - Args: - output_size (int, tuple[int] or list[int]): output size of the pooled region, - e.g., 14 x 14. If tuple or list is given, the length must be 2. - scales (list[float]): The scale for each low-level pooling op relative to - the input image. For a feature map with stride s relative to the input - image, scale is defined as 1/s. The stride must be power of 2. - When there are multiple scales, they must form a pyramid, i.e. they must be - a monotically decreasing geometric sequence with a factor of 1/2. - sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. - pooler_type (string): Name of the type of pooling operation that should be applied. - For instance, "ROIPool" or "ROIAlignV2". - canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default - is heuristically defined as 224 pixels in the FPN paper (based on ImageNet - pre-training). - canonical_level (int): The feature map level index from which a canonically-sized box - should be placed. The default is defined as level 4 (stride=16) in the FPN paper, - i.e., a box of size 224x224 will be placed on the feature with stride=16. - The box placement for all boxes will be determined from their sizes w.r.t - canonical_box_size. For example, a box whose area is 4x that of a canonical box - should be used to pool features from feature level ``canonical_level+1``. - - Note that the actual input feature maps given to this module may not have - sufficiently many levels for the input boxes. If the boxes are too large or too - small for the input feature maps, the closest level will be used. - """ - super().__init__() - - if isinstance(output_size, int): - output_size = (output_size, output_size) - assert len(output_size) == 2 - assert isinstance(output_size[0], int) and isinstance(output_size[1], int) - self.output_size = output_size - - if pooler_type == "ROIAlign": - self.level_poolers = nn.ModuleList( - ROIAlign( - output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False - ) - for scale in scales - ) - elif pooler_type == "ROIAlignV2": - self.level_poolers = nn.ModuleList( - ROIAlign( - output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True - ) - for scale in scales - ) - elif pooler_type == "ROIPool": - self.level_poolers = nn.ModuleList( - RoIPool(output_size, spatial_scale=scale) for scale in scales - ) - elif pooler_type == "ROIAlignRotated": - self.level_poolers = nn.ModuleList( - ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) - for scale in scales - ) - else: - raise ValueError("Unknown pooler type: {}".format(pooler_type)) - - # Map scale (defined as 1 / stride) to its feature map level under the - # assumption that stride is a power of 2. - min_level = -(math.log2(scales[0])) - max_level = -(math.log2(scales[-1])) - assert math.isclose(min_level, int(min_level)) and math.isclose( - max_level, int(max_level) - ), "Featuremap stride is not power of 2!" - self.min_level = int(min_level) - self.max_level = int(max_level) - assert ( - len(scales) == self.max_level - self.min_level + 1 - ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" 
- assert 0 <= self.min_level and self.min_level <= self.max_level - self.canonical_level = canonical_level - assert canonical_box_size > 0 - self.canonical_box_size = canonical_box_size - - def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]): - """ - Args: - x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those - used to construct this module. - box_lists (list[Boxes] | list[RotatedBoxes]): - A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. - The box coordinates are defined on the original image and - will be scaled by the `scales` argument of :class:`ROIPooler`. - - Returns: - Tensor: - A tensor of shape (M, C, output_size, output_size) where M is the total number of - boxes aggregated over all N batch images and C is the number of channels in `x`. - """ - num_level_assignments = len(self.level_poolers) - - if not is_fx_tracing(): - torch._assert( - isinstance(x, list) and isinstance(box_lists, list), - "Arguments to pooler must be lists", - ) - assert_fx_safe( - len(x) == num_level_assignments, - "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( - num_level_assignments, len(x) - ), - ) - assert_fx_safe( - len(box_lists) == x[0].size(0), - "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( - x[0].size(0), len(box_lists) - ), - ) - if len(box_lists) == 0: - return _create_zeros(None, x[0].shape[1], *self.output_size, x[0]) - - pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) - - if num_level_assignments == 1: - return self.level_poolers[0](x[0], pooler_fmt_boxes) - - level_assignments = assign_boxes_to_levels( - box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level - ) - - num_channels = x[0].shape[1] - output_size = self.output_size[0] - - output = _create_zeros(pooler_fmt_boxes, num_channels, output_size, output_size, x[0]) - - for level, pooler in enumerate(self.level_poolers): - inds = nonzero_tuple(level_assignments == level)[0] - pooler_fmt_boxes_level = pooler_fmt_boxes[inds] - # Use index_put_ instead of advance indexing, to avoid pytorch/issues/49852 - output.index_put_((inds,), pooler(x[level], pooler_fmt_boxes_level)) - - return output diff --git a/detectron2/detectron2/modeling/postprocessing.py b/detectron2/detectron2/modeling/postprocessing.py deleted file mode 100644 index 84512606a43d6991df0ae1f046164eb3c70d751a..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/postprocessing.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch -from torch.nn import functional as F - -from detectron2.structures import Instances, ROIMasks - - -# perhaps should rename to "resize_instance" -def detector_postprocess( - results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5 -): - """ - Resize the output instances. - The input images are often resized when entering an object detector. - As a result, we often need the outputs of the detector in a different - resolution from its inputs. - - This function will resize the raw outputs of an R-CNN detector - to produce outputs according to the desired output resolution. - - Args: - results (Instances): the raw outputs from the detector. - `results.image_size` contains the input image resolution the detector sees. - This object might be modified in-place. - output_height, output_width: the desired output resolution. 
- Returns: - Instances: the resized output from the model, based on the output resolution - """ - if isinstance(output_width, torch.Tensor): - # This shape might (but not necessarily) be tensors during tracing. - # Converts integer tensors to float temporaries to ensure true - # division is performed when computing scale_x and scale_y. - output_width_tmp = output_width.float() - output_height_tmp = output_height.float() - new_size = torch.stack([output_height, output_width]) - else: - new_size = (output_height, output_width) - output_width_tmp = output_width - output_height_tmp = output_height - - scale_x, scale_y = ( - output_width_tmp / results.image_size[1], - output_height_tmp / results.image_size[0], - ) - results = Instances(new_size, **results.get_fields()) - - if results.has("pred_boxes"): - output_boxes = results.pred_boxes - elif results.has("proposal_boxes"): - output_boxes = results.proposal_boxes - else: - output_boxes = None - assert output_boxes is not None, "Predictions must contain boxes!" - - output_boxes.scale(scale_x, scale_y) - output_boxes.clip(results.image_size) - - results = results[output_boxes.nonempty()] - - if results.has("pred_masks"): - if isinstance(results.pred_masks, ROIMasks): - roi_masks = results.pred_masks - else: - # pred_masks is a tensor of shape (N, 1, M, M) - roi_masks = ROIMasks(results.pred_masks[:, 0, :, :]) - results.pred_masks = roi_masks.to_bitmasks( - results.pred_boxes, output_height, output_width, mask_threshold - ).tensor # TODO return ROIMasks/BitMask object in the future - - if results.has("pred_keypoints"): - results.pred_keypoints[:, :, 0] *= scale_x - results.pred_keypoints[:, :, 1] *= scale_y - - return results - - -def sem_seg_postprocess(result, img_size, output_height, output_width): - """ - Return semantic segmentation predictions in the original resolution. - - The input images are often resized when entering semantic segmentor. Moreover, in same - cases, they also padded inside segmentor to be divisible by maximum network stride. - As a result, we often need the predictions of the segmentor in a different - resolution from its inputs. - - Args: - result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), - where C is the number of classes, and H, W are the height and width of the prediction. - img_size (tuple): image size that segmentor is taking as input. - output_height, output_width: the desired output resolution. - - Returns: - semantic segmentation prediction (Tensor): A tensor of the shape - (C, output_height, output_width) that contains per-pixel soft predictions. - """ - result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) - result = F.interpolate( - result, size=(output_height, output_width), mode="bilinear", align_corners=False - )[0] - return result diff --git a/detectron2/detectron2/modeling/proposal_generator/__init__.py b/detectron2/detectron2/modeling/proposal_generator/__init__.py deleted file mode 100644 index 3f4e4df7645c67b7a013295207b98fe70b2e574c..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/proposal_generator/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
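`detector_postprocess` above maps predictions from the resolution the model saw back to the caller-requested `height`/`width` by scaling box (and keypoint x/y) coordinates with `output_width / input_width` and `output_height / input_height`, then clipping to the output image. A bare-bones sketch of the box part, using plain tensors and made-up sizes:

```python
import torch

def rescale_boxes(boxes, input_size, output_size):
    """boxes: (N, 4) as (x0, y0, x1, y1) in `input_size` = (h, w) coordinates."""
    in_h, in_w = input_size
    out_h, out_w = output_size
    scale_x, scale_y = out_w / in_w, out_h / in_h
    boxes = boxes * torch.tensor([scale_x, scale_y, scale_x, scale_y])
    # Clip to the output image, as Boxes.clip() does.
    boxes[:, 0::2].clamp_(min=0, max=out_w)
    boxes[:, 1::2].clamp_(min=0, max=out_h)
    return boxes

pred = torch.tensor([[10.0, 20.0, 400.0, 300.0]])
print(rescale_boxes(pred, input_size=(448, 448), output_size=(896, 896)))
# tensor([[ 20.,  40., 800., 600.]])
```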
-from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator -from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN, StandardRPNHead - -__all__ = list(globals().keys()) diff --git a/detectron2/detectron2/modeling/proposal_generator/build.py b/detectron2/detectron2/modeling/proposal_generator/build.py deleted file mode 100644 index 34eb12d00d94ff905b796e75e2c4c5845257c8e9..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/proposal_generator/build.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from detectron2.utils.registry import Registry - -PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") -PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ -Registry for proposal generator, which produces object proposals from feature maps. - -The registered object will be called with `obj(cfg, input_shape)`. -The call should return a `nn.Module` object. -""" - -from . import rpn, rrpn # noqa F401 isort:skip - - -def build_proposal_generator(cfg, input_shape): - """ - Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. - The name can be "PrecomputedProposals" to use no proposal generator. - """ - name = cfg.MODEL.PROPOSAL_GENERATOR.NAME - if name == "PrecomputedProposals": - return None - - return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py b/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py deleted file mode 100644 index 0fdf5dc15c125163c124ab3d04c13bd5b4261588..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import math -from typing import List, Tuple, Union -import torch - -from detectron2.layers import batched_nms, cat, move_device_like -from detectron2.structures import Boxes, Instances - -logger = logging.getLogger(__name__) - - -def _is_tracing(): - # (fixed in TORCH_VERSION >= 1.9) - if torch.jit.is_scripting(): - # https://github.com/pytorch/pytorch/issues/47379 - return False - else: - return torch.jit.is_tracing() - - -def find_top_rpn_proposals( - proposals: List[torch.Tensor], - pred_objectness_logits: List[torch.Tensor], - image_sizes: List[Tuple[int, int]], - nms_thresh: float, - pre_nms_topk: int, - post_nms_topk: int, - min_box_size: float, - training: bool, -): - """ - For each feature map, select the `pre_nms_topk` highest scoring proposals, - apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` - highest scoring proposals among all the feature maps for each image. - - Args: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). - All proposal predictions on the feature maps. - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). - image_sizes (list[tuple]): sizes (h, w) for each image - nms_thresh (float): IoU threshold to use for NMS - pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. - When RPN is run on multiple feature maps (as in FPN) this number is per - feature map. - post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. - When RPN is run on multiple feature maps (as in FPN) this number is total, - over all feature maps. - min_box_size (float): minimum proposal box side length in pixels (absolute units - wrt input images). 
- training (bool): True if proposals are to be used in training, otherwise False. - This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." - comment. - - Returns: - list[Instances]: list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i, sorted by their - objectness score in descending order. - """ - num_images = len(image_sizes) - device = ( - proposals[0].device - if torch.jit.is_scripting() - else ("cpu" if torch.jit.is_tracing() else proposals[0].device) - ) - - # 1. Select top-k anchor for every level and every image - topk_scores = [] # #lvl Tensor, each of shape N x topk - topk_proposals = [] - level_ids = [] # #lvl Tensor, each of shape (topk,) - batch_idx = move_device_like(torch.arange(num_images, device=device), proposals[0]) - for level_id, (proposals_i, logits_i) in enumerate(zip(proposals, pred_objectness_logits)): - Hi_Wi_A = logits_i.shape[1] - if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing - num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk) - else: - num_proposals_i = min(Hi_Wi_A, pre_nms_topk) - - topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) - - # each is N x topk - topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 - - topk_proposals.append(topk_proposals_i) - topk_scores.append(topk_scores_i) - level_ids.append( - move_device_like( - torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device), - proposals[0], - ) - ) - - # 2. Concat all levels together - topk_scores = cat(topk_scores, dim=1) - topk_proposals = cat(topk_proposals, dim=1) - level_ids = cat(level_ids, dim=0) - - # 3. For each image, run a per-level NMS, and choose topk results. - results: List[Instances] = [] - for n, image_size in enumerate(image_sizes): - boxes = Boxes(topk_proposals[n]) - scores_per_img = topk_scores[n] - lvl = level_ids - - valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) - if not valid_mask.all(): - if training: - raise FloatingPointError( - "Predicted boxes or scores contain Inf/NaN. Training has diverged." - ) - boxes = boxes[valid_mask] - scores_per_img = scores_per_img[valid_mask] - lvl = lvl[valid_mask] - boxes.clip(image_size) - - # filter empty boxes - keep = boxes.nonempty(threshold=min_box_size) - if _is_tracing() or keep.sum().item() != len(boxes): - boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] - - keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) - # In Detectron1, there was different behavior during training vs. testing. - # (https://github.com/facebookresearch/Detectron/issues/459) - # During training, topk is over the proposals from *all* images in the training batch. - # During testing, it is over the proposals for each image separately. - # As a result, the training behavior becomes batch-dependent, - # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. - # This bug is addressed in Detectron2 to make the behavior independent of batch size. - keep = keep[:post_nms_topk] # keep is already sorted - - res = Instances(image_size) - res.proposal_boxes = boxes[keep] - res.objectness_logits = scores_per_img[keep] - results.append(res) - return results - - -def add_ground_truth_to_proposals( - gt: Union[List[Instances], List[Boxes]], proposals: List[Instances] -) -> List[Instances]: - """ - Call `add_ground_truth_to_proposals_single_image` for all images. - - Args: - gt(Union[List[Instances], List[Boxes]): list of N elements. 
Element i is a Instances - representing the ground-truth for image i. - proposals (list[Instances]): list of N elements. Element i is a Instances - representing the proposals for image i. - - Returns: - list[Instances]: list of N Instances. Each is the proposals for the image, - with field "proposal_boxes" and "objectness_logits". - """ - assert gt is not None - - if len(proposals) != len(gt): - raise ValueError("proposals and gt should have the same length as the number of images!") - if len(proposals) == 0: - return proposals - - return [ - add_ground_truth_to_proposals_single_image(gt_i, proposals_i) - for gt_i, proposals_i in zip(gt, proposals) - ] - - -def add_ground_truth_to_proposals_single_image( - gt: Union[Instances, Boxes], proposals: Instances -) -> Instances: - """ - Augment `proposals` with `gt`. - - Args: - Same as `add_ground_truth_to_proposals`, but with gt and proposals - per image. - - Returns: - Same as `add_ground_truth_to_proposals`, but for only one image. - """ - if isinstance(gt, Boxes): - # convert Boxes to Instances - gt = Instances(proposals.image_size, gt_boxes=gt) - - gt_boxes = gt.gt_boxes - device = proposals.objectness_logits.device - # Assign all ground-truth boxes an objectness logit corresponding to - # P(object) = sigmoid(logit) =~ 1. - gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) - gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) - - # Concatenating gt_boxes with proposals requires them to have the same fields - gt_proposal = Instances(proposals.image_size, **gt.get_fields()) - gt_proposal.proposal_boxes = gt_boxes - gt_proposal.objectness_logits = gt_logits - - for key in proposals.get_fields().keys(): - assert gt_proposal.has( - key - ), "The attribute '{}' in `proposals` does not exist in `gt`".format(key) - - # NOTE: Instances.cat only use fields from the first item. Extra fields in latter items - # will be thrown away. - new_proposals = Instances.cat([proposals, gt_proposal]) - - return new_proposals diff --git a/detectron2/detectron2/modeling/proposal_generator/rpn.py b/detectron2/detectron2/modeling/proposal_generator/rpn.py deleted file mode 100644 index 99cd536d2f9880d2049390c45f73eb22335e1b82..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/proposal_generator/rpn.py +++ /dev/null @@ -1,533 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from typing import Dict, List, Optional, Tuple, Union -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ShapeSpec, cat -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage -from detectron2.utils.memory import retry_if_cuda_oom -from detectron2.utils.registry import Registry - -from ..anchor_generator import build_anchor_generator -from ..box_regression import Box2BoxTransform, _dense_box_regression_loss -from ..matcher import Matcher -from ..sampling import subsample_labels -from .build import PROPOSAL_GENERATOR_REGISTRY -from .proposal_utils import find_top_rpn_proposals - -RPN_HEAD_REGISTRY = Registry("RPN_HEAD") -RPN_HEAD_REGISTRY.__doc__ = """ -Registry for RPN heads, which take feature maps and perform -objectness classification and bounding box regression for anchors. - -The registered object will be called with `obj(cfg, input_shape)`. -The call should return a `nn.Module` object. 
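When ground-truth boxes are appended to the proposals above, they receive an objectness logit of `log((1 - 1e-10) / 1e-10)`, i.e. a value whose sigmoid is essentially 1, so they always rank ahead of predicted proposals. A quick check of that value:

```python
import math

eps = 1e-10
gt_logit_value = math.log((1.0 - eps) / (1 - (1.0 - eps)))
print(gt_logit_value)                            # ≈ 23.0
print(1.0 / (1.0 + math.exp(-gt_logit_value)))   # ≈ 1.0, i.e. P(object) ~ 1 for gt boxes
```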
-""" - - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - L: number of feature maps per image on which RPN is run - A: number of cell anchors (must be the same for all feature maps) - Hi, Wi: height and width of the i-th feature map - B: size of the box parameterization - -Naming convention: - - objectness: refers to the binary classification of an anchor as object vs. not object. - - deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransform`), or 5d for rotated boxes. - - pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use - sigmoid(pred_objectness_logits) to estimate P(object). - - gt_labels: ground-truth binary classification labels for objectness - - pred_anchor_deltas: predicted box2box transform deltas - - gt_anchor_deltas: ground-truth box2box transform deltas -""" - - -def build_rpn_head(cfg, input_shape): - """ - Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. - """ - name = cfg.MODEL.RPN.HEAD_NAME - return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) - - -@RPN_HEAD_REGISTRY.register() -class StandardRPNHead(nn.Module): - """ - Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. - Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts - objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas - specifying how to deform each anchor into an object proposal. - """ - - @configurable - def __init__( - self, *, in_channels: int, num_anchors: int, box_dim: int = 4, conv_dims: List[int] = (-1,) - ): - """ - NOTE: this interface is experimental. - - Args: - in_channels (int): number of input feature channels. When using multiple - input features, they must have the same number of channels. - num_anchors (int): number of anchors to predict for *each spatial position* - on the feature map. The total number of anchors for each - feature map will be `num_anchors * H * W`. - box_dim (int): dimension of a box, which is also the number of box regression - predictions to make for each anchor. An axis aligned box has - box_dim=4, while a rotated box has box_dim=5. - conv_dims (list[int]): a list of integers representing the output channels - of N conv layers. Set it to -1 to use the same number of output channels - as input channels. - """ - super().__init__() - cur_channels = in_channels - # Keeping the old variable names and structure for backwards compatiblity. - # Otherwise the old checkpoints will fail to load. - if len(conv_dims) == 1: - out_channels = cur_channels if conv_dims[0] == -1 else conv_dims[0] - # 3x3 conv for the hidden representation - self.conv = self._get_rpn_conv(cur_channels, out_channels) - cur_channels = out_channels - else: - self.conv = nn.Sequential() - for k, conv_dim in enumerate(conv_dims): - out_channels = cur_channels if conv_dim == -1 else conv_dim - if out_channels <= 0: - raise ValueError( - f"Conv output channels should be greater than 0. 
Got {out_channels}" - ) - conv = self._get_rpn_conv(cur_channels, out_channels) - self.conv.add_module(f"conv{k}", conv) - cur_channels = out_channels - # 1x1 conv for predicting objectness logits - self.objectness_logits = nn.Conv2d(cur_channels, num_anchors, kernel_size=1, stride=1) - # 1x1 conv for predicting box2box transform deltas - self.anchor_deltas = nn.Conv2d(cur_channels, num_anchors * box_dim, kernel_size=1, stride=1) - - # Keeping the order of weights initialization same for backwards compatiblility. - for layer in self.modules(): - if isinstance(layer, nn.Conv2d): - nn.init.normal_(layer.weight, std=0.01) - nn.init.constant_(layer.bias, 0) - - def _get_rpn_conv(self, in_channels, out_channels): - return Conv2d( - in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - activation=nn.ReLU(), - ) - - @classmethod - def from_config(cls, cfg, input_shape): - # Standard RPN is shared across levels: - in_channels = [s.channels for s in input_shape] - assert len(set(in_channels)) == 1, "Each level must have the same channel!" - in_channels = in_channels[0] - - # RPNHead should take the same input as anchor generator - # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. - anchor_generator = build_anchor_generator(cfg, input_shape) - num_anchors = anchor_generator.num_anchors - box_dim = anchor_generator.box_dim - assert ( - len(set(num_anchors)) == 1 - ), "Each level must have the same number of anchors per spatial position" - return { - "in_channels": in_channels, - "num_anchors": num_anchors[0], - "box_dim": box_dim, - "conv_dims": cfg.MODEL.RPN.CONV_DIMS, - } - - def forward(self, features: List[torch.Tensor]): - """ - Args: - features (list[Tensor]): list of feature maps - - Returns: - list[Tensor]: A list of L elements. - Element i is a tensor of shape (N, A, Hi, Wi) representing - the predicted objectness logits for all anchors. A is the number of cell anchors. - list[Tensor]: A list of L elements. Element i is a tensor of shape - (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors - to proposals. - """ - pred_objectness_logits = [] - pred_anchor_deltas = [] - for x in features: - t = self.conv(x) - pred_objectness_logits.append(self.objectness_logits(t)) - pred_anchor_deltas.append(self.anchor_deltas(t)) - return pred_objectness_logits, pred_anchor_deltas - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class RPN(nn.Module): - """ - Region Proposal Network, introduced by :paper:`Faster R-CNN`. - """ - - @configurable - def __init__( - self, - *, - in_features: List[str], - head: nn.Module, - anchor_generator: nn.Module, - anchor_matcher: Matcher, - box2box_transform: Box2BoxTransform, - batch_size_per_image: int, - positive_fraction: float, - pre_nms_topk: Tuple[float, float], - post_nms_topk: Tuple[float, float], - nms_thresh: float = 0.7, - min_box_size: float = 0.0, - anchor_boundary_thresh: float = -1.0, - loss_weight: Union[float, Dict[str, float]] = 1.0, - box_reg_loss_type: str = "smooth_l1", - smooth_l1_beta: float = 0.0, - ): - """ - NOTE: this interface is experimental. - - Args: - in_features (list[str]): list of names of input features to use - head (nn.Module): a module that predicts logits and regression deltas - for each level from a list of per-level features - anchor_generator (nn.Module): a module that creates anchors from a - list of features. Usually an instance of :class:`AnchorGenerator` - anchor_matcher (Matcher): label the anchors by matching them with ground truth. 
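For intuition, here is a stripped-down, self-contained stand-in for the RPN head described above, assuming a single feature level, 256 input channels, 3 anchors per location, and 4-d boxes (all toy values, not taken from any real config):

import torch
from torch import nn

# Toy stand-in for a standard RPN head on one feature map.
in_channels, num_anchors, box_dim = 256, 3, 4
hidden = nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1)   # shared 3x3 conv
objectness = nn.Conv2d(in_channels, num_anchors, kernel_size=1)          # 1 logit per anchor per cell
deltas = nn.Conv2d(in_channels, num_anchors * box_dim, kernel_size=1)    # box_dim deltas per anchor per cell

feat = torch.randn(2, in_channels, 32, 32)   # (N, C, Hi, Wi)
t = torch.relu(hidden(feat))
print(objectness(t).shape)                   # torch.Size([2, 3, 32, 32])
print(deltas(t).shape)                       # torch.Size([2, 12, 32, 32])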
- box2box_transform (Box2BoxTransform): defines the transform from anchors boxes to - instance boxes - batch_size_per_image (int): number of anchors per image to sample for training - positive_fraction (float): fraction of foreground anchors to sample for training - pre_nms_topk (tuple[float]): (train, test) that represents the - number of top k proposals to select before NMS, in - training and testing. - post_nms_topk (tuple[float]): (train, test) that represents the - number of top k proposals to select after NMS, in - training and testing. - nms_thresh (float): NMS threshold used to de-duplicate the predicted proposals - min_box_size (float): remove proposal boxes with any side smaller than this threshold, - in the unit of input image pixels - anchor_boundary_thresh (float): legacy option - loss_weight (float|dict): weights to use for losses. Can be single float for weighting - all rpn losses together, or a dict of individual weightings. Valid dict keys are: - "loss_rpn_cls" - applied to classification loss - "loss_rpn_loc" - applied to box regression loss - box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou". - smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to - use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1" - """ - super().__init__() - self.in_features = in_features - self.rpn_head = head - self.anchor_generator = anchor_generator - self.anchor_matcher = anchor_matcher - self.box2box_transform = box2box_transform - self.batch_size_per_image = batch_size_per_image - self.positive_fraction = positive_fraction - # Map from self.training state to train/test settings - self.pre_nms_topk = {True: pre_nms_topk[0], False: pre_nms_topk[1]} - self.post_nms_topk = {True: post_nms_topk[0], False: post_nms_topk[1]} - self.nms_thresh = nms_thresh - self.min_box_size = float(min_box_size) - self.anchor_boundary_thresh = anchor_boundary_thresh - if isinstance(loss_weight, float): - loss_weight = {"loss_rpn_cls": loss_weight, "loss_rpn_loc": loss_weight} - self.loss_weight = loss_weight - self.box_reg_loss_type = box_reg_loss_type - self.smooth_l1_beta = smooth_l1_beta - - @classmethod - def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): - in_features = cfg.MODEL.RPN.IN_FEATURES - ret = { - "in_features": in_features, - "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE, - "nms_thresh": cfg.MODEL.RPN.NMS_THRESH, - "batch_size_per_image": cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, - "positive_fraction": cfg.MODEL.RPN.POSITIVE_FRACTION, - "loss_weight": { - "loss_rpn_cls": cfg.MODEL.RPN.LOSS_WEIGHT, - "loss_rpn_loc": cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT, - }, - "anchor_boundary_thresh": cfg.MODEL.RPN.BOUNDARY_THRESH, - "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS), - "box_reg_loss_type": cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE, - "smooth_l1_beta": cfg.MODEL.RPN.SMOOTH_L1_BETA, - } - - ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, cfg.MODEL.RPN.PRE_NMS_TOPK_TEST) - ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, cfg.MODEL.RPN.POST_NMS_TOPK_TEST) - - ret["anchor_generator"] = build_anchor_generator(cfg, [input_shape[f] for f in in_features]) - ret["anchor_matcher"] = Matcher( - cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True - ) - ret["head"] = build_rpn_head(cfg, [input_shape[f] for f in in_features]) - return ret - - def _subsample_labels(self, label): - """ - Randomly sample a subset of positive and 
negative examples, and overwrite - the label vector to the ignore value (-1) for all elements that are not - included in the sample. - - Args: - labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. - """ - pos_idx, neg_idx = subsample_labels( - label, self.batch_size_per_image, self.positive_fraction, 0 - ) - # Fill with the ignore label (-1), then set positive and negative labels - label.fill_(-1) - label.scatter_(0, pos_idx, 1) - label.scatter_(0, neg_idx, 0) - return label - - @torch.jit.unused - @torch.no_grad() - def label_and_sample_anchors( - self, anchors: List[Boxes], gt_instances: List[Instances] - ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: - """ - Args: - anchors (list[Boxes]): anchors for each feature map. - gt_instances: the ground-truth instances for each image. - - Returns: - list[Tensor]: - List of #img tensors. i-th element is a vector of labels whose length is - the total number of anchors across all feature maps R = sum(Hi * Wi * A). - Label values are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative - class; 1 = positive class. - list[Tensor]: - i-th element is a Rx4 tensor. The values are the matched gt boxes for each - anchor. Values are undefined for those anchors not labeled as 1. - """ - anchors = Boxes.cat(anchors) - - gt_boxes = [x.gt_boxes for x in gt_instances] - image_sizes = [x.image_size for x in gt_instances] - del gt_instances - - gt_labels = [] - matched_gt_boxes = [] - for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): - """ - image_size_i: (h, w) for the i-th image - gt_boxes_i: ground-truth boxes for i-th image - """ - - match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) - matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) - # Matching is memory-expensive and may result in CPU tensors. But the result is small - gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) - del match_quality_matrix - - if self.anchor_boundary_thresh >= 0: - # Discard anchors that go out of the boundaries of the image - # NOTE: This is legacy functionality that is turned off by default in Detectron2 - anchors_inside_image = anchors.inside_box(image_size_i, self.anchor_boundary_thresh) - gt_labels_i[~anchors_inside_image] = -1 - - # A vector of labels (-1, 0, 1) for each anchor - gt_labels_i = self._subsample_labels(gt_labels_i) - - if len(gt_boxes_i) == 0: - # These values won't be used anyway since the anchor is labeled as background - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - else: - # TODO wasted indexing computation for ignored boxes - matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor - - gt_labels.append(gt_labels_i) # N,AHW - matched_gt_boxes.append(matched_gt_boxes_i) - return gt_labels, matched_gt_boxes - - @torch.jit.unused - def losses( - self, - anchors: List[Boxes], - pred_objectness_logits: List[torch.Tensor], - gt_labels: List[torch.Tensor], - pred_anchor_deltas: List[torch.Tensor], - gt_boxes: List[torch.Tensor], - ) -> Dict[str, torch.Tensor]: - """ - Return the losses from a set of RPN predictions and their associated ground-truth. - - Args: - anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each - has shape (Hi*Wi*A, B), where B is box dimension (4 or 5). - pred_objectness_logits (list[Tensor]): A list of L elements. - Element i is a tensor of shape (N, Hi*Wi*A) representing - the predicted objectness logits for all anchors. - gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`. 
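To make the label bookkeeping in `_subsample_labels` above concrete, here is a tiny standalone run of the fill/scatter pattern (the indices are made up):

import torch

labels = torch.tensor([1, 0, 1, 0, 0, 1])   # raw matcher output: 1 = foreground, 0 = background
pos_idx = torch.tensor([0, 2])              # sampled foreground anchors
neg_idx = torch.tensor([1, 4])              # sampled background anchors

labels.fill_(-1)                            # everything not sampled is ignored (-1)
labels.scatter_(0, pos_idx, 1)
labels.scatter_(0, neg_idx, 0)
print(labels)                               # tensor([ 1,  0,  1, -1,  0, -1])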
- pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape - (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors - to proposals. - gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`. - - Returns: - dict[loss name -> loss value]: A dict mapping from loss name to loss value. - Loss names are: `loss_rpn_cls` for objectness classification and - `loss_rpn_loc` for proposal localization. - """ - num_images = len(gt_labels) - gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) - - # Log the number of positive/negative anchors per-image that's used in training - pos_mask = gt_labels == 1 - num_pos_anchors = pos_mask.sum().item() - num_neg_anchors = (gt_labels == 0).sum().item() - storage = get_event_storage() - storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) - storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) - - localization_loss = _dense_box_regression_loss( - anchors, - self.box2box_transform, - pred_anchor_deltas, - gt_boxes, - pos_mask, - box_reg_loss_type=self.box_reg_loss_type, - smooth_l1_beta=self.smooth_l1_beta, - ) - - valid_mask = gt_labels >= 0 - objectness_loss = F.binary_cross_entropy_with_logits( - cat(pred_objectness_logits, dim=1)[valid_mask], - gt_labels[valid_mask].to(torch.float32), - reduction="sum", - ) - normalizer = self.batch_size_per_image * num_images - losses = { - "loss_rpn_cls": objectness_loss / normalizer, - # The original Faster R-CNN paper uses a slightly different normalizer - # for loc loss. But it doesn't matter in practice - "loss_rpn_loc": localization_loss / normalizer, - } - losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} - return losses - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - gt_instances: Optional[List[Instances]] = None, - ): - """ - Args: - images (ImageList): input images of length `N` - features (dict[str, Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - gt_instances (list[Instances], optional): a length `N` list of `Instances`s. - Each `Instances` stores ground-truth instances for the corresponding image. - - Returns: - proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" - loss: dict[Tensor] or None - """ - features = [features[f] for f in self.in_features] - anchors = self.anchor_generator(features) - - pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) - # Transpose the Hi*Wi*A dimension to the middle: - pred_objectness_logits = [ - # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) - score.permute(0, 2, 3, 1).flatten(1) - for score in pred_objectness_logits - ] - pred_anchor_deltas = [ - # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) - x.view(x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1]) - .permute(0, 3, 4, 1, 2) - .flatten(1, -2) - for x in pred_anchor_deltas - ] - - if self.training: - assert gt_instances is not None, "RPN requires gt_instances in training!" 
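As a quick, self-contained sanity check of the permute/flatten bookkeeping in `forward()` above (toy sizes only):

import torch

N, A, B, H, W = 2, 3, 4, 5, 6

logits = torch.randn(N, A, H, W)
flat_logits = logits.permute(0, 2, 3, 1).flatten(1)   # (N, Hi*Wi*A)
assert flat_logits.shape == (N, H * W * A)

deltas = torch.randn(N, A * B, H, W)
flat_deltas = (
    deltas.view(N, A, B, H, W)   # split the channel axis into (A, B)
    .permute(0, 3, 4, 1, 2)      # (N, Hi, Wi, A, B)
    .flatten(1, -2)              # (N, Hi*Wi*A, B)
)
assert flat_deltas.shape == (N, H * W * A, B)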
- gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) - losses = self.losses( - anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes - ) - else: - losses = {} - proposals = self.predict_proposals( - anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes - ) - return proposals, losses - - def predict_proposals( - self, - anchors: List[Boxes], - pred_objectness_logits: List[torch.Tensor], - pred_anchor_deltas: List[torch.Tensor], - image_sizes: List[Tuple[int, int]], - ): - """ - Decode all the predicted box regression deltas to proposals. Find the top proposals - by applying NMS and removing boxes that are too small. - - Returns: - proposals (list[Instances]): list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i, sorted by their - objectness score in descending order. - """ - # The proposals are treated as fixed for joint training with roi heads. - # This approach ignores the derivative w.r.t. the proposal boxes’ coordinates that - # are also network responses. - with torch.no_grad(): - pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) - return find_top_rpn_proposals( - pred_proposals, - pred_objectness_logits, - image_sizes, - self.nms_thresh, - self.pre_nms_topk[self.training], - self.post_nms_topk[self.training], - self.min_box_size, - self.training, - ) - - def _decode_proposals(self, anchors: List[Boxes], pred_anchor_deltas: List[torch.Tensor]): - """ - Transform anchors into proposals by applying the predicted anchor deltas. - - Returns: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape - (N, Hi*Wi*A, B) - """ - N = pred_anchor_deltas[0].shape[0] - proposals = [] - # For each feature map - for anchors_i, pred_anchor_deltas_i in zip(anchors, pred_anchor_deltas): - B = anchors_i.tensor.size(1) - pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) - # Expand anchors to shape (N*Hi*Wi*A, B) - anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) - proposals_i = self.box2box_transform.apply_deltas(pred_anchor_deltas_i, anchors_i) - # Append feature map proposals with shape (N, Hi*Wi*A, B) - proposals.append(proposals_i.view(N, -1, B)) - return proposals diff --git a/detectron2/detectron2/modeling/proposal_generator/rrpn.py b/detectron2/detectron2/modeling/proposal_generator/rrpn.py deleted file mode 100644 index 1a3cd282c2d1ede5c60a7c2c84846cbeed7808f0..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/proposal_generator/rrpn.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import itertools -import logging -from typing import Dict, List -import torch - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec, batched_nms_rotated, cat -from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.memory import retry_if_cuda_oom - -from ..box_regression import Box2BoxTransformRotated -from .build import PROPOSAL_GENERATOR_REGISTRY -from .proposal_utils import _is_tracing -from .rpn import RPN - -logger = logging.getLogger(__name__) - - -def find_top_rrpn_proposals( - proposals, - pred_objectness_logits, - image_sizes, - nms_thresh, - pre_nms_topk, - post_nms_topk, - min_box_size, - training, -): - """ - For each feature map, select the `pre_nms_topk` highest scoring proposals, - apply NMS, clip proposals, and remove small boxes. 
Return the `post_nms_topk` - highest scoring proposals among all the feature maps if `training` is True, - otherwise, returns the highest `post_nms_topk` scoring proposals for each - feature map. - - Args: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). - All proposal predictions on the feature maps. - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). - image_sizes (list[tuple]): sizes (h, w) for each image - nms_thresh (float): IoU threshold to use for NMS - pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. - When RRPN is run on multiple feature maps (as in FPN) this number is per - feature map. - post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. - When RRPN is run on multiple feature maps (as in FPN) this number is total, - over all feature maps. - min_box_size(float): minimum proposal box side length in pixels (absolute units wrt - input images). - training (bool): True if proposals are to be used in training, otherwise False. - This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." - comment. - - Returns: - proposals (list[Instances]): list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i. - """ - num_images = len(image_sizes) - device = proposals[0].device - - # 1. Select top-k anchor for every level and every image - topk_scores = [] # #lvl Tensor, each of shape N x topk - topk_proposals = [] - level_ids = [] # #lvl Tensor, each of shape (topk,) - batch_idx = torch.arange(num_images, device=device) - for level_id, proposals_i, logits_i in zip( - itertools.count(), proposals, pred_objectness_logits - ): - Hi_Wi_A = logits_i.shape[1] - if isinstance(Hi_Wi_A, torch.Tensor): # it's a tensor in tracing - num_proposals_i = torch.clamp(Hi_Wi_A, max=pre_nms_topk) - else: - num_proposals_i = min(Hi_Wi_A, pre_nms_topk) - - topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) - - # each is N x topk - topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 - - topk_proposals.append(topk_proposals_i) - topk_scores.append(topk_scores_i) - level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) - - # 2. Concat all levels together - topk_scores = cat(topk_scores, dim=1) - topk_proposals = cat(topk_proposals, dim=1) - level_ids = cat(level_ids, dim=0) - - # 3. For each image, run a per-level NMS, and choose topk results. - results = [] - for n, image_size in enumerate(image_sizes): - boxes = RotatedBoxes(topk_proposals[n]) - scores_per_img = topk_scores[n] - lvl = level_ids - - valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) - if not valid_mask.all(): - if training: - raise FloatingPointError( - "Predicted boxes or scores contain Inf/NaN. Training has diverged." - ) - boxes = boxes[valid_mask] - scores_per_img = scores_per_img[valid_mask] - lvl = lvl[valid_mask] - boxes.clip(image_size) - - # filter empty boxes - keep = boxes.nonempty(threshold=min_box_size) - if _is_tracing() or keep.sum().item() != len(boxes): - boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], lvl[keep]) - - keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) - # In Detectron1, there was different behavior during training vs. testing. - # (https://github.com/facebookresearch/Detectron/issues/459) - # During training, topk is over the proposals from *all* images in the training batch. 
- # During testing, it is over the proposals for each image separately. - # As a result, the training behavior becomes batch-dependent, - # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. - # This bug is addressed in Detectron2 to make the behavior independent of batch size. - keep = keep[:post_nms_topk] - - res = Instances(image_size) - res.proposal_boxes = boxes[keep] - res.objectness_logits = scores_per_img[keep] - results.append(res) - return results - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class RRPN(RPN): - """ - Rotated Region Proposal Network described in :paper:`RRPN`. - """ - - @configurable - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - if self.anchor_boundary_thresh >= 0: - raise NotImplementedError( - "anchor_boundary_thresh is a legacy option not implemented for RRPN." - ) - - @classmethod - def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]): - ret = super().from_config(cfg, input_shape) - ret["box2box_transform"] = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - return ret - - @torch.no_grad() - def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): - """ - Args: - anchors (list[RotatedBoxes]): anchors for each feature map. - gt_instances: the ground-truth instances for each image. - - Returns: - list[Tensor]: - List of #img tensors. i-th element is a vector of labels whose length is - the total number of anchors across feature maps. Label values are in {-1, 0, 1}, - with meanings: -1 = ignore; 0 = negative class; 1 = positive class. - list[Tensor]: - i-th element is a Nx5 tensor, where N is the total number of anchors across - feature maps. The values are the matched gt boxes for each anchor. - Values are undefined for those anchors not labeled as 1. - """ - anchors = RotatedBoxes.cat(anchors) - - gt_boxes = [x.gt_boxes for x in gt_instances] - del gt_instances - - gt_labels = [] - matched_gt_boxes = [] - for gt_boxes_i in gt_boxes: - """ - gt_boxes_i: ground-truth boxes for i-th image - """ - match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) - matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) - # Matching is memory-expensive and may result in CPU tensors. 
But the result is small - gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) - - # A vector of labels (-1, 0, 1) for each anchor - gt_labels_i = self._subsample_labels(gt_labels_i) - - if len(gt_boxes_i) == 0: - # These values won't be used anyway since the anchor is labeled as background - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - else: - # TODO wasted indexing computation for ignored boxes - matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor - - gt_labels.append(gt_labels_i) # N,AHW - matched_gt_boxes.append(matched_gt_boxes_i) - return gt_labels, matched_gt_boxes - - @torch.no_grad() - def predict_proposals(self, anchors, pred_objectness_logits, pred_anchor_deltas, image_sizes): - pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) - return find_top_rrpn_proposals( - pred_proposals, - pred_objectness_logits, - image_sizes, - self.nms_thresh, - self.pre_nms_topk[self.training], - self.post_nms_topk[self.training], - self.min_box_size, - self.training, - ) diff --git a/detectron2/detectron2/modeling/roi_heads/__init__.py b/detectron2/detectron2/modeling/roi_heads/__init__.py deleted file mode 100644 index d13e9c57235b982f3e0645bc316de2b75755dfda..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head, FastRCNNConvFCHead -from .keypoint_head import ( - ROI_KEYPOINT_HEAD_REGISTRY, - build_keypoint_head, - BaseKeypointRCNNHead, - KRCNNConvDeconvUpsampleHead, -) -from .mask_head import ( - ROI_MASK_HEAD_REGISTRY, - build_mask_head, - BaseMaskRCNNHead, - MaskRCNNConvUpsampleHead, -) -from .roi_heads import ( - ROI_HEADS_REGISTRY, - ROIHeads, - Res5ROIHeads, - StandardROIHeads, - build_roi_heads, - select_foreground_proposals, -) -from .cascade_rcnn import CascadeROIHeads -from .rotated_fast_rcnn import RROIHeads -from .fast_rcnn import FastRCNNOutputLayers - -from . import cascade_rcnn # isort:skip - -__all__ = list(globals().keys()) diff --git a/detectron2/detectron2/modeling/roi_heads/box_head.py b/detectron2/detectron2/modeling/roi_heads/box_head.py deleted file mode 100644 index 5d0370b0400d9268f13c905e4096a84ce42e9bfd..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/box_head.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -from typing import List -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ShapeSpec, get_norm -from detectron2.utils.registry import Registry - -__all__ = ["FastRCNNConvFCHead", "build_box_head", "ROI_BOX_HEAD_REGISTRY"] - -ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") -ROI_BOX_HEAD_REGISTRY.__doc__ = """ -Registry for box heads, which make box predictions from per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -# To get torchscript support, we make the head a subclass of `nn.Sequential`. -# Therefore, to add new layers in this head class, please make sure they are -# added in the order they will be used in forward(). -@ROI_BOX_HEAD_REGISTRY.register() -class FastRCNNConvFCHead(nn.Sequential): - """ - A head with several 3x3 conv layers (each followed by norm & relu) and then - several fc layers (each followed by relu). 
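For intuition, a stripped-down stand-in for such a conv-then-fc head, assuming a 256x7x7 pooled region feature and two layers of each kind (all sizes illustrative):

import torch
from torch import nn

# Illustrative conv->fc box head: two 3x3 convs, then two fc layers, as described above.
head = nn.Sequential(
    nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.Flatten(),
    nn.Linear(256 * 7 * 7, 1024), nn.ReLU(),
    nn.Linear(1024, 1024), nn.ReLU(),
)
x = torch.randn(8, 256, 7, 7)   # 8 pooled RoI features
print(head(x).shape)            # torch.Size([8, 1024])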
- """ - - @configurable - def __init__( - self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature. - conv_dims (list[int]): the output dimensions of the conv layers - fc_dims (list[int]): the output dimensions of the fc layers - conv_norm (str or callable): normalization for the conv layers. - See :func:`detectron2.layers.get_norm` for supported types. - """ - super().__init__() - assert len(conv_dims) + len(fc_dims) > 0 - - self._output_size = (input_shape.channels, input_shape.height, input_shape.width) - - self.conv_norm_relus = [] - for k, conv_dim in enumerate(conv_dims): - conv = Conv2d( - self._output_size[0], - conv_dim, - kernel_size=3, - padding=1, - bias=not conv_norm, - norm=get_norm(conv_norm, conv_dim), - activation=nn.ReLU(), - ) - self.add_module("conv{}".format(k + 1), conv) - self.conv_norm_relus.append(conv) - self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) - - self.fcs = [] - for k, fc_dim in enumerate(fc_dims): - if k == 0: - self.add_module("flatten", nn.Flatten()) - fc = nn.Linear(int(np.prod(self._output_size)), fc_dim) - self.add_module("fc{}".format(k + 1), fc) - self.add_module("fc_relu{}".format(k + 1), nn.ReLU()) - self.fcs.append(fc) - self._output_size = fc_dim - - for layer in self.conv_norm_relus: - weight_init.c2_msra_fill(layer) - for layer in self.fcs: - weight_init.c2_xavier_fill(layer) - - @classmethod - def from_config(cls, cfg, input_shape): - num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV - conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM - num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC - fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM - return { - "input_shape": input_shape, - "conv_dims": [conv_dim] * num_conv, - "fc_dims": [fc_dim] * num_fc, - "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, - } - - def forward(self, x): - for layer in self: - x = layer(x) - return x - - @property - @torch.jit.unused - def output_shape(self): - """ - Returns: - ShapeSpec: the output feature shape - """ - o = self._output_size - if isinstance(o, int): - return ShapeSpec(channels=o) - else: - return ShapeSpec(channels=o[0], height=o[1], width=o[2]) - - -def build_box_head(cfg, input_shape): - """ - Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_BOX_HEAD.NAME - return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py b/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py deleted file mode 100644 index a0ca70fe23a1d406ee9bed6204a987d7e0708b91..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py +++ /dev/null @@ -1,299 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
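The registry plumbing behind `build_box_head` above boils down to a name-to-class lookup. A rough standalone imitation of that pattern (this is not the detectron2 `Registry` class itself, and the names here are invented):

# Minimal imitation of the register/build pattern used by the box head registry.
_BOX_HEADS = {}

def register(name):
    def deco(cls):
        _BOX_HEADS[name] = cls
        return cls
    return deco

@register("MyConvFCHead")
class MyConvFCHead:
    def __init__(self, cfg, input_shape):
        self.cfg, self.input_shape = cfg, input_shape

def build_box_head(cfg, input_shape, name="MyConvFCHead"):
    # In detectron2 the name would come from cfg.MODEL.ROI_BOX_HEAD.NAME.
    return _BOX_HEADS[name](cfg, input_shape)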
-from typing import List -import torch -from torch import nn -from torch.autograd.function import Function - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage - -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..poolers import ROIPooler -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference -from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads - - -class _ScaleGradient(Function): - @staticmethod - def forward(ctx, input, scale): - ctx.scale = scale - return input - - @staticmethod - def backward(ctx, grad_output): - return grad_output * ctx.scale, None - - -@ROI_HEADS_REGISTRY.register() -class CascadeROIHeads(StandardROIHeads): - """ - The ROI heads that implement :paper:`Cascade R-CNN`. - """ - - @configurable - def __init__( - self, - *, - box_in_features: List[str], - box_pooler: ROIPooler, - box_heads: List[nn.Module], - box_predictors: List[nn.Module], - proposal_matchers: List[Matcher], - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - box_pooler (ROIPooler): pooler that extracts region features from given boxes - box_heads (list[nn.Module]): box head for each cascade stage - box_predictors (list[nn.Module]): box predictor for each cascade stage - proposal_matchers (list[Matcher]): matcher with different IoU thresholds to - match boxes with ground truth for each stage. The first matcher matches - RPN proposals with ground truth, the other matchers use boxes predicted - by the previous stage as proposals and match them with ground truth. - """ - assert "proposal_matcher" not in kwargs, ( - "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " - "of one 'proposal_matcher='." - ) - # The first matcher matches RPN proposals with ground truth, done in the base class - kwargs["proposal_matcher"] = proposal_matchers[0] - num_stages = self.num_cascade_stages = len(box_heads) - box_heads = nn.ModuleList(box_heads) - box_predictors = nn.ModuleList(box_predictors) - assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" - assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" - super().__init__( - box_in_features=box_in_features, - box_pooler=box_pooler, - box_head=box_heads, - box_predictor=box_predictors, - **kwargs, - ) - self.proposal_matchers = proposal_matchers - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret.pop("proposal_matcher") - return ret - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS - cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS - assert len(cascade_bbox_reg_weights) == len(cascade_ious) - assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ - "CascadeROIHeads only support class-agnostic regression now!" 
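A tiny standalone check of what the `_ScaleGradient` function above does: the forward pass is the identity, while gradients flowing back through it are multiplied by the given scale (0.5 here, purely illustrative):

import torch

class Scale(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, scale):
        ctx.scale = scale
        return x

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output * ctx.scale, None

x = torch.ones(3, requires_grad=True)
Scale.apply(x, 0.5).sum().backward()
print(x.grad)   # tensor([0.5000, 0.5000, 0.5000])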
- assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features] - # Check all channel counts are equal - assert len(set(in_channels)) == 1, in_channels - in_channels = in_channels[0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - pooled_shape = ShapeSpec( - channels=in_channels, width=pooler_resolution, height=pooler_resolution - ) - - box_heads, box_predictors, proposal_matchers = [], [], [] - for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): - box_head = build_box_head(cfg, pooled_shape) - box_heads.append(box_head) - box_predictors.append( - FastRCNNOutputLayers( - cfg, - box_head.output_shape, - box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), - ) - ) - proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_heads": box_heads, - "box_predictors": box_predictors, - "proposal_matchers": proposal_matchers, - } - - def forward(self, images, features, proposals, targets=None): - del images - if self.training: - proposals = self.label_and_sample_proposals(proposals, targets) - - if self.training: - # Need targets to box head - losses = self._forward_box(features, proposals, targets) - losses.update(self._forward_mask(features, proposals)) - losses.update(self._forward_keypoint(features, proposals)) - return proposals, losses - else: - pred_instances = self._forward_box(features, proposals) - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def _forward_box(self, features, proposals, targets=None): - """ - Args: - features, targets: the same as in - Same as in :meth:`ROIHeads.forward`. - proposals (list[Instances]): the per-image object proposals with - their matching ground truth. - Each has fields "proposal_boxes", and "objectness_logits", - "gt_classes", "gt_boxes". - """ - features = [features[f] for f in self.box_in_features] - head_outputs = [] # (predictor, predictions, proposals) - prev_pred_boxes = None - image_sizes = [x.image_size for x in proposals] - for k in range(self.num_cascade_stages): - if k > 0: - # The output boxes of the previous stage are used to create the input - # proposals of the next stage. - proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) - if self.training: - proposals = self._match_and_label_boxes(proposals, k, targets) - predictions = self._run_stage(features, proposals, k) - prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) - head_outputs.append((self.box_predictor[k], predictions, proposals)) - - if self.training: - losses = {} - storage = get_event_storage() - for stage, (predictor, predictions, proposals) in enumerate(head_outputs): - with storage.name_scope("stage{}".format(stage)): - stage_losses = predictor.losses(predictions, proposals) - losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) - return losses - else: - # Each is a list[Tensor] of length #image. 
Each tensor is Ri x (K+1) - scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] - - # Average the scores across heads - scores = [ - sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) - for scores_per_image in zip(*scores_per_stage) - ] - # Use the boxes of the last head - predictor, predictions, proposals = head_outputs[-1] - boxes = predictor.predict_boxes(predictions, proposals) - pred_instances, _ = fast_rcnn_inference( - boxes, - scores, - image_sizes, - predictor.test_score_thresh, - predictor.test_nms_thresh, - predictor.test_topk_per_image, - ) - return pred_instances - - @torch.no_grad() - def _match_and_label_boxes(self, proposals, stage, targets): - """ - Match proposals with groundtruth using the matcher at the given stage. - Label the proposals as foreground or background based on the match. - - Args: - proposals (list[Instances]): One Instances for each image, with - the field "proposal_boxes". - stage (int): the current stage - targets (list[Instances]): the ground truth instances - - Returns: - list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" - """ - num_fg_samples, num_bg_samples = [], [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - match_quality_matrix = pairwise_iou( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - # proposal_labels are 0 or 1 - matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) - if len(targets_per_image) > 0: - gt_classes = targets_per_image.gt_classes[matched_idxs] - # Label unmatched proposals (0 label from matcher) as background (label=num_classes) - gt_classes[proposal_labels == 0] = self.num_classes - gt_boxes = targets_per_image.gt_boxes[matched_idxs] - else: - gt_classes = torch.zeros_like(matched_idxs) + self.num_classes - gt_boxes = Boxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) - ) - proposals_per_image.gt_classes = gt_classes - proposals_per_image.gt_boxes = gt_boxes - - num_fg_samples.append((proposal_labels == 1).sum().item()) - num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) - - # Log the number of fg/bg samples in each stage - storage = get_event_storage() - storage.put_scalar( - "stage{}/roi_head/num_fg_samples".format(stage), - sum(num_fg_samples) / len(num_fg_samples), - ) - storage.put_scalar( - "stage{}/roi_head/num_bg_samples".format(stage), - sum(num_bg_samples) / len(num_bg_samples), - ) - return proposals - - def _run_stage(self, features, proposals, stage): - """ - Args: - features (list[Tensor]): #lvl input features to ROIHeads - proposals (list[Instances]): #image Instances, with the field "proposal_boxes" - stage (int): the current stage - - Returns: - Same output as `FastRCNNOutputLayers.forward()`. - """ - box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) - # The original implementation averages the losses among heads, - # but scale up the parameter gradients of the heads. - # This is equivalent to adding the losses among heads, - # but scale down the gradients on features. 
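To make the cross-stage score averaging in `_forward_box` above concrete, here is the same reduction run on toy per-stage score lists (two stages, two images, made-up numbers):

import torch

# scores_per_stage[s][i] holds the Ri x (K+1) scores of stage s for image i.
scores_per_stage = [
    [torch.tensor([[0.2, 0.8]]), torch.tensor([[0.6, 0.4], [0.1, 0.9]])],  # stage 0
    [torch.tensor([[0.4, 0.6]]), torch.tensor([[0.4, 0.6], [0.3, 0.7]])],  # stage 1
]
num_stages = len(scores_per_stage)
scores = [
    sum(list(per_image)) * (1.0 / num_stages)
    for per_image in zip(*scores_per_stage)
]
print(scores[0])   # tensor([[0.3000, 0.7000]])  <- image 0, averaged over the two stages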
- if self.training: - box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) - box_features = self.box_head[stage](box_features) - return self.box_predictor[stage](box_features) - - def _create_proposals_from_boxes(self, boxes, image_sizes): - """ - Args: - boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 - image_sizes (list[tuple]): list of image shapes in (h, w) - - Returns: - list[Instances]: per-image proposals with the given boxes. - """ - # Just like RPN, the proposals should not have gradients - boxes = [Boxes(b.detach()) for b in boxes] - proposals = [] - for boxes_per_image, image_size in zip(boxes, image_sizes): - boxes_per_image.clip(image_size) - if self.training: - # do not filter empty boxes at inference time, - # because the scores from each stage need to be aligned and added later - boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] - prop = Instances(image_size) - prop.proposal_boxes = boxes_per_image - proposals.append(prop) - return proposals diff --git a/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py b/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py deleted file mode 100644 index 039e2490fae27d6e837b57492a230bc556da845f..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py +++ /dev/null @@ -1,569 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -from typing import Callable, Dict, List, Optional, Tuple, Union -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.data.detection_utils import get_fed_loss_cls_weights -from detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple -from detectron2.modeling.box_regression import Box2BoxTransform, _dense_box_regression_loss -from detectron2.structures import Boxes, Instances -from detectron2.utils.events import get_event_storage - -__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] - - -logger = logging.getLogger(__name__) - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - R: number of ROIs, combined over all images, in the minibatch - Ri: number of ROIs in image i - K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. - -Naming convention: - - deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransform`). - - pred_class_logits: predicted class scores in [-inf, +inf]; use - softmax(pred_class_logits) to estimate P(class). - - gt_classes: ground-truth classification labels in [0, K], where [0, K) represent - foreground object classes and K represents the background class. - - pred_proposal_deltas: predicted box2box transform deltas for transforming proposals - to detection box predictions. - - gt_proposal_deltas: ground-truth box2box transform deltas -""" - - -def fast_rcnn_inference( - boxes: List[torch.Tensor], - scores: List[torch.Tensor], - image_shapes: List[Tuple[int, int]], - score_thresh: float, - nms_thresh: float, - topk_per_image: int, -): - """ - Call `fast_rcnn_inference_single_image` for all images. - - Args: - boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic - boxes for each image. Element i has shape (Ri, K * 4) if doing - class-specific regression, or (Ri, 4) if doing class-agnostic - regression, where Ri is the number of predicted objects for image i. 
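A small standalone sketch of the detach-and-clip idea in `_create_proposals_from_boxes` above (not the Boxes.clip implementation itself): predicted boxes are cut loose from the graph and clamped to the image, here one made-up box and an image of size (h, w) = (100, 150):

import torch

h, w = 100, 150
boxes = torch.tensor([[-5.0, 20.0, 160.0, 90.0]], requires_grad=True)   # (x1, y1, x2, y2)

b = boxes.detach().clone()        # proposals must not carry gradients
b[:, 0::2].clamp_(min=0, max=w)   # clip x coordinates to [0, w]
b[:, 1::2].clamp_(min=0, max=h)   # clip y coordinates to [0, h]
print(b)                          # tensor([[  0.,  20., 150.,  90.]])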
- This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. - scores (list[Tensor]): A list of Tensors of predicted class scores for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. - image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. - score_thresh (float): Only return detections with a confidence score exceeding this - threshold. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - instances: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections. - kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates - the corresponding boxes/scores index in [0, Ri) from the input, for image i. - """ - result_per_image = [ - fast_rcnn_inference_single_image( - boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image - ) - for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) - ] - return [x[0] for x in result_per_image], [x[1] for x in result_per_image] - - -def _log_classification_stats(pred_logits, gt_classes, prefix="fast_rcnn"): - """ - Log the classification metrics to EventStorage. - - Args: - pred_logits: Rx(K+1) logits. The last column is for background class. - gt_classes: R labels - """ - num_instances = gt_classes.numel() - if num_instances == 0: - return - pred_classes = pred_logits.argmax(dim=1) - bg_class_ind = pred_logits.shape[1] - 1 - - fg_inds = (gt_classes >= 0) & (gt_classes < bg_class_ind) - num_fg = fg_inds.nonzero().numel() - fg_gt_classes = gt_classes[fg_inds] - fg_pred_classes = pred_classes[fg_inds] - - num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() - num_accurate = (pred_classes == gt_classes).nonzero().numel() - fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() - - storage = get_event_storage() - storage.put_scalar(f"{prefix}/cls_accuracy", num_accurate / num_instances) - if num_fg > 0: - storage.put_scalar(f"{prefix}/fg_cls_accuracy", fg_num_accurate / num_fg) - storage.put_scalar(f"{prefix}/false_negative", num_false_negative / num_fg) - - -def fast_rcnn_inference_single_image( - boxes, - scores, - image_shape: Tuple[int, int], - score_thresh: float, - nms_thresh: float, - topk_per_image: int, -): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Args: - Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes - per image. - - Returns: - Same as `fast_rcnn_inference`, but for only one image. - """ - valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores = scores[valid_mask] - - scores = scores[:, :-1] - num_bbox_reg_classes = boxes.shape[1] // 4 - # Convert to Boxes to use the `clip` function ... - boxes = Boxes(boxes.reshape(-1, 4)) - boxes.clip(image_shape) - boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 - - # 1. Filter results based on detection scores. It can make NMS more efficient - # by filtering out low-confidence detections. - filter_mask = scores > score_thresh # R x K - # R' x 2. 
First column contains indices of the R predictions; - # Second column contains indices of classes. - filter_inds = filter_mask.nonzero() - if num_bbox_reg_classes == 1: - boxes = boxes[filter_inds[:, 0], 0] - else: - boxes = boxes[filter_mask] - scores = scores[filter_mask] - - # 2. Apply NMS for each class independently. - keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) - if topk_per_image >= 0: - keep = keep[:topk_per_image] - boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] - - result = Instances(image_shape) - result.pred_boxes = Boxes(boxes) - result.scores = scores - result.pred_classes = filter_inds[:, 1] - return result, filter_inds[:, 0] - - -class FastRCNNOutputLayers(nn.Module): - """ - Two linear layers for predicting Fast R-CNN outputs: - - 1. proposal-to-detection box regression deltas - 2. classification scores - """ - - @configurable - def __init__( - self, - input_shape: ShapeSpec, - *, - box2box_transform, - num_classes: int, - test_score_thresh: float = 0.0, - test_nms_thresh: float = 0.5, - test_topk_per_image: int = 100, - cls_agnostic_bbox_reg: bool = False, - smooth_l1_beta: float = 0.0, - box_reg_loss_type: str = "smooth_l1", - loss_weight: Union[float, Dict[str, float]] = 1.0, - use_fed_loss: bool = False, - use_sigmoid_ce: bool = False, - get_fed_loss_cls_weights: Optional[Callable] = None, - fed_loss_num_classes: int = 50, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature to this module - box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): - num_classes (int): number of foreground classes - test_score_thresh (float): threshold to filter predictions results. - test_nms_thresh (float): NMS threshold for prediction results. - test_topk_per_image (int): number of top predictions to produce per image. - cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression - smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if - `box_reg_loss_type` is "smooth_l1" - box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou", - "diou", "ciou" - loss_weight (float|dict): weights to use for losses. Can be single float for weighting - all losses, or a dict of individual weightings. Valid dict keys are: - * "loss_cls": applied to classification loss - * "loss_box_reg": applied to box regression loss - use_fed_loss (bool): whether to use federated loss which samples additional negative - classes to calculate the loss - use_sigmoid_ce (bool): whether to calculate the loss using weighted average of binary - cross entropy with logits. This could be used together with federated loss - get_fed_loss_cls_weights (Callable): a callable which takes dataset name and frequency - weight power, and returns the probabilities to sample negative classes for - federated loss. 
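To see what the score-threshold bookkeeping in `fast_rcnn_inference_single_image` above produces, a toy run with 2 boxes, 3 foreground classes, and a 0.5 threshold (the numbers are made up):

import torch

scores = torch.tensor([[0.70, 0.02, 0.40],
                       [0.10, 0.55, 0.05]])   # R x K, background column already dropped
filter_mask = scores > 0.5                    # R x K boolean mask
filter_inds = filter_mask.nonzero()           # R' x 2 rows of (box index, class index)
print(filter_inds)                            # tensor([[0, 0], [1, 1]])
print(scores[filter_mask])                    # tensor([0.7000, 0.5500])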
The implementation can be found in - detectron2/data/detection_utils.py - fed_loss_num_classes (int): number of federated classes to keep in total - """ - super().__init__() - if isinstance(input_shape, int): # some backward compatibility - input_shape = ShapeSpec(channels=input_shape) - self.num_classes = num_classes - input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) - # prediction layer for num_classes foreground classes and one background class (hence + 1) - self.cls_score = nn.Linear(input_size, num_classes + 1) - num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes - box_dim = len(box2box_transform.weights) - self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim) - - nn.init.normal_(self.cls_score.weight, std=0.01) - nn.init.normal_(self.bbox_pred.weight, std=0.001) - for l in [self.cls_score, self.bbox_pred]: - nn.init.constant_(l.bias, 0) - - self.box2box_transform = box2box_transform - self.smooth_l1_beta = smooth_l1_beta - self.test_score_thresh = test_score_thresh - self.test_nms_thresh = test_nms_thresh - self.test_topk_per_image = test_topk_per_image - self.box_reg_loss_type = box_reg_loss_type - if isinstance(loss_weight, float): - loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight} - self.loss_weight = loss_weight - self.use_fed_loss = use_fed_loss - self.use_sigmoid_ce = use_sigmoid_ce - self.fed_loss_num_classes = fed_loss_num_classes - - if self.use_fed_loss: - assert self.use_sigmoid_ce, "Please use sigmoid cross entropy loss with federated loss" - fed_loss_cls_weights = get_fed_loss_cls_weights() - assert ( - len(fed_loss_cls_weights) == self.num_classes - ), "Please check the provided fed_loss_cls_weights. Their size should match num_classes" - self.register_buffer("fed_loss_cls_weights", fed_loss_cls_weights) - - @classmethod - def from_config(cls, cfg, input_shape): - return { - "input_shape": input_shape, - "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), - # fmt: off - "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, - "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, - "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, - "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, - "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, - "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE, - "box_reg_loss_type" : cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE, - "loss_weight" : {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT}, # noqa - "use_fed_loss" : cfg.MODEL.ROI_BOX_HEAD.USE_FED_LOSS, - "use_sigmoid_ce" : cfg.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE, - "get_fed_loss_cls_weights" : lambda: get_fed_loss_cls_weights(dataset_names=cfg.DATASETS.TRAIN, freq_weight_power=cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER), # noqa - "fed_loss_num_classes" : cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES, - # fmt: on - } - - def forward(self, x): - """ - Args: - x: per-region features of shape (N, ...) for N bounding boxes to predict. - - Returns: - (Tensor, Tensor): - First tensor: shape (N,K+1), scores for each of the N box. Each row contains the - scores for K object categories and 1 background class. - - Second tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), - or (N,4) for class-agnostic regression. 
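A bare-bones stand-in for the two prediction layers described above, assuming 80 foreground classes, class-specific regression, and a 1024-d input feature (all illustrative values):

import torch
from torch import nn

num_classes, box_dim, feat_dim = 80, 4, 1024
cls_score = nn.Linear(feat_dim, num_classes + 1)        # +1 for the background class
bbox_pred = nn.Linear(feat_dim, num_classes * box_dim)  # class-specific deltas

x = torch.randn(16, feat_dim)   # 16 per-region features
print(cls_score(x).shape)       # torch.Size([16, 81])
print(bbox_pred(x).shape)       # torch.Size([16, 320])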
- """ - if x.dim() > 2: - x = torch.flatten(x, start_dim=1) - scores = self.cls_score(x) - proposal_deltas = self.bbox_pred(x) - return scores, proposal_deltas - - def losses(self, predictions, proposals): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features that were used - to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``, - ``gt_classes`` are expected. - - Returns: - Dict[str, Tensor]: dict of losses - """ - scores, proposal_deltas = predictions - - # parse classification outputs - gt_classes = ( - cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0) - ) - _log_classification_stats(scores, gt_classes) - - # parse box regression outputs - if len(proposals): - proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) # Nx4 - assert not proposal_boxes.requires_grad, "Proposals should not require gradients!" - # If "gt_boxes" does not exist, the proposals must be all negative and - # should not be included in regression loss computation. - # Here we just use proposal_boxes as an arbitrary placeholder because its - # value won't be used in self.box_reg_loss(). - gt_boxes = cat( - [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals], - dim=0, - ) - else: - proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device) - - if self.use_sigmoid_ce: - loss_cls = self.sigmoid_cross_entropy_loss(scores, gt_classes) - else: - loss_cls = cross_entropy(scores, gt_classes, reduction="mean") - - losses = { - "loss_cls": loss_cls, - "loss_box_reg": self.box_reg_loss( - proposal_boxes, gt_boxes, proposal_deltas, gt_classes - ), - } - return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} - - # Implementation from https://github.com/xingyizhou/CenterNet2/blob/master/projects/CenterNet2/centernet/modeling/roi_heads/fed_loss.py # noqa - # with slight modifications - def get_fed_loss_classes(self, gt_classes, num_fed_loss_classes, num_classes, weight): - """ - Args: - gt_classes: a long tensor of shape R that contains the gt class label of each proposal. - num_fed_loss_classes: minimum number of classes to keep when calculating federated loss. - Will sample negative classes if number of unique gt_classes is smaller than this value. - num_classes: number of foreground classes - weight: probabilities used to sample negative classes - - Returns: - Tensor: - classes to keep when calculating the federated loss, including both unique gt - classes and sampled negative classes. - """ - unique_gt_classes = torch.unique(gt_classes) - prob = unique_gt_classes.new_ones(num_classes + 1).float() - prob[-1] = 0 - if len(unique_gt_classes) < num_fed_loss_classes: - prob[:num_classes] = weight.float().clone() - prob[unique_gt_classes] = 0 - sampled_negative_classes = torch.multinomial( - prob, num_fed_loss_classes - len(unique_gt_classes), replacement=False - ) - fed_loss_classes = torch.cat([unique_gt_classes, sampled_negative_classes]) - else: - fed_loss_classes = unique_gt_classes - return fed_loss_classes - - # Implementation from https://github.com/xingyizhou/CenterNet2/blob/master/projects/CenterNet2/centernet/modeling/roi_heads/custom_fast_rcnn.py#L113 # noqa - # with slight modifications - def sigmoid_cross_entropy_loss(self, pred_class_logits, gt_classes): - """ - Args: - pred_class_logits: shape (N, K+1), scores for each of the N box. 
Each row contains the - scores for K object categories and 1 background class - gt_classes: a long tensor of shape R that contains the gt class label of each proposal. - """ - if pred_class_logits.numel() == 0: - return pred_class_logits.new_zeros([1])[0] - - N = pred_class_logits.shape[0] - K = pred_class_logits.shape[1] - 1 - - target = pred_class_logits.new_zeros(N, K + 1) - target[range(len(gt_classes)), gt_classes] = 1 - target = target[:, :K] - - cls_loss = F.binary_cross_entropy_with_logits( - pred_class_logits[:, :-1], target, reduction="none" - ) - - if self.use_fed_loss: - fed_loss_classes = self.get_fed_loss_classes( - gt_classes, - num_fed_loss_classes=self.fed_loss_num_classes, - num_classes=K, - weight=self.fed_loss_cls_weights, - ) - fed_loss_classes_mask = fed_loss_classes.new_zeros(K + 1) - fed_loss_classes_mask[fed_loss_classes] = 1 - fed_loss_classes_mask = fed_loss_classes_mask[:K] - weight = fed_loss_classes_mask.view(1, K).expand(N, K).float() - else: - weight = 1 - - loss = torch.sum(cls_loss * weight) / N - return loss - - def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes): - """ - Args: - proposal_boxes/gt_boxes are tensors with the same shape (R, 4 or 5). - pred_deltas has shape (R, 4 or 5), or (R, num_classes * (4 or 5)). - gt_classes is a long tensor of shape R, the gt class label of each proposal. - R shall be the number of proposals. - """ - box_dim = proposal_boxes.shape[1] # 4 or 5 - # Regression loss is only computed for foreground proposals (those matched to a GT) - fg_inds = nonzero_tuple((gt_classes >= 0) & (gt_classes < self.num_classes))[0] - if pred_deltas.shape[1] == box_dim: # cls-agnostic regression - fg_pred_deltas = pred_deltas[fg_inds] - else: - fg_pred_deltas = pred_deltas.view(-1, self.num_classes, box_dim)[ - fg_inds, gt_classes[fg_inds] - ] - - loss_box_reg = _dense_box_regression_loss( - [proposal_boxes[fg_inds]], - self.box2box_transform, - [fg_pred_deltas.unsqueeze(0)], - [gt_boxes[fg_inds]], - ..., - self.box_reg_loss_type, - self.smooth_l1_beta, - ) - - # The reg loss is normalized using the total number of regions (R), not the number - # of foreground regions even though the box regression loss is only defined on - # foreground regions. Why? Because doing so gives equal training influence to - # each foreground example. To see how, consider two different minibatches: - # (1) Contains a single foreground region - # (2) Contains 100 foreground regions - # If we normalize by the number of foreground regions, the single example in - # minibatch (1) will be given 100 times as much influence as each foreground - # example in minibatch (2). Normalizing by the total number of regions, R, - # means that the single example in minibatch (1) and each of the 100 examples - # in minibatch (2) are given equal influence. - return loss_box_reg / max(gt_classes.numel(), 1.0) # return 0 if empty - - def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features that were - used to compute predictions. The ``proposal_boxes`` field is expected. - - Returns: - list[Instances]: same as `fast_rcnn_inference`. - list[Tensor]: same as `fast_rcnn_inference`. 
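A toy illustration of the target construction in `sigmoid_cross_entropy_loss` above: once the last column is dropped, background proposals (gt class == K) end up with an all-zero target row (3 proposals, K = 2 foreground classes):

import torch

K = 2
gt_classes = torch.tensor([0, 1, 2])          # the last proposal is background (class K)
target = torch.zeros(len(gt_classes), K + 1)
target[range(len(gt_classes)), gt_classes] = 1
target = target[:, :K]                        # drop the background column
print(target)
# tensor([[1., 0.],
#         [0., 1.],
#         [0., 0.]])   <- background row is all zeros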
- """ - boxes = self.predict_boxes(predictions, proposals) - scores = self.predict_probs(predictions, proposals) - image_shapes = [x.image_size for x in proposals] - return fast_rcnn_inference( - boxes, - scores, - image_shapes, - self.test_score_thresh, - self.test_nms_thresh, - self.test_topk_per_image, - ) - - def predict_boxes_for_gt_classes(self, predictions, proposals): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features that were used - to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected. - - Returns: - list[Tensor]: - A list of Tensors of predicted boxes for GT classes in case of - class-specific box head. Element i of the list has shape (Ri, B), where Ri is - the number of proposals for image i and B is the box dimension (4 or 5) - """ - if not len(proposals): - return [] - scores, proposal_deltas = predictions - proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) - N, B = proposal_boxes.shape - predict_boxes = self.box2box_transform.apply_deltas( - proposal_deltas, proposal_boxes - ) # Nx(KxB) - - K = predict_boxes.shape[1] // B - if K > 1: - gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) - # Some proposals are ignored or have a background class. Their gt_classes - # cannot be used as index. - gt_classes = gt_classes.clamp_(0, K - 1) - - predict_boxes = predict_boxes.view(N, K, B)[ - torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes - ] - num_prop_per_image = [len(p) for p in proposals] - return predict_boxes.split(num_prop_per_image) - - def predict_boxes( - self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] - ): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features that were - used to compute predictions. The ``proposal_boxes`` field is expected. - - Returns: - list[Tensor]: - A list of Tensors of predicted class-specific or class-agnostic boxes - for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is - the number of proposals for image i and B is the box dimension (4 or 5) - """ - if not len(proposals): - return [] - _, proposal_deltas = predictions - num_prop_per_image = [len(p) for p in proposals] - proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0) - predict_boxes = self.box2box_transform.apply_deltas( - proposal_deltas, - proposal_boxes, - ) # Nx(KxB) - return predict_boxes.split(num_prop_per_image) - - def predict_probs( - self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances] - ): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features that were - used to compute predictions. - - Returns: - list[Tensor]: - A list of Tensors of predicted class probabilities for each image. - Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i. 
- """ - scores, _ = predictions - num_inst_per_image = [len(p) for p in proposals] - if self.use_sigmoid_ce: - probs = scores.sigmoid() - else: - probs = F.softmax(scores, dim=-1) - return probs.split(num_inst_per_image, dim=0) diff --git a/detectron2/detectron2/modeling/roi_heads/keypoint_head.py b/detectron2/detectron2/modeling/roi_heads/keypoint_head.py deleted file mode 100644 index e0acc138e72fcb188e4ffb3d156358b8ca59babf..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/keypoint_head.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from typing import List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate -from detectron2.structures import Instances, heatmaps_to_keypoints -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -_TOTAL_SKIPPED = 0 - - -__all__ = [ - "ROI_KEYPOINT_HEAD_REGISTRY", - "build_keypoint_head", - "BaseKeypointRCNNHead", - "KRCNNConvDeconvUpsampleHead", -] - - -ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") -ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ -Registry for keypoint heads, which make keypoint predictions from per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def build_keypoint_head(cfg, input_shape): - """ - Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME - return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) - - -def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): - """ - Arguments: - pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number - of instances in the batch, K is the number of keypoints, and S is the side length - of the keypoint heatmap. The values are spatial logits. - instances (list[Instances]): A list of M Instances, where M is the batch size. - These instances are predictions from the model - that are in 1:1 correspondence with pred_keypoint_logits. - Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` - instance. - normalizer (float): Normalize the loss by this amount. - If not specified, we normalize by the number of visible keypoints in the minibatch. - - Returns a scalar tensor containing the loss. 
- """ - heatmaps = [] - valid = [] - - keypoint_side_len = pred_keypoint_logits.shape[2] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - keypoints = instances_per_image.gt_keypoints - heatmaps_per_image, valid_per_image = keypoints.to_heatmap( - instances_per_image.proposal_boxes.tensor, keypoint_side_len - ) - heatmaps.append(heatmaps_per_image.view(-1)) - valid.append(valid_per_image.view(-1)) - - if len(heatmaps): - keypoint_targets = cat(heatmaps, dim=0) - valid = cat(valid, dim=0).to(dtype=torch.uint8) - valid = torch.nonzero(valid).squeeze(1) - - # torch.mean (in binary_cross_entropy_with_logits) doesn't - # accept empty tensors, so handle it separately - if len(heatmaps) == 0 or valid.numel() == 0: - global _TOTAL_SKIPPED - _TOTAL_SKIPPED += 1 - storage = get_event_storage() - storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) - return pred_keypoint_logits.sum() * 0 - - N, K, H, W = pred_keypoint_logits.shape - pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) - - keypoint_loss = F.cross_entropy( - pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" - ) - - # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch - if normalizer is None: - normalizer = valid.numel() - keypoint_loss /= normalizer - - return keypoint_loss - - -def keypoint_rcnn_inference(pred_keypoint_logits: torch.Tensor, pred_instances: List[Instances]): - """ - Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) - and add it to the `pred_instances` as a `pred_keypoints` field. - - Args: - pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number - of instances in the batch, K is the number of keypoints, and S is the side length of - the keypoint heatmap. The values are spatial logits. - pred_instances (list[Instances]): A list of N Instances, where N is the number of images. - - Returns: - None. Each element in pred_instances will contain extra "pred_keypoints" and - "pred_keypoint_heatmaps" fields. "pred_keypoints" is a tensor of shape - (#instance, K, 3) where the last dimension corresponds to (x, y, score). - The scores are larger than 0. "pred_keypoint_heatmaps" contains the raw - keypoint logits as passed to this function. - """ - # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) - bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) - - pred_keypoint_logits = pred_keypoint_logits.detach() - keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits, bboxes_flat.detach()) - num_instances_per_image = [len(i) for i in pred_instances] - keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) - heatmap_results = pred_keypoint_logits.split(num_instances_per_image, dim=0) - - for keypoint_results_per_image, heatmap_results_per_image, instances_per_image in zip( - keypoint_results, heatmap_results, pred_instances - ): - # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) - # heatmap_results_per_image is (num instances)x(num keypoints)x(side)x(side) - instances_per_image.pred_keypoints = keypoint_results_per_image - instances_per_image.pred_keypoint_heatmaps = heatmap_results_per_image - - -class BaseKeypointRCNNHead(nn.Module): - """ - Implement the basic Keypoint R-CNN losses and inference logic described in - Sec. 5 of :paper:`Mask R-CNN`. 
- """ - - @configurable - def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): - """ - NOTE: this interface is experimental. - - Args: - num_keypoints (int): number of keypoints to predict - loss_weight (float): weight to multiple on the keypoint loss - loss_normalizer (float or str): - If float, divide the loss by `loss_normalizer * #images`. - If 'visible', the loss is normalized by the total number of - visible keypoints across images. - """ - super().__init__() - self.num_keypoints = num_keypoints - self.loss_weight = loss_weight - assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer - self.loss_normalizer = loss_normalizer - - @classmethod - def from_config(cls, cfg, input_shape): - ret = { - "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, - "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, - } - normalize_by_visible = ( - cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS - ) # noqa - if not normalize_by_visible: - batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE - positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION - ret["loss_normalizer"] = ( - ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction - ) - else: - ret["loss_normalizer"] = "visible" - return ret - - def forward(self, x, instances: List[Instances]): - """ - Args: - x: input 4D region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - - Returns: - A dict of losses if in training. The predicted "instances" if in inference. - """ - x = self.layers(x) - if self.training: - num_images = len(instances) - normalizer = ( - None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer - ) - return { - "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) - * self.loss_weight - } - else: - keypoint_rcnn_inference(x, instances) - return instances - - def layers(self, x): - """ - Neural network layers that makes predictions from regional input features. - """ - raise NotImplementedError - - -# To get torchscript support, we make the head a subclass of `nn.Sequential`. -# Therefore, to add new layers in this head class, please make sure they are -# added in the order they will be used in forward(). -@ROI_KEYPOINT_HEAD_REGISTRY.register() -class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead, nn.Sequential): - """ - A standard keypoint head containing a series of 3x3 convs, followed by - a transpose convolution and bilinear interpolation for upsampling. - It is described in Sec. 5 of :paper:`Mask R-CNN`. - """ - - @configurable - def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - conv_dims: an iterable of output channel counts for each conv in the head - e.g. (512, 512, 512) for three convs outputting 512 channels. 
- """ - super().__init__(num_keypoints=num_keypoints, **kwargs) - - # default up_scale to 2.0 (this can be made an option) - up_scale = 2.0 - in_channels = input_shape.channels - - for idx, layer_channels in enumerate(conv_dims, 1): - module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) - self.add_module("conv_fcn{}".format(idx), module) - self.add_module("conv_fcn_relu{}".format(idx), nn.ReLU()) - in_channels = layer_channels - - deconv_kernel = 4 - self.score_lowres = ConvTranspose2d( - in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 - ) - self.up_scale = up_scale - - for name, param in self.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - # Caffe2 implementation uses MSRAFill, which in fact - # corresponds to kaiming_normal_ in PyTorch - nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret["input_shape"] = input_shape - ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS - return ret - - def layers(self, x): - for layer in self: - x = layer(x) - x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) - return x diff --git a/detectron2/detectron2/modeling/roi_heads/mask_head.py b/detectron2/detectron2/modeling/roi_heads/mask_head.py deleted file mode 100644 index 1eff8f7916111546f9413cb6004cadcea01ba950..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/mask_head.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from typing import List -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm -from detectron2.layers.wrappers import move_device_like -from detectron2.structures import Instances -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -__all__ = [ - "BaseMaskRCNNHead", - "MaskRCNNConvUpsampleHead", - "build_mask_head", - "ROI_MASK_HEAD_REGISTRY", -] - - -ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") -ROI_MASK_HEAD_REGISTRY.__doc__ = """ -Registry for mask heads, which predicts instance masks given -per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -@torch.jit.unused -def mask_rcnn_loss(pred_mask_logits: torch.Tensor, instances: List[Instances], vis_period: int = 0): - """ - Compute the mask prediction loss defined in the Mask R-CNN paper. - - Args: - pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) - for class-specific or class-agnostic, where B is the total number of predicted masks - in all images, C is the number of foreground classes, and Hmask, Wmask are the height - and width of the mask predictions. The values are logits. - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 - correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, - ...) associated with each instance are stored in fields. - vis_period (int): the period (in steps) to dump visualization. - - Returns: - mask_loss (Tensor): A scalar tensor containing the loss. 
- """ - cls_agnostic_mask = pred_mask_logits.size(1) == 1 - total_num_masks = pred_mask_logits.size(0) - mask_side_len = pred_mask_logits.size(2) - assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" - - gt_classes = [] - gt_masks = [] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - if not cls_agnostic_mask: - gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) - gt_classes.append(gt_classes_per_image) - - gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( - instances_per_image.proposal_boxes.tensor, mask_side_len - ).to(device=pred_mask_logits.device) - # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len - gt_masks.append(gt_masks_per_image) - - if len(gt_masks) == 0: - return pred_mask_logits.sum() * 0 - - gt_masks = cat(gt_masks, dim=0) - - if cls_agnostic_mask: - pred_mask_logits = pred_mask_logits[:, 0] - else: - indices = torch.arange(total_num_masks) - gt_classes = cat(gt_classes, dim=0) - pred_mask_logits = pred_mask_logits[indices, gt_classes] - - if gt_masks.dtype == torch.bool: - gt_masks_bool = gt_masks - else: - # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) - gt_masks_bool = gt_masks > 0.5 - gt_masks = gt_masks.to(dtype=torch.float32) - - # Log the training accuracy (using gt classes and sigmoid(0.0) == 0.5 threshold) - mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool - mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) - num_positive = gt_masks_bool.sum().item() - false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( - gt_masks_bool.numel() - num_positive, 1.0 - ) - false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) - - storage = get_event_storage() - storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) - storage.put_scalar("mask_rcnn/false_positive", false_positive) - storage.put_scalar("mask_rcnn/false_negative", false_negative) - if vis_period > 0 and storage.iter % vis_period == 0: - pred_masks = pred_mask_logits.sigmoid() - vis_masks = torch.cat([pred_masks, gt_masks], axis=2) - name = "Left: mask prediction; Right: mask GT" - for idx, vis_mask in enumerate(vis_masks): - vis_mask = torch.stack([vis_mask] * 3, axis=0) - storage.put_image(name + f" ({idx})", vis_mask) - - mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits, gt_masks, reduction="mean") - return mask_loss - - -def mask_rcnn_inference(pred_mask_logits: torch.Tensor, pred_instances: List[Instances]): - """ - Convert pred_mask_logits to estimated foreground probability masks while also - extracting only the masks for the predicted classes in pred_instances. For each - predicted box, the mask of the same class is attached to the instance by adding a - new "pred_masks" field to pred_instances. - - Args: - pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) - for class-specific or class-agnostic, where B is the total number of predicted masks - in all images, C is the number of foreground classes, and Hmask, Wmask are the height - and width of the mask predictions. The values are logits. - pred_instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. Each Instances must have field "pred_classes". - - Returns: - None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, - Wmask) for predicted class. 
Note that the masks are returned as a soft (non-quantized) - masks the resolution predicted by the network; post-processing steps, such as resizing - the predicted masks to the original image resolution and/or binarizing them, is left - to the caller. - """ - cls_agnostic_mask = pred_mask_logits.size(1) == 1 - - if cls_agnostic_mask: - mask_probs_pred = pred_mask_logits.sigmoid() - else: - # Select masks corresponding to the predicted classes - num_masks = pred_mask_logits.shape[0] - class_pred = cat([i.pred_classes for i in pred_instances]) - device = ( - class_pred.device - if torch.jit.is_scripting() - else ("cpu" if torch.jit.is_tracing() else class_pred.device) - ) - indices = move_device_like(torch.arange(num_masks, device=device), class_pred) - mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() - # mask_probs_pred.shape: (B, 1, Hmask, Wmask) - - num_boxes_per_image = [len(i) for i in pred_instances] - mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) - - for prob, instances in zip(mask_probs_pred, pred_instances): - instances.pred_masks = prob # (1, Hmask, Wmask) - - -class BaseMaskRCNNHead(nn.Module): - """ - Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` - """ - - @configurable - def __init__(self, *, loss_weight: float = 1.0, vis_period: int = 0): - """ - NOTE: this interface is experimental. - - Args: - loss_weight (float): multiplier of the loss - vis_period (int): visualization period - """ - super().__init__() - self.vis_period = vis_period - self.loss_weight = loss_weight - - @classmethod - def from_config(cls, cfg, input_shape): - return {"vis_period": cfg.VIS_PERIOD} - - def forward(self, x, instances: List[Instances]): - """ - Args: - x: input region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - - Returns: - A dict of losses in training. The predicted "instances" in inference. - """ - x = self.layers(x) - if self.training: - return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period) * self.loss_weight} - else: - mask_rcnn_inference(x, instances) - return instances - - def layers(self, x): - """ - Neural network layers that makes predictions from input features. - """ - raise NotImplementedError - - -# To get torchscript support, we make the head a subclass of `nn.Sequential`. -# Therefore, to add new layers in this head class, please make sure they are -# added in the order they will be used in forward(). -@ROI_MASK_HEAD_REGISTRY.register() -class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead, nn.Sequential): - """ - A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). - Predictions are made with a final 1x1 conv layer. - """ - - @configurable - def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - num_classes (int): the number of foreground classes (i.e. background is not - included). 1 if using class agnostic prediction. - conv_dims (list[int]): a list of N>0 integers representing the output dimensions - of N-1 conv layers and the last upsample layer. 
- conv_norm (str or callable): normalization for the conv layers. - See :func:`detectron2.layers.get_norm` for supported types. - """ - super().__init__(**kwargs) - assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" - - self.conv_norm_relus = [] - - cur_channels = input_shape.channels - for k, conv_dim in enumerate(conv_dims[:-1]): - conv = Conv2d( - cur_channels, - conv_dim, - kernel_size=3, - stride=1, - padding=1, - bias=not conv_norm, - norm=get_norm(conv_norm, conv_dim), - activation=nn.ReLU(), - ) - self.add_module("mask_fcn{}".format(k + 1), conv) - self.conv_norm_relus.append(conv) - cur_channels = conv_dim - - self.deconv = ConvTranspose2d( - cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 - ) - self.add_module("deconv_relu", nn.ReLU()) - cur_channels = conv_dims[-1] - - self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) - - for layer in self.conv_norm_relus + [self.deconv]: - weight_init.c2_msra_fill(layer) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.predictor.weight, std=0.001) - if self.predictor.bias is not None: - nn.init.constant_(self.predictor.bias, 0) - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM - num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV - ret.update( - conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose - conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, - input_shape=input_shape, - ) - if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: - ret["num_classes"] = 1 - else: - ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES - return ret - - def layers(self, x): - for layer in self: - x = layer(x) - return x - - -def build_mask_head(cfg, input_shape): - """ - Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_MASK_HEAD.NAME - return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/detectron2/detectron2/modeling/roi_heads/roi_heads.py b/detectron2/detectron2/modeling/roi_heads/roi_heads.py deleted file mode 100644 index 2f4546cd0c42bb1214c50ba3021238c3491c8557..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/roi_heads.py +++ /dev/null @@ -1,877 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import inspect -import logging -import numpy as np -from typing import Dict, List, Optional, Tuple -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec, nonzero_tuple -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -from ..backbone.resnet import BottleneckBlock, ResNet -from ..matcher import Matcher -from ..poolers import ROIPooler -from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals -from ..sampling import subsample_labels -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers -from .keypoint_head import build_keypoint_head -from .mask_head import build_mask_head - -ROI_HEADS_REGISTRY = Registry("ROI_HEADS") -ROI_HEADS_REGISTRY.__doc__ = """ -Registry for ROI heads in a generalized R-CNN model. -ROIHeads take feature maps and region proposals, and -perform per-region computation. - -The registered object will be called with `obj(cfg, input_shape)`. -The call is expected to return an :class:`ROIHeads`. 
-""" - -logger = logging.getLogger(__name__) - - -def build_roi_heads(cfg, input_shape): - """ - Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. - """ - name = cfg.MODEL.ROI_HEADS.NAME - return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) - - -def select_foreground_proposals( - proposals: List[Instances], bg_label: int -) -> Tuple[List[Instances], List[torch.Tensor]]: - """ - Given a list of N Instances (for N images), each containing a `gt_classes` field, - return a list of Instances that contain only instances with `gt_classes != -1 && - gt_classes != bg_label`. - - Args: - proposals (list[Instances]): A list of N Instances, where N is the number of - images in the batch. - bg_label: label index of background class. - - Returns: - list[Instances]: N Instances, each contains only the selected foreground instances. - list[Tensor]: N boolean vector, correspond to the selection mask of - each Instances object. True for selected instances. - """ - assert isinstance(proposals, (list, tuple)) - assert isinstance(proposals[0], Instances) - assert proposals[0].has("gt_classes") - fg_proposals = [] - fg_selection_masks = [] - for proposals_per_image in proposals: - gt_classes = proposals_per_image.gt_classes - fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) - fg_idxs = fg_selection_mask.nonzero().squeeze(1) - fg_proposals.append(proposals_per_image[fg_idxs]) - fg_selection_masks.append(fg_selection_mask) - return fg_proposals, fg_selection_masks - - -def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: - """ - Args: - proposals (list[Instances]): a list of N Instances, where N is the - number of images. - - Returns: - proposals: only contains proposals with at least one visible keypoint. - - Note that this is still slightly different from Detectron. - In Detectron, proposals for training keypoint head are re-sampled from - all the proposals with IOU>threshold & >=1 visible keypoint. - - Here, the proposals are first sampled from all proposals with - IOU>threshold, then proposals with no visible keypoint are filtered out. - This strategy seems to make no difference on Detectron and is easier to implement. - """ - ret = [] - all_num_fg = [] - for proposals_per_image in proposals: - # If empty/unannotated image (hard negatives), skip filtering for train - if len(proposals_per_image) == 0: - ret.append(proposals_per_image) - continue - gt_keypoints = proposals_per_image.gt_keypoints.tensor - # #fg x K x 3 - vis_mask = gt_keypoints[:, :, 2] >= 1 - xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] - proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 - kp_in_box = ( - (xs >= proposal_boxes[:, :, 0]) - & (xs <= proposal_boxes[:, :, 2]) - & (ys >= proposal_boxes[:, :, 1]) - & (ys <= proposal_boxes[:, :, 3]) - ) - selection = (kp_in_box & vis_mask).any(dim=1) - selection_idxs = nonzero_tuple(selection)[0] - all_num_fg.append(selection_idxs.numel()) - ret.append(proposals_per_image[selection_idxs]) - - storage = get_event_storage() - storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) - return ret - - -class ROIHeads(torch.nn.Module): - """ - ROIHeads perform all per-region computation in an R-CNN. - - It typically contains logic to - - 1. (in training only) match proposals with ground truth and sample them - 2. crop the regions and extract per-region features using proposals - 3. 
make per-region predictions with different heads - - It can have many variants, implemented as subclasses of this class. - This base class contains the logic to match/sample proposals. - But it is not necessary to inherit this class if the sampling logic is not needed. - """ - - @configurable - def __init__( - self, - *, - num_classes, - batch_size_per_image, - positive_fraction, - proposal_matcher, - proposal_append_gt=True, - ): - """ - NOTE: this interface is experimental. - - Args: - num_classes (int): number of foreground classes (i.e. background is not included) - batch_size_per_image (int): number of proposals to sample for training - positive_fraction (float): fraction of positive (foreground) proposals - to sample for training. - proposal_matcher (Matcher): matcher that matches proposals and ground truth - proposal_append_gt (bool): whether to include ground truth as proposals as well - """ - super().__init__() - self.batch_size_per_image = batch_size_per_image - self.positive_fraction = positive_fraction - self.num_classes = num_classes - self.proposal_matcher = proposal_matcher - self.proposal_append_gt = proposal_append_gt - - @classmethod - def from_config(cls, cfg): - return { - "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, - "positive_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, - "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, - "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, - # Matcher to assign box proposals to gt boxes - "proposal_matcher": Matcher( - cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, - cfg.MODEL.ROI_HEADS.IOU_LABELS, - allow_low_quality_matches=False, - ), - } - - def _sample_proposals( - self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Based on the matching between N proposals and M groundtruth, - sample the proposals and set their classification labels. - - Args: - matched_idxs (Tensor): a vector of length N, each is the best-matched - gt index in [0, M) for each proposal. - matched_labels (Tensor): a vector of length N, the matcher's label - (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. - gt_classes (Tensor): a vector of length M. - - Returns: - Tensor: a vector of indices of sampled proposals. Each is in [0, N). - Tensor: a vector of the same length, the classification label for - each sampled proposal. Each sample is labeled as either a category in - [0, num_classes) or the background (num_classes). - """ - has_gt = gt_classes.numel() > 0 - # Get the corresponding GT for each proposal - if has_gt: - gt_classes = gt_classes[matched_idxs] - # Label unmatched proposals (0 label from matcher) as background (label=num_classes) - gt_classes[matched_labels == 0] = self.num_classes - # Label ignore proposals (-1 label) - gt_classes[matched_labels == -1] = -1 - else: - gt_classes = torch.zeros_like(matched_idxs) + self.num_classes - - sampled_fg_idxs, sampled_bg_idxs = subsample_labels( - gt_classes, self.batch_size_per_image, self.positive_fraction, self.num_classes - ) - - sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) - return sampled_idxs, gt_classes[sampled_idxs] - - @torch.no_grad() - def label_and_sample_proposals( - self, proposals: List[Instances], targets: List[Instances] - ) -> List[Instances]: - """ - Prepare some proposals to be used to train the ROI heads. - It performs box matching between `proposals` and `targets`, and assigns - training labels to the proposals. 
- It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth - boxes, with a fraction of positives that is no larger than - ``self.positive_fraction``. - - Args: - See :meth:`ROIHeads.forward` - - Returns: - list[Instances]: - length `N` list of `Instances`s containing the proposals - sampled for training. Each `Instances` has the following fields: - - - proposal_boxes: the proposal boxes - - gt_boxes: the ground-truth box that the proposal is assigned to - (this is only meaningful if the proposal has a label > 0; if label = 0 - then the ground-truth box is random) - - Other fields such as "gt_classes", "gt_masks", that's included in `targets`. - """ - # Augment proposals with ground-truth boxes. - # In the case of learned proposals (e.g., RPN), when training starts - # the proposals will be low quality due to random initialization. - # It's possible that none of these initial - # proposals have high enough overlap with the gt objects to be used - # as positive examples for the second stage components (box head, - # cls head, mask head). Adding the gt boxes to the set of proposals - # ensures that the second stage components will have some positive - # examples from the start of training. For RPN, this augmentation improves - # convergence and empirically improves box AP on COCO by about 0.5 - # points (under one tested configuration). - if self.proposal_append_gt: - proposals = add_ground_truth_to_proposals(targets, proposals) - - proposals_with_gt = [] - - num_fg_samples = [] - num_bg_samples = [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - has_gt = len(targets_per_image) > 0 - match_quality_matrix = pairwise_iou( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) - sampled_idxs, gt_classes = self._sample_proposals( - matched_idxs, matched_labels, targets_per_image.gt_classes - ) - - # Set target attributes of the sampled proposals: - proposals_per_image = proposals_per_image[sampled_idxs] - proposals_per_image.gt_classes = gt_classes - - if has_gt: - sampled_targets = matched_idxs[sampled_idxs] - # We index all the attributes of targets that start with "gt_" - # and have not been added to proposals yet (="gt_classes"). - # NOTE: here the indexing waste some compute, because heads - # like masks, keypoints, etc, will filter the proposals again, - # (by foreground/background, or number of keypoints in the image, etc) - # so we essentially index the data twice. - for trg_name, trg_value in targets_per_image.get_fields().items(): - if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): - proposals_per_image.set(trg_name, trg_value[sampled_targets]) - # If no GT is given in the image, we don't know what a dummy gt value can be. - # Therefore the returned proposals won't have any gt_* fields, except for a - # gt_classes full of background label. 
- - num_bg_samples.append((gt_classes == self.num_classes).sum().item()) - num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) - proposals_with_gt.append(proposals_per_image) - - # Log the number of fg/bg samples that are selected for training ROI heads - storage = get_event_storage() - storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) - storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) - - return proposals_with_gt - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: - """ - Args: - images (ImageList): - features (dict[str,Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - proposals (list[Instances]): length `N` list of `Instances`. The i-th - `Instances` contains object proposals for the i-th input image, - with fields "proposal_boxes" and "objectness_logits". - targets (list[Instances], optional): length `N` list of `Instances`. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - It may have the following fields: - - - gt_boxes: the bounding box of each instance. - - gt_classes: the label for each instance with a category ranging in [0, #class]. - - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. - - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. - - Returns: - list[Instances]: length `N` list of `Instances` containing the - detected instances. Returned during inference only; may be [] during training. - - dict[str->Tensor]: - mapping from a named loss to a tensor storing the loss. Used during training only. - """ - raise NotImplementedError() - - -@ROI_HEADS_REGISTRY.register() -class Res5ROIHeads(ROIHeads): - """ - The ROIHeads in a typical "C4" R-CNN model, where - the box and mask head share the cropping and - the per-region feature computation by a Res5 block. - See :paper:`ResNet` Appendix A. - """ - - @configurable - def __init__( - self, - *, - in_features: List[str], - pooler: ROIPooler, - res5: nn.Module, - box_predictor: nn.Module, - mask_head: Optional[nn.Module] = None, - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - in_features (list[str]): list of backbone feature map names to use for - feature extraction - pooler (ROIPooler): pooler to extra region features from backbone - res5 (nn.Sequential): a CNN to compute per-region features, to be used by - ``box_predictor`` and ``mask_head``. Typically this is a "res5" - block from a ResNet. - box_predictor (nn.Module): make box predictions from the feature. - Should have the same interface as :class:`FastRCNNOutputLayers`. 
- mask_head (nn.Module): transform features to make mask predictions - """ - super().__init__(**kwargs) - self.in_features = in_features - self.pooler = pooler - if isinstance(res5, (list, tuple)): - res5 = nn.Sequential(*res5) - self.res5 = res5 - self.box_predictor = box_predictor - self.mask_on = mask_head is not None - if self.mask_on: - self.mask_head = mask_head - - @classmethod - def from_config(cls, cfg, input_shape): - # fmt: off - ret = super().from_config(cfg) - in_features = ret["in_features"] = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - pooler_scales = (1.0 / input_shape[in_features[0]].stride, ) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - mask_on = cfg.MODEL.MASK_ON - # fmt: on - assert not cfg.MODEL.KEYPOINT_ON - assert len(in_features) == 1 - - ret["pooler"] = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - - # Compatbility with old moco code. Might be useful. - # See notes in StandardROIHeads.from_config - if not inspect.ismethod(cls._build_res5_block): - logger.warning( - "The behavior of _build_res5_block may change. " - "Please do not depend on private methods." - ) - cls._build_res5_block = classmethod(cls._build_res5_block) - - ret["res5"], out_channels = cls._build_res5_block(cfg) - ret["box_predictor"] = FastRCNNOutputLayers( - cfg, ShapeSpec(channels=out_channels, height=1, width=1) - ) - - if mask_on: - ret["mask_head"] = build_mask_head( - cfg, - ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), - ) - return ret - - @classmethod - def _build_res5_block(cls, cfg): - # fmt: off - stage_channel_factor = 2 ** 3 # res5 is 8x res2 - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group * stage_channel_factor - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - norm = cfg.MODEL.RESNETS.NORM - assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ - "Deformable conv is not yet supported in res5 head." - # fmt: on - - blocks = ResNet.make_stage( - BottleneckBlock, - 3, - stride_per_block=[2, 1, 1], - in_channels=out_channels // 2, - bottleneck_channels=bottleneck_channels, - out_channels=out_channels, - num_groups=num_groups, - norm=norm, - stride_in_1x1=stride_in_1x1, - ) - return nn.Sequential(*blocks), out_channels - - def _shared_roi_transform(self, features: List[torch.Tensor], boxes: List[Boxes]): - x = self.pooler(features, boxes) - return self.res5(x) - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ): - """ - See :meth:`ROIHeads.forward`. 
- """ - del images - - if self.training: - assert targets - proposals = self.label_and_sample_proposals(proposals, targets) - del targets - - proposal_boxes = [x.proposal_boxes for x in proposals] - box_features = self._shared_roi_transform( - [features[f] for f in self.in_features], proposal_boxes - ) - predictions = self.box_predictor(box_features.mean(dim=[2, 3])) - - if self.training: - del features - losses = self.box_predictor.losses(predictions, proposals) - if self.mask_on: - proposals, fg_selection_masks = select_foreground_proposals( - proposals, self.num_classes - ) - # Since the ROI feature transform is shared between boxes and masks, - # we don't need to recompute features. The mask loss is only defined - # on foreground proposals, so we need to select out the foreground - # features. - mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] - del box_features - losses.update(self.mask_head(mask_features, proposals)) - return [], losses - else: - pred_instances, _ = self.box_predictor.inference(predictions, proposals) - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def forward_with_given_boxes( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> List[Instances]: - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - instances (Instances): - the same `Instances` object, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - assert not self.training - assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") - - if self.mask_on: - feature_list = [features[f] for f in self.in_features] - x = self._shared_roi_transform(feature_list, [x.pred_boxes for x in instances]) - return self.mask_head(x, instances) - else: - return instances - - -@ROI_HEADS_REGISTRY.register() -class StandardROIHeads(ROIHeads): - """ - It's "standard" in a sense that there is no ROI transform sharing - or feature sharing between tasks. - Each head independently processes the input features by each head's - own pooler and head. - - This class is used by most models, such as FPN and C5. - To implement more models, you can subclass it and implement a different - :meth:`forward()` or a head. - """ - - @configurable - def __init__( - self, - *, - box_in_features: List[str], - box_pooler: ROIPooler, - box_head: nn.Module, - box_predictor: nn.Module, - mask_in_features: Optional[List[str]] = None, - mask_pooler: Optional[ROIPooler] = None, - mask_head: Optional[nn.Module] = None, - keypoint_in_features: Optional[List[str]] = None, - keypoint_pooler: Optional[ROIPooler] = None, - keypoint_head: Optional[nn.Module] = None, - train_on_pred_boxes: bool = False, - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - box_in_features (list[str]): list of feature names to use for the box head. - box_pooler (ROIPooler): pooler to extra region features for box head - box_head (nn.Module): transform features to make box predictions - box_predictor (nn.Module): make box predictions from the feature. - Should have the same interface as :class:`FastRCNNOutputLayers`. - mask_in_features (list[str]): list of feature names to use for the mask - pooler or mask head. None if not using mask head. 
- mask_pooler (ROIPooler): pooler to extract region features from image features. - The mask head will then take region features to make predictions. - If None, the mask head will directly take the dict of image features - defined by `mask_in_features` - mask_head (nn.Module): transform features to make mask predictions - keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask_*``. - train_on_pred_boxes (bool): whether to use proposal boxes or - predicted boxes from the box head to train other heads. - """ - super().__init__(**kwargs) - # keep self.in_features for backward compatibility - self.in_features = self.box_in_features = box_in_features - self.box_pooler = box_pooler - self.box_head = box_head - self.box_predictor = box_predictor - - self.mask_on = mask_in_features is not None - if self.mask_on: - self.mask_in_features = mask_in_features - self.mask_pooler = mask_pooler - self.mask_head = mask_head - - self.keypoint_on = keypoint_in_features is not None - if self.keypoint_on: - self.keypoint_in_features = keypoint_in_features - self.keypoint_pooler = keypoint_pooler - self.keypoint_head = keypoint_head - - self.train_on_pred_boxes = train_on_pred_boxes - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg) - ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES - # Subclasses that have not been updated to use from_config style construction - # may have overridden _init_*_head methods. In this case, those overridden methods - # will not be classmethods and we need to avoid trying to call them here. - # We test for this with ismethod which only returns True for bound methods of cls. - # Such subclasses will need to handle calling their overridden _init_*_head methods. - if inspect.ismethod(cls._init_box_head): - ret.update(cls._init_box_head(cfg, input_shape)) - if inspect.ismethod(cls._init_mask_head): - ret.update(cls._init_mask_head(cfg, input_shape)) - if inspect.ismethod(cls._init_keypoint_head): - ret.update(cls._init_keypoint_head(cfg, input_shape)) - return ret - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - # fmt: on - - # If StandardROIHeads is applied on multiple feature maps (as in FPN), - # then we share the same predictors and therefore the channel counts must be the same - in_channels = [input_shape[f].channels for f in in_features] - # Check all channel counts are equal - assert len(set(in_channels)) == 1, in_channels - in_channels = in_channels[0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - # Here we split "box head" and "box predictor", which is mainly due to historical reasons. - # They are used together so the "box predictor" layers should be part of the "box head". - # New subclasses of ROIHeads do not need "box predictor"s. 
- box_head = build_box_head( - cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) - ) - box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_head": box_head, - "box_predictor": box_predictor, - } - - @classmethod - def _init_mask_head(cls, cfg, input_shape): - if not cfg.MODEL.MASK_ON: - return {} - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features][0] - - ret = {"mask_in_features": in_features} - ret["mask_pooler"] = ( - ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - if pooler_type - else None - ) - if pooler_type: - shape = ShapeSpec( - channels=in_channels, width=pooler_resolution, height=pooler_resolution - ) - else: - shape = {f: input_shape[f] for f in in_features} - ret["mask_head"] = build_mask_head(cfg, shape) - return ret - - @classmethod - def _init_keypoint_head(cls, cfg, input_shape): - if not cfg.MODEL.KEYPOINT_ON: - return {} - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa - sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features][0] - - ret = {"keypoint_in_features": in_features} - ret["keypoint_pooler"] = ( - ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - if pooler_type - else None - ) - if pooler_type: - shape = ShapeSpec( - channels=in_channels, width=pooler_resolution, height=pooler_resolution - ) - else: - shape = {f: input_shape[f] for f in in_features} - ret["keypoint_head"] = build_keypoint_head(cfg, shape) - return ret - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: - """ - See :class:`ROIHeads.forward`. - """ - del images - if self.training: - assert targets, "'targets' argument is required during training" - proposals = self.label_and_sample_proposals(proposals, targets) - del targets - - if self.training: - losses = self._forward_box(features, proposals) - # Usually the original proposals used by the box head are used by the mask, keypoint - # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes - # predicted by the box head. - losses.update(self._forward_mask(features, proposals)) - losses.update(self._forward_keypoint(features, proposals)) - return proposals, losses - else: - pred_instances = self._forward_box(features, proposals) - # During inference cascaded prediction is used: the mask and keypoints heads are only - # applied to the top scoring box detections. 
- pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def forward_with_given_boxes( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> List[Instances]: - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - This is useful for downstream tasks where a box is known, but need to obtain - other attributes (outputs of other heads). - Test-time augmentation also uses this. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - list[Instances]: - the same `Instances` objects, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - assert not self.training - assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") - - instances = self._forward_mask(features, instances) - instances = self._forward_keypoint(features, instances) - return instances - - def _forward_box(self, features: Dict[str, torch.Tensor], proposals: List[Instances]): - """ - Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, - the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - proposals (list[Instances]): the per-image object proposals with - their matching ground truth. - Each has fields "proposal_boxes", and "objectness_logits", - "gt_classes", "gt_boxes". - - Returns: - In training, a dict of losses. - In inference, a list of `Instances`, the predicted instances. - """ - features = [features[f] for f in self.box_in_features] - box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) - box_features = self.box_head(box_features) - predictions = self.box_predictor(box_features) - del box_features - - if self.training: - losses = self.box_predictor.losses(predictions, proposals) - # proposals is modified in-place below, so losses must be computed first. - if self.train_on_pred_boxes: - with torch.no_grad(): - pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( - predictions, proposals - ) - for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): - proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) - return losses - else: - pred_instances, _ = self.box_predictor.inference(predictions, proposals) - return pred_instances - - def _forward_mask(self, features: Dict[str, torch.Tensor], instances: List[Instances]): - """ - Forward logic of the mask prediction branch. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - instances (list[Instances]): the per-image instances to train/predict masks. - In training, they can be the proposals. - In inference, they can be the boxes predicted by R-CNN box head. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_masks" and return it. - """ - if not self.mask_on: - return {} if self.training else instances - - if self.training: - # head is only trained on positive proposals. 
- instances, _ = select_foreground_proposals(instances, self.num_classes) - - if self.mask_pooler is not None: - features = [features[f] for f in self.mask_in_features] - boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] - features = self.mask_pooler(features, boxes) - else: - features = {f: features[f] for f in self.mask_in_features} - return self.mask_head(features, instances) - - def _forward_keypoint(self, features: Dict[str, torch.Tensor], instances: List[Instances]): - """ - Forward logic of the keypoint prediction branch. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - instances (list[Instances]): the per-image instances to train/predict keypoints. - In training, they can be the proposals. - In inference, they can be the boxes predicted by R-CNN box head. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_keypoints" and return it. - """ - if not self.keypoint_on: - return {} if self.training else instances - - if self.training: - # head is only trained on positive proposals with >=1 visible keypoints. - instances, _ = select_foreground_proposals(instances, self.num_classes) - instances = select_proposals_with_visible_keypoints(instances) - - if self.keypoint_pooler is not None: - features = [features[f] for f in self.keypoint_in_features] - boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances] - features = self.keypoint_pooler(features, boxes) - else: - features = {f: features[f] for f in self.keypoint_in_features} - return self.keypoint_head(features, instances) diff --git a/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py deleted file mode 100644 index 1e7bfabdedff5c5a826d8d4f551ea96b541f2cb6..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py +++ /dev/null @@ -1,271 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import numpy as np -import torch - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec, batched_nms_rotated -from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.events import get_event_storage - -from ..box_regression import Box2BoxTransformRotated -from ..poolers import ROIPooler -from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers -from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads - -logger = logging.getLogger(__name__) - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - R: number of ROIs, combined over all images, in the minibatch - Ri: number of ROIs in image i - K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. - -Naming convention: - - deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransformRotated`). - - pred_class_logits: predicted class scores in [-inf, +inf]; use - softmax(pred_class_logits) to estimate P(class). - - gt_classes: ground-truth classification labels in [0, K], where [0, K) represent - foreground object classes and K represents the background class. 
- - pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals - to detection box predictions. - - gt_proposal_deltas: ground-truth rotated box2box transform deltas -""" - - -def fast_rcnn_inference_rotated( - boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image -): - """ - Call `fast_rcnn_inference_single_image_rotated` for all images. - - Args: - boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic - boxes for each image. Element i has shape (Ri, K * 5) if doing - class-specific regression, or (Ri, 5) if doing class-agnostic - regression, where Ri is the number of predicted objects for image i. - This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. - scores (list[Tensor]): A list of Tensors of predicted class scores for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. - image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. - score_thresh (float): Only return detections with a confidence score exceeding this - threshold. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - instances: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections. - kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates - the corresponding boxes/scores index in [0, Ri) from the input, for image i. - """ - result_per_image = [ - fast_rcnn_inference_single_image_rotated( - boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image - ) - for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) - ] - return [x[0] for x in result_per_image], [x[1] for x in result_per_image] - - -@torch.no_grad() -def fast_rcnn_inference_single_image_rotated( - boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image -): - """ - Single-image inference. Return rotated bounding-box detection results by thresholding - on scores and applying rotated non-maximum suppression (Rotated NMS). - - Args: - Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes - per image. - - Returns: - Same as `fast_rcnn_inference_rotated`, but for only one image. - """ - valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores = scores[valid_mask] - - B = 5 # box dimension - scores = scores[:, :-1] - num_bbox_reg_classes = boxes.shape[1] // B - # Convert to Boxes to use the `clip` function ... - boxes = RotatedBoxes(boxes.reshape(-1, B)) - boxes.clip(image_shape) - boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B - # Filter results based on detection scores - filter_mask = scores > score_thresh # R x K - # R' x 2. First column contains indices of the R predictions; - # Second column contains indices of classes. 
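A minimal sketch of calling `fast_rcnn_inference_rotated` directly with dummy tensors, assuming detectron2 is installed; the shapes follow the docstring above and all numbers are illustrative:

```python
import torch
from detectron2.modeling.roi_heads.rotated_fast_rcnn import fast_rcnn_inference_rotated

num_rois, num_classes = 10, 3
# class-specific rotated boxes: (Ri, K * 5) in (xc, yc, w, h, angle) layout
boxes = [torch.rand(num_rois, num_classes * 5) * 100]
# class scores including background: (Ri, K + 1)
scores = [torch.rand(num_rois, num_classes + 1).softmax(dim=-1)]
image_shapes = [(256, 256)]  # one image in the mini-batch

instances, kept_indices = fast_rcnn_inference_rotated(
    boxes, scores, image_shapes,
    score_thresh=0.05, nms_thresh=0.5, topk_per_image=100,
)
print(instances[0].pred_boxes, kept_indices[0])
```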
- filter_inds = filter_mask.nonzero() - if num_bbox_reg_classes == 1: - boxes = boxes[filter_inds[:, 0], 0] - else: - boxes = boxes[filter_mask] - scores = scores[filter_mask] - - # Apply per-class Rotated NMS - keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) - if topk_per_image >= 0: - keep = keep[:topk_per_image] - boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] - - result = Instances(image_shape) - result.pred_boxes = RotatedBoxes(boxes) - result.scores = scores - result.pred_classes = filter_inds[:, 1] - - return result, filter_inds[:, 0] - - -class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): - """ - Two linear layers for predicting Rotated Fast R-CNN outputs. - """ - - @classmethod - def from_config(cls, cfg, input_shape): - args = super().from_config(cfg, input_shape) - args["box2box_transform"] = Box2BoxTransformRotated( - weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS - ) - return args - - def inference(self, predictions, proposals): - """ - Returns: - list[Instances]: same as `fast_rcnn_inference_rotated`. - list[Tensor]: same as `fast_rcnn_inference_rotated`. - """ - boxes = self.predict_boxes(predictions, proposals) - scores = self.predict_probs(predictions, proposals) - image_shapes = [x.image_size for x in proposals] - - return fast_rcnn_inference_rotated( - boxes, - scores, - image_shapes, - self.test_score_thresh, - self.test_nms_thresh, - self.test_topk_per_image, - ) - - -@ROI_HEADS_REGISTRY.register() -class RROIHeads(StandardROIHeads): - """ - This class is used by Rotated Fast R-CNN to detect rotated boxes. - For now, it only supports box predictions but not mask or keypoints. - """ - - @configurable - def __init__(self, **kwargs): - """ - NOTE: this interface is experimental. - """ - super().__init__(**kwargs) - assert ( - not self.mask_on and not self.keypoint_on - ), "Mask/Keypoints not supported in Rotated ROIHeads." - assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - # fmt: on - assert pooler_type in ["ROIAlignRotated"], pooler_type - # assume all channel counts are equal - in_channels = [input_shape[f].channels for f in in_features][0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - box_head = build_box_head( - cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) - ) - # This line is the only difference v.s. StandardROIHeads - box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_head": box_head, - "box_predictor": box_predictor, - } - - @torch.no_grad() - def label_and_sample_proposals(self, proposals, targets): - """ - Prepare some proposals to be used to train the RROI heads. - It performs box matching between `proposals` and `targets`, and assigns - training labels to the proposals. - It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, - with a fraction of positives that is no larger than `self.positive_sample_fraction. 
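The rotated heads above are picked up through detectron2's config system. A small, illustrative config fragment follows; only the keys read by the removed code are shown, and a complete rotated-box setup also needs a rotated proposal generator and anchor generator:

```python
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"                  # registered via @ROI_HEADS_REGISTRY.register()
cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"  # asserted in RROIHeads._init_box_head
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 1.0)  # five weights: dx, dy, dw, dh, da
```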
- - Args: - See :meth:`StandardROIHeads.forward` - - Returns: - list[Instances]: length `N` list of `Instances`s containing the proposals - sampled for training. Each `Instances` has the following fields: - - proposal_boxes: the rotated proposal boxes - - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to - (this is only meaningful if the proposal has a label > 0; if label = 0 - then the ground-truth box is random) - - gt_classes: the ground-truth classification lable for each proposal - """ - if self.proposal_append_gt: - proposals = add_ground_truth_to_proposals(targets, proposals) - - proposals_with_gt = [] - - num_fg_samples = [] - num_bg_samples = [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - has_gt = len(targets_per_image) > 0 - match_quality_matrix = pairwise_iou_rotated( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) - sampled_idxs, gt_classes = self._sample_proposals( - matched_idxs, matched_labels, targets_per_image.gt_classes - ) - - proposals_per_image = proposals_per_image[sampled_idxs] - proposals_per_image.gt_classes = gt_classes - - if has_gt: - sampled_targets = matched_idxs[sampled_idxs] - proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] - - num_bg_samples.append((gt_classes == self.num_classes).sum().item()) - num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) - proposals_with_gt.append(proposals_per_image) - - # Log the number of fg/bg samples that are selected for training ROI heads - storage = get_event_storage() - storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) - storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) - - return proposals_with_gt diff --git a/detectron2/detectron2/modeling/sampling.py b/detectron2/detectron2/modeling/sampling.py deleted file mode 100644 index a2d0f6648b349c5ea39fd29785b77c961a58fa22..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/sampling.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch - -from detectron2.layers import nonzero_tuple - -__all__ = ["subsample_labels"] - - -def subsample_labels( - labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int -): - """ - Return `num_samples` (or fewer, if not enough found) - random samples from `labels` which is a mixture of positives & negatives. - It will try to return as many positives as possible without - exceeding `positive_fraction * num_samples`, and then try to - fill the remaining slots with negatives. - - Args: - labels (Tensor): (N, ) label vector with values: - * -1: ignore - * bg_label: background ("negative") class - * otherwise: one or more foreground ("positive") classes - num_samples (int): The total number of labels with value >= 0 to return. - Values that are not sampled will be filled with -1 (ignore). - positive_fraction (float): The number of subsampled labels with values > 0 - is `min(num_positives, int(positive_fraction * num_samples))`. The number - of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. - In order words, if there are not enough positives, the sample is filled with - negatives. If there are also not enough negatives, then as many elements are - sampled as is possible. - bg_label (int): label index of background ("negative") class. - - Returns: - pos_idx, neg_idx (Tensor): - 1D vector of indices. 
The total length of both is `num_samples` or fewer. - """ - positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0] - negative = nonzero_tuple(labels == bg_label)[0] - - num_pos = int(num_samples * positive_fraction) - # protect against not enough positive examples - num_pos = min(positive.numel(), num_pos) - num_neg = num_samples - num_pos - # protect against not enough negative examples - num_neg = min(negative.numel(), num_neg) - - # randomly select positive and negative examples - perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] - perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] - - pos_idx = positive[perm1] - neg_idx = negative[perm2] - return pos_idx, neg_idx diff --git a/detectron2/detectron2/modeling/test_time_augmentation.py b/detectron2/detectron2/modeling/test_time_augmentation.py deleted file mode 100644 index 373e6bf00a39c040ff1da49d6dcd39a54a0b69a7..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/modeling/test_time_augmentation.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import numpy as np -from contextlib import contextmanager -from itertools import count -from typing import List -import torch -from fvcore.transforms import HFlipTransform, NoOpTransform -from torch import nn -from torch.nn.parallel import DistributedDataParallel - -from detectron2.config import configurable -from detectron2.data.detection_utils import read_image -from detectron2.data.transforms import ( - RandomFlip, - ResizeShortestEdge, - ResizeTransform, - apply_augmentations, -) -from detectron2.structures import Boxes, Instances - -from .meta_arch import GeneralizedRCNN -from .postprocessing import detector_postprocess -from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image - -__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] - - -class DatasetMapperTTA: - """ - Implement test-time augmentation for detection data. - It is a callable which takes a dataset dict from a detection dataset, - and returns a list of dataset dicts where the images - are augmented from the input image by the transformations defined in the config. - This is used for test-time augmentation. - """ - - @configurable - def __init__(self, min_sizes: List[int], max_size: int, flip: bool): - """ - Args: - min_sizes: list of short-edge size to resize the image to - max_size: maximum height or width of resized images - flip: whether to apply flipping augmentation - """ - self.min_sizes = min_sizes - self.max_size = max_size - self.flip = flip - - @classmethod - def from_config(cls, cfg): - return { - "min_sizes": cfg.TEST.AUG.MIN_SIZES, - "max_size": cfg.TEST.AUG.MAX_SIZE, - "flip": cfg.TEST.AUG.FLIP, - } - - def __call__(self, dataset_dict): - """ - Args: - dict: a dict in standard model input format. See tutorials for details. - - Returns: - list[dict]: - a list of dicts, which contain augmented version of the input image. - The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. - Each dict has field "transforms" which is a TransformList, - containing the transforms that are used to generate this image. 
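A worked example of `subsample_labels` defined above. The numbers are made up; the sampled indices are random, but the counts are deterministic:

```python
import torch
from detectron2.modeling.sampling import subsample_labels

# two foreground labels (1 and 3), four background labels (80), two ignored (-1)
labels = torch.tensor([1, -1, 80, 80, 3, 80, -1, 80])
pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.25, bg_label=80)
# at most int(4 * 0.25) = 1 positive is kept; the remaining 3 slots are filled with background
assert len(pos_idx) == 1 and len(neg_idx) == 3
```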
- """ - numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy() - shape = numpy_image.shape - orig_shape = (dataset_dict["height"], dataset_dict["width"]) - if shape[:2] != orig_shape: - # It transforms the "original" image in the dataset to the input image - pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1]) - else: - pre_tfm = NoOpTransform() - - # Create all combinations of augmentations to use - aug_candidates = [] # each element is a list[Augmentation] - for min_size in self.min_sizes: - resize = ResizeShortestEdge(min_size, self.max_size) - aug_candidates.append([resize]) # resize only - if self.flip: - flip = RandomFlip(prob=1.0) - aug_candidates.append([resize, flip]) # resize + flip - - # Apply all the augmentations - ret = [] - for aug in aug_candidates: - new_image, tfms = apply_augmentations(aug, np.copy(numpy_image)) - torch_image = torch.from_numpy(np.ascontiguousarray(new_image.transpose(2, 0, 1))) - - dic = copy.deepcopy(dataset_dict) - dic["transforms"] = pre_tfm + tfms - dic["image"] = torch_image - ret.append(dic) - return ret - - -class GeneralizedRCNNWithTTA(nn.Module): - """ - A GeneralizedRCNN with test-time augmentation enabled. - Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. - """ - - def __init__(self, cfg, model, tta_mapper=None, batch_size=3): - """ - Args: - cfg (CfgNode): - model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. - tta_mapper (callable): takes a dataset dict and returns a list of - augmented versions of the dataset dict. Defaults to - `DatasetMapperTTA(cfg)`. - batch_size (int): batch the augmented images into this batch size for inference. - """ - super().__init__() - if isinstance(model, DistributedDataParallel): - model = model.module - assert isinstance( - model, GeneralizedRCNN - ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) - self.cfg = cfg.clone() - assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" - assert ( - not self.cfg.MODEL.LOAD_PROPOSALS - ), "TTA for pre-computed proposals is not supported yet" - - self.model = model - - if tta_mapper is None: - tta_mapper = DatasetMapperTTA(cfg) - self.tta_mapper = tta_mapper - self.batch_size = batch_size - - @contextmanager - def _turn_off_roi_heads(self, attrs): - """ - Open a context where some heads in `model.roi_heads` are temporarily turned off. - Args: - attr (list[str]): the attribute in `model.roi_heads` which can be used - to turn off a specific head, e.g., "mask_on", "keypoint_on". - """ - roi_heads = self.model.roi_heads - old = {} - for attr in attrs: - try: - old[attr] = getattr(roi_heads, attr) - except AttributeError: - # The head may not be implemented in certain ROIHeads - pass - - if len(old.keys()) == 0: - yield - else: - for attr in old.keys(): - setattr(roi_heads, attr, False) - yield - for attr in old.keys(): - setattr(roi_heads, attr, old[attr]) - - def _batch_inference(self, batched_inputs, detected_instances=None): - """ - Execute inference on a list of inputs, - using batch size = self.batch_size, instead of the length of the list. 
- - Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` - """ - if detected_instances is None: - detected_instances = [None] * len(batched_inputs) - - outputs = [] - inputs, instances = [], [] - for idx, input, instance in zip(count(), batched_inputs, detected_instances): - inputs.append(input) - instances.append(instance) - if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: - outputs.extend( - self.model.inference( - inputs, - instances if instances[0] is not None else None, - do_postprocess=False, - ) - ) - inputs, instances = [], [] - return outputs - - def __call__(self, batched_inputs): - """ - Same input/output format as :meth:`GeneralizedRCNN.forward` - """ - - def _maybe_read_image(dataset_dict): - ret = copy.copy(dataset_dict) - if "image" not in ret: - image = read_image(ret.pop("file_name"), self.model.input_format) - image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW - ret["image"] = image - if "height" not in ret and "width" not in ret: - ret["height"] = image.shape[1] - ret["width"] = image.shape[2] - return ret - - return [self._inference_one_image(_maybe_read_image(x)) for x in batched_inputs] - - def _inference_one_image(self, input): - """ - Args: - input (dict): one dataset dict with "image" field being a CHW tensor - - Returns: - dict: one output dict - """ - orig_shape = (input["height"], input["width"]) - augmented_inputs, tfms = self._get_augmented_inputs(input) - # Detect boxes from all augmented versions - with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): - # temporarily disable roi heads - all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms) - # merge all detected boxes to obtain final predictions for boxes - merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape) - - if self.cfg.MODEL.MASK_ON: - # Use the detected boxes to obtain masks - augmented_instances = self._rescale_detected_boxes( - augmented_inputs, merged_instances, tfms - ) - # run forward on the detected boxes - outputs = self._batch_inference(augmented_inputs, augmented_instances) - # Delete now useless variables to avoid being out of memory - del augmented_inputs, augmented_instances - # average the predictions - merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms) - merged_instances = detector_postprocess(merged_instances, *orig_shape) - return {"instances": merged_instances} - else: - return {"instances": merged_instances} - - def _get_augmented_inputs(self, input): - augmented_inputs = self.tta_mapper(input) - tfms = [x.pop("transforms") for x in augmented_inputs] - return augmented_inputs, tfms - - def _get_augmented_boxes(self, augmented_inputs, tfms): - # 1: forward with all augmented images - outputs = self._batch_inference(augmented_inputs) - # 2: union the results - all_boxes = [] - all_scores = [] - all_classes = [] - for output, tfm in zip(outputs, tfms): - # Need to inverse the transforms on boxes, to obtain results on original image - pred_boxes = output.pred_boxes.tensor - original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy()) - all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device)) - - all_scores.extend(output.scores) - all_classes.extend(output.pred_classes) - all_boxes = torch.cat(all_boxes, dim=0) - return all_boxes, all_scores, all_classes - - def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): - # select from the union of all results - num_boxes = 
len(all_boxes) - num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES - # +1 because fast_rcnn_inference expects background scores as well - all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) - for idx, cls, score in zip(count(), all_classes, all_scores): - all_scores_2d[idx, cls] = score - - merged_instances, _ = fast_rcnn_inference_single_image( - all_boxes, - all_scores_2d, - shape_hw, - 1e-8, - self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, - self.cfg.TEST.DETECTIONS_PER_IMAGE, - ) - - return merged_instances - - def _rescale_detected_boxes(self, augmented_inputs, merged_instances, tfms): - augmented_instances = [] - for input, tfm in zip(augmented_inputs, tfms): - # Transform the target box to the augmented image's coordinate space - pred_boxes = merged_instances.pred_boxes.tensor.cpu().numpy() - pred_boxes = torch.from_numpy(tfm.apply_box(pred_boxes)) - - aug_instances = Instances( - image_size=input["image"].shape[1:3], - pred_boxes=Boxes(pred_boxes), - pred_classes=merged_instances.pred_classes, - scores=merged_instances.scores, - ) - augmented_instances.append(aug_instances) - return augmented_instances - - def _reduce_pred_masks(self, outputs, tfms): - # Should apply inverse transforms on masks. - # We assume only resize & flip are used. pred_masks is a scale-invariant - # representation, so we handle flip specially - for output, tfm in zip(outputs, tfms): - if any(isinstance(t, HFlipTransform) for t in tfm.transforms): - output.pred_masks = output.pred_masks.flip(dims=[3]) - all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) - avg_pred_masks = torch.mean(all_pred_masks, dim=0) - return avg_pred_masks diff --git a/detectron2/detectron2/projects/README.md b/detectron2/detectron2/projects/README.md deleted file mode 100644 index 95afe7ff8c8a9bd2f56621fcc3c1bdac11c256a9..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/projects/README.md +++ /dev/null @@ -1,2 +0,0 @@ - -Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here. diff --git a/detectron2/detectron2/projects/__init__.py b/detectron2/detectron2/projects/__init__.py deleted file mode 100644 index b2d0540b93ebbad78d6ff2cc0adc0fe8375816c2..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/projects/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
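The `projects/__init__.py` module whose removal begins here installs a meta-path finder so that, for an in-place (editable) install, the bundled projects can be imported as ordinary submodules. For example (this only works when the `projects/` source tree is present next to the package):

```python
# resolved by the finder below to projects/PointRend/point_rend/__init__.py
from detectron2.projects import point_rend
```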
-import importlib.abc -import importlib.util -from pathlib import Path - -__all__ = [] - -_PROJECTS = { - "point_rend": "PointRend", - "deeplab": "DeepLab", - "panoptic_deeplab": "Panoptic-DeepLab", -} -_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent / "projects" - -if _PROJECT_ROOT.is_dir(): - # This is true only for in-place installation (pip install -e, setup.py develop), - # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 - - class _D2ProjectsFinder(importlib.abc.MetaPathFinder): - def find_spec(self, name, path, target=None): - if not name.startswith("detectron2.projects."): - return - project_name = name.split(".")[-1] - project_dir = _PROJECTS.get(project_name) - if not project_dir: - return - target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py" - if not target_file.is_file(): - return - return importlib.util.spec_from_file_location(name, target_file) - - import sys - - sys.meta_path.append(_D2ProjectsFinder()) diff --git a/detectron2/detectron2/solver/__init__.py b/detectron2/detectron2/solver/__init__.py deleted file mode 100644 index 7e36c64f60f38f41d01dd2c9fb30364489a03841..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/solver/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params -from .lr_scheduler import ( - LRMultiplier, - LRScheduler, - WarmupCosineLR, - WarmupMultiStepLR, - WarmupParamScheduler, -) - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/solver/build.py b/detectron2/detectron2/solver/build.py deleted file mode 100644 index c0984d39f7227e94d2577435e32cd56e82c545fa..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/solver/build.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import itertools -import logging -from collections import defaultdict -from enum import Enum -from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Type, Union -import torch -from fvcore.common.param_scheduler import ( - CosineParamScheduler, - MultiStepParamScheduler, - StepWithFixedGammaParamScheduler, -) - -from detectron2.config import CfgNode -from detectron2.utils.env import TORCH_VERSION - -from .lr_scheduler import LRMultiplier, LRScheduler, WarmupParamScheduler - -_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] -_GradientClipper = Callable[[_GradientClipperInput], None] - - -class GradientClipType(Enum): - VALUE = "value" - NORM = "norm" - - -def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: - """ - Creates gradient clipping closure to clip by value or by norm, - according to the provided config. 
- """ - cfg = copy.deepcopy(cfg) - - def clip_grad_norm(p: _GradientClipperInput): - torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) - - def clip_grad_value(p: _GradientClipperInput): - torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) - - _GRADIENT_CLIP_TYPE_TO_CLIPPER = { - GradientClipType.VALUE: clip_grad_value, - GradientClipType.NORM: clip_grad_norm, - } - return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] - - -def _generate_optimizer_class_with_gradient_clipping( - optimizer: Type[torch.optim.Optimizer], - *, - per_param_clipper: Optional[_GradientClipper] = None, - global_clipper: Optional[_GradientClipper] = None, -) -> Type[torch.optim.Optimizer]: - """ - Dynamically creates a new type that inherits the type of a given instance - and overrides the `step` method to add gradient clipping - """ - assert ( - per_param_clipper is None or global_clipper is None - ), "Not allowed to use both per-parameter clipping and global clipping" - - def optimizer_wgc_step(self, closure=None): - if per_param_clipper is not None: - for group in self.param_groups: - for p in group["params"]: - per_param_clipper(p) - else: - # global clipper for future use with detr - # (https://github.com/facebookresearch/detr/pull/287) - all_params = itertools.chain(*[g["params"] for g in self.param_groups]) - global_clipper(all_params) - super(type(self), self).step(closure) - - OptimizerWithGradientClip = type( - optimizer.__name__ + "WithGradientClip", - (optimizer,), - {"step": optimizer_wgc_step}, - ) - return OptimizerWithGradientClip - - -def maybe_add_gradient_clipping( - cfg: CfgNode, optimizer: Type[torch.optim.Optimizer] -) -> Type[torch.optim.Optimizer]: - """ - If gradient clipping is enabled through config options, wraps the existing - optimizer type to become a new dynamically created class OptimizerWithGradientClip - that inherits the given optimizer and overrides the `step` method to - include gradient clipping. - - Args: - cfg: CfgNode, configuration options - optimizer: type. A subclass of torch.optim.Optimizer - - Return: - type: either the input `optimizer` (if gradient clipping is disabled), or - a subclass of it with gradient clipping included in the `step` method. - """ - if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: - return optimizer - if isinstance(optimizer, torch.optim.Optimizer): - optimizer_type = type(optimizer) - else: - assert issubclass(optimizer, torch.optim.Optimizer), optimizer - optimizer_type = optimizer - - grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) - OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( - optimizer_type, per_param_clipper=grad_clipper - ) - if isinstance(optimizer, torch.optim.Optimizer): - optimizer.__class__ = OptimizerWithGradientClip # a bit hacky, not recommended - return optimizer - else: - return OptimizerWithGradientClip - - -def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: - """ - Build an optimizer from config. 
- """ - params = get_default_optimizer_params( - model, - base_lr=cfg.SOLVER.BASE_LR, - weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, - bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR, - weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS, - ) - sgd_args = { - "params": params, - "lr": cfg.SOLVER.BASE_LR, - "momentum": cfg.SOLVER.MOMENTUM, - "nesterov": cfg.SOLVER.NESTEROV, - "weight_decay": cfg.SOLVER.WEIGHT_DECAY, - } - if TORCH_VERSION >= (1, 12): - sgd_args["foreach"] = True - return maybe_add_gradient_clipping(cfg, torch.optim.SGD(**sgd_args)) - - -def get_default_optimizer_params( - model: torch.nn.Module, - base_lr: Optional[float] = None, - weight_decay: Optional[float] = None, - weight_decay_norm: Optional[float] = None, - bias_lr_factor: Optional[float] = 1.0, - weight_decay_bias: Optional[float] = None, - lr_factor_func: Optional[Callable] = None, - overrides: Optional[Dict[str, Dict[str, float]]] = None, -) -> List[Dict[str, Any]]: - """ - Get default param list for optimizer, with support for a few types of - overrides. If no overrides needed, this is equivalent to `model.parameters()`. - - Args: - base_lr: lr for every group by default. Can be omitted to use the one in optimizer. - weight_decay: weight decay for every group by default. Can be omitted to use the one - in optimizer. - weight_decay_norm: override weight decay for params in normalization layers - bias_lr_factor: multiplier of lr for bias parameters. - weight_decay_bias: override weight decay for bias parameters. - lr_factor_func: function to calculate lr decay rate by mapping the parameter names to - corresponding lr decay rate. Note that setting this option requires - also setting ``base_lr``. - overrides: if not `None`, provides values for optimizer hyperparameters - (LR, weight decay) for module parameters with a given name; e.g. - ``{"embedding": {"lr": 0.01, "weight_decay": 0.1}}`` will set the LR and - weight decay values for all module parameters named `embedding`. - - For common detection models, ``weight_decay_norm`` is the only option - needed to be set. ``bias_lr_factor,weight_decay_bias`` are legacy settings - from Detectron1 that are not found useful. - - Example: - :: - torch.optim.SGD(get_default_optimizer_params(model, weight_decay_norm=0), - lr=0.01, weight_decay=1e-4, momentum=0.9) - """ - if overrides is None: - overrides = {} - defaults = {} - if base_lr is not None: - defaults["lr"] = base_lr - if weight_decay is not None: - defaults["weight_decay"] = weight_decay - bias_overrides = {} - if bias_lr_factor is not None and bias_lr_factor != 1.0: - # NOTE: unlike Detectron v1, we now by default make bias hyperparameters - # exactly the same as regular weights. 
- if base_lr is None: - raise ValueError("bias_lr_factor requires base_lr") - bias_overrides["lr"] = base_lr * bias_lr_factor - if weight_decay_bias is not None: - bias_overrides["weight_decay"] = weight_decay_bias - if len(bias_overrides): - if "bias" in overrides: - raise ValueError("Conflicting overrides for 'bias'") - overrides["bias"] = bias_overrides - if lr_factor_func is not None: - if base_lr is None: - raise ValueError("lr_factor_func requires base_lr") - norm_module_types = ( - torch.nn.BatchNorm1d, - torch.nn.BatchNorm2d, - torch.nn.BatchNorm3d, - torch.nn.SyncBatchNorm, - # NaiveSyncBatchNorm inherits from BatchNorm2d - torch.nn.GroupNorm, - torch.nn.InstanceNorm1d, - torch.nn.InstanceNorm2d, - torch.nn.InstanceNorm3d, - torch.nn.LayerNorm, - torch.nn.LocalResponseNorm, - ) - params: List[Dict[str, Any]] = [] - memo: Set[torch.nn.parameter.Parameter] = set() - for module_name, module in model.named_modules(): - for module_param_name, value in module.named_parameters(recurse=False): - if not value.requires_grad: - continue - # Avoid duplicating parameters - if value in memo: - continue - memo.add(value) - - hyperparams = copy.copy(defaults) - if isinstance(module, norm_module_types) and weight_decay_norm is not None: - hyperparams["weight_decay"] = weight_decay_norm - if lr_factor_func is not None: - hyperparams["lr"] *= lr_factor_func(f"{module_name}.{module_param_name}") - - hyperparams.update(overrides.get(module_param_name, {})) - params.append({"params": [value], **hyperparams}) - return reduce_param_groups(params) - - -def _expand_param_groups(params: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - # Transform parameter groups into per-parameter structure. - # Later items in `params` can overwrite parameters set in previous items. - ret = defaultdict(dict) - for item in params: - assert "params" in item - cur_params = {x: y for x, y in item.items() if x != "params" and x != "param_names"} - if "param_names" in item: - for param_name, param in zip(item["param_names"], item["params"]): - ret[param].update({"param_names": [param_name], "params": [param], **cur_params}) - else: - for param in item["params"]: - ret[param].update({"params": [param], **cur_params}) - return list(ret.values()) - - -def reduce_param_groups(params: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - # Reorganize the parameter groups and merge duplicated groups. - # The number of parameter groups needs to be as small as possible in order - # to efficiently use the PyTorch multi-tensor optimizer. Therefore instead - # of using a parameter_group per single parameter, we reorganize the - # parameter groups and merge duplicated groups. This approach speeds - # up multi-tensor optimizer significantly. 
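A short sketch, in the spirit of the docstring's own example above, of feeding `get_default_optimizer_params` into a plain SGD optimizer; the model and hyperparameters are illustrative:

```python
import torch
from detectron2.solver import get_default_optimizer_params

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.BatchNorm2d(8))
params = get_default_optimizer_params(
    model,
    base_lr=0.01,
    weight_decay_norm=0.0,   # drop weight decay on the BatchNorm parameters
    bias_lr_factor=2.0,      # biases get twice the base learning rate
)
opt = torch.optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=1e-4)
```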
- params = _expand_param_groups(params) - groups = defaultdict(list) # re-group all parameter groups by their hyperparams - for item in params: - cur_params = tuple((x, y) for x, y in item.items() if x != "params" and x != "param_names") - groups[cur_params].append({"params": item["params"]}) - if "param_names" in item: - groups[cur_params][-1]["param_names"] = item["param_names"] - - ret = [] - for param_keys, param_values in groups.items(): - cur = {kv[0]: kv[1] for kv in param_keys} - cur["params"] = list( - itertools.chain.from_iterable([params["params"] for params in param_values]) - ) - if len(param_values) > 0 and "param_names" in param_values[0]: - cur["param_names"] = list( - itertools.chain.from_iterable([params["param_names"] for params in param_values]) - ) - ret.append(cur) - return ret - - -def build_lr_scheduler(cfg: CfgNode, optimizer: torch.optim.Optimizer) -> LRScheduler: - """ - Build a LR scheduler from config. - """ - name = cfg.SOLVER.LR_SCHEDULER_NAME - - if name == "WarmupMultiStepLR": - steps = [x for x in cfg.SOLVER.STEPS if x <= cfg.SOLVER.MAX_ITER] - if len(steps) != len(cfg.SOLVER.STEPS): - logger = logging.getLogger(__name__) - logger.warning( - "SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. " - "These values will be ignored." - ) - sched = MultiStepParamScheduler( - values=[cfg.SOLVER.GAMMA**k for k in range(len(steps) + 1)], - milestones=steps, - num_updates=cfg.SOLVER.MAX_ITER, - ) - elif name == "WarmupCosineLR": - end_value = cfg.SOLVER.BASE_LR_END / cfg.SOLVER.BASE_LR - assert end_value >= 0.0 and end_value <= 1.0, end_value - sched = CosineParamScheduler(1, end_value) - elif name == "WarmupStepWithFixedGammaLR": - sched = StepWithFixedGammaParamScheduler( - base_value=1.0, - gamma=cfg.SOLVER.GAMMA, - num_decays=cfg.SOLVER.NUM_DECAYS, - num_updates=cfg.SOLVER.MAX_ITER, - ) - else: - raise ValueError("Unknown LR scheduler: {}".format(name)) - - sched = WarmupParamScheduler( - sched, - cfg.SOLVER.WARMUP_FACTOR, - min(cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_ITER, 1.0), - cfg.SOLVER.WARMUP_METHOD, - cfg.SOLVER.RESCALE_INTERVAL, - ) - return LRMultiplier(optimizer, multiplier=sched, max_iter=cfg.SOLVER.MAX_ITER) diff --git a/detectron2/detectron2/solver/lr_scheduler.py b/detectron2/detectron2/solver/lr_scheduler.py deleted file mode 100644 index 01e1eb7854a9662b9595a7ffa9b0e484faf34dff..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/solver/lr_scheduler.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import math -from bisect import bisect_right -from typing import List -import torch -from fvcore.common.param_scheduler import ( - CompositeParamScheduler, - ConstantParamScheduler, - LinearParamScheduler, - ParamScheduler, -) - -try: - from torch.optim.lr_scheduler import LRScheduler -except ImportError: - from torch.optim.lr_scheduler import _LRScheduler as LRScheduler - -logger = logging.getLogger(__name__) - - -class WarmupParamScheduler(CompositeParamScheduler): - """ - Add an initial warmup stage to another scheduler. - """ - - def __init__( - self, - scheduler: ParamScheduler, - warmup_factor: float, - warmup_length: float, - warmup_method: str = "linear", - rescale_interval: bool = False, - ): - """ - Args: - scheduler: warmup will be added at the beginning of this scheduler - warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001 - warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire - training, e.g. 
0.01 - warmup_method: one of "linear" or "constant" - rescale_interval: whether we will rescale the interval of the scheduler after - warmup - """ - # the value to reach when warmup ends - end_value = scheduler(0.0) if rescale_interval else scheduler(warmup_length) - start_value = warmup_factor * scheduler(0.0) - if warmup_method == "constant": - warmup = ConstantParamScheduler(start_value) - elif warmup_method == "linear": - warmup = LinearParamScheduler(start_value, end_value) - else: - raise ValueError("Unknown warmup method: {}".format(warmup_method)) - super().__init__( - [warmup, scheduler], - interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"], - lengths=[warmup_length, 1 - warmup_length], - ) - - -class LRMultiplier(LRScheduler): - """ - A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the - learning rate of each param in the optimizer. - Every step, the learning rate of each parameter becomes its initial value - multiplied by the output of the given :class:`ParamScheduler`. - - The absolute learning rate value of each parameter can be different. - This scheduler can be used as long as the relative scale among them do - not change during training. - - Examples: - :: - LRMultiplier( - opt, - WarmupParamScheduler( - MultiStepParamScheduler( - [1, 0.1, 0.01], - milestones=[60000, 80000], - num_updates=90000, - ), 0.001, 100 / 90000 - ), - max_iter=90000 - ) - """ - - # NOTES: in the most general case, every LR can use its own scheduler. - # Supporting this requires interaction with the optimizer when its parameter - # group is initialized. For example, classyvision implements its own optimizer - # that allows different schedulers for every parameter group. - # To avoid this complexity, we use this class to support the most common cases - # where the relative scale among all LRs stay unchanged during training. In this - # case we only need a total of one scheduler that defines the relative LR multiplier. - - def __init__( - self, - optimizer: torch.optim.Optimizer, - multiplier: ParamScheduler, - max_iter: int, - last_iter: int = -1, - ): - """ - Args: - optimizer, last_iter: See ``torch.optim.lr_scheduler.LRScheduler``. - ``last_iter`` is the same as ``last_epoch``. - multiplier: a fvcore ParamScheduler that defines the multiplier on - every LR of the optimizer - max_iter: the total number of training iterations - """ - if not isinstance(multiplier, ParamScheduler): - raise ValueError( - "_LRMultiplier(multiplier=) must be an instance of fvcore " - f"ParamScheduler. Got {multiplier} instead." - ) - self._multiplier = multiplier - self._max_iter = max_iter - super().__init__(optimizer, last_epoch=last_iter) - - def state_dict(self): - # fvcore schedulers are stateless. Only keep pytorch scheduler states - return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch} - - def get_lr(self) -> List[float]: - multiplier = self._multiplier(self.last_epoch / self._max_iter) - return [base_lr * multiplier for base_lr in self.base_lrs] - - -""" -Content below is no longer needed! -""" - -# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes -# only on epoch boundaries. We typically use iteration based schedules instead. -# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean -# "iteration" instead. - -# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating -# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 
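For completeness, a config-driven sketch of `build_lr_scheduler` producing the `LRMultiplier` + `WarmupParamScheduler` combination described above. The keys are the ones the function reads; the values are illustrative:

```python
import torch
from detectron2.config import get_cfg
from detectron2.solver import build_lr_scheduler

cfg = get_cfg()
cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
cfg.SOLVER.MAX_ITER = 90000
cfg.SOLVER.STEPS = (60000, 80000)
cfg.SOLVER.GAMMA = 0.1
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.WARMUP_FACTOR = 0.001

model = torch.nn.Linear(4, 4)
opt = torch.optim.SGD(model.parameters(), lr=cfg.SOLVER.BASE_LR)
sched = build_lr_scheduler(cfg, opt)   # an LRMultiplier wrapping a warmed-up MultiStepParamScheduler
for _ in range(10):
    opt.step()
    sched.step()
```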
- - -class WarmupMultiStepLR(LRScheduler): - def __init__( - self, - optimizer: torch.optim.Optimizer, - milestones: List[int], - gamma: float = 0.1, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - ): - logger.warning( - "WarmupMultiStepLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" - ) - if not list(milestones) == sorted(milestones): - raise ValueError( - "Milestones should be a list of" " increasing integers. Got {}", milestones - ) - self.milestones = milestones - self.gamma = gamma - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - return [ - base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() - - -class WarmupCosineLR(LRScheduler): - def __init__( - self, - optimizer: torch.optim.Optimizer, - max_iters: int, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - ): - logger.warning( - "WarmupCosineLR is deprecated! Use LRMultipilier with fvcore ParamScheduler instead!" - ) - self.max_iters = max_iters - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - # Different definitions of half-cosine with warmup are possible. For - # simplicity we multiply the standard half-cosine schedule by the warmup - # factor. An alternative is to start the period of the cosine at warmup_iters - # instead of at 0. In the case that warmup_iters << max_iters the two are - # very close to each other. - return [ - base_lr - * warmup_factor - * 0.5 - * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() - - -def _get_warmup_factor_at_iter( - method: str, iter: int, warmup_iters: int, warmup_factor: float -) -> float: - """ - Return the learning rate warmup factor at a specific iteration. - See :paper:`ImageNet in 1h` for more details. - - Args: - method (str): warmup method; either "constant" or "linear". - iter (int): iteration at which to calculate the warmup factor. - warmup_iters (int): the number of warmup iterations. - warmup_factor (float): the base warmup factor (the meaning changes according - to the method used). - - Returns: - float: the effective warmup factor at the given iteration. 
- """ - if iter >= warmup_iters: - return 1.0 - - if method == "constant": - return warmup_factor - elif method == "linear": - alpha = iter / warmup_iters - return warmup_factor * (1 - alpha) + alpha - else: - raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/detectron2/detectron2/structures/__init__.py b/detectron2/detectron2/structures/__init__.py deleted file mode 100644 index f3ee6057e3ec2731984ce8203c6eaf5348d08260..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa, pairwise_point_box_distance -from .image_list import ImageList - -from .instances import Instances -from .keypoints import Keypoints, heatmaps_to_keypoints -from .masks import BitMasks, PolygonMasks, polygons_to_bitmask, ROIMasks -from .rotated_boxes import RotatedBoxes -from .rotated_boxes import pairwise_iou as pairwise_iou_rotated - -__all__ = [k for k in globals().keys() if not k.startswith("_")] - - -from detectron2.utils.env import fixup_module_metadata - -fixup_module_metadata(__name__, globals(), __all__) -del fixup_module_metadata diff --git a/detectron2/detectron2/structures/boxes.py b/detectron2/detectron2/structures/boxes.py deleted file mode 100644 index fd396f68645db1d6946056eed868ffcc02cd7a22..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/boxes.py +++ /dev/null @@ -1,425 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -import numpy as np -from enum import IntEnum, unique -from typing import List, Tuple, Union -import torch -from torch import device - -_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] - - -@unique -class BoxMode(IntEnum): - """ - Enum of different ways to represent a box. - """ - - XYXY_ABS = 0 - """ - (x0, y0, x1, y1) in absolute floating points coordinates. - The coordinates in range [0, width or height]. - """ - XYWH_ABS = 1 - """ - (x0, y0, w, h) in absolute floating points coordinates. - """ - XYXY_REL = 2 - """ - Not yet supported! - (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. - """ - XYWH_REL = 3 - """ - Not yet supported! - (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. - """ - XYWHA_ABS = 4 - """ - (xc, yc, w, h, a) in absolute floating points coordinates. - (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. - """ - - @staticmethod - def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: - """ - Args: - box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 - from_mode, to_mode (BoxMode) - - Returns: - The converted box of the same type. - """ - if from_mode == to_mode: - return box - - original_type = type(box) - is_numpy = isinstance(box, np.ndarray) - single_box = isinstance(box, (list, tuple)) - if single_box: - assert len(box) == 4 or len(box) == 5, ( - "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," - " where k == 4 or 5" - ) - arr = torch.tensor(box)[None, :] - else: - # avoid modifying the input box - if is_numpy: - arr = torch.from_numpy(np.asarray(box)).clone() - else: - arr = box.clone() - - assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [ - BoxMode.XYXY_REL, - BoxMode.XYWH_REL, - ], "Relative mode not yet supported!" 
- - if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: - assert ( - arr.shape[-1] == 5 - ), "The last dimension of input shape must be 5 for XYWHA format" - original_dtype = arr.dtype - arr = arr.double() - - w = arr[:, 2] - h = arr[:, 3] - a = arr[:, 4] - c = torch.abs(torch.cos(a * math.pi / 180.0)) - s = torch.abs(torch.sin(a * math.pi / 180.0)) - # This basically computes the horizontal bounding rectangle of the rotated box - new_w = c * w + s * h - new_h = c * h + s * w - - # convert center to top-left corner - arr[:, 0] -= new_w / 2.0 - arr[:, 1] -= new_h / 2.0 - # bottom-right corner - arr[:, 2] = arr[:, 0] + new_w - arr[:, 3] = arr[:, 1] + new_h - - arr = arr[:, :4].to(dtype=original_dtype) - elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: - original_dtype = arr.dtype - arr = arr.double() - arr[:, 0] += arr[:, 2] / 2.0 - arr[:, 1] += arr[:, 3] / 2.0 - angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) - arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) - else: - if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: - arr[:, 2] += arr[:, 0] - arr[:, 3] += arr[:, 1] - elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: - arr[:, 2] -= arr[:, 0] - arr[:, 3] -= arr[:, 1] - else: - raise NotImplementedError( - "Conversion from BoxMode {} to {} is not supported yet".format( - from_mode, to_mode - ) - ) - - if single_box: - return original_type(arr.flatten().tolist()) - if is_numpy: - return arr.numpy() - else: - return arr - - -class Boxes: - """ - This structure stores a list of boxes as a Nx4 torch.Tensor. - It supports some common methods about boxes - (`area`, `clip`, `nonempty`, etc), - and also behaves like a Tensor - (support indexing, `to(device)`, `.device`, and iteration over all boxes) - - Attributes: - tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). - """ - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). - """ - if not isinstance(tensor, torch.Tensor): - tensor = torch.as_tensor(tensor, dtype=torch.float32, device=torch.device("cpu")) - else: - tensor = tensor.to(torch.float32) - if tensor.numel() == 0: - # Use reshape, so we don't end up creating a new tensor that does not depend on - # the inputs (and consequently confuses jit) - tensor = tensor.reshape((-1, 4)).to(dtype=torch.float32) - assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() - - self.tensor = tensor - - def clone(self) -> "Boxes": - """ - Clone the Boxes. - - Returns: - Boxes - """ - return Boxes(self.tensor.clone()) - - def to(self, device: torch.device): - # Boxes are assumed float32 and does not support to(dtype) - return Boxes(self.tensor.to(device=device)) - - def area(self) -> torch.Tensor: - """ - Computes the area of all the boxes. - - Returns: - torch.Tensor: a vector with areas of each box. - """ - box = self.tensor - area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) - return area - - def clip(self, box_size: Tuple[int, int]) -> None: - """ - Clip (in place) the boxes by limiting x coordinates to the range [0, width] - and y coordinates to the range [0, height]. - - Args: - box_size (height, width): The clipping box's size. - """ - assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" 
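A quick worked example of `BoxMode.convert` using only the modes supported above:

```python
import torch
from detectron2.structures import BoxMode

# (x0, y0, w, h) -> (x0, y0, x1, y1)
print(BoxMode.convert([10.0, 20.0, 30.0, 40.0], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS))
# [10.0, 20.0, 40.0, 60.0]

# (x0, y0, w, h) -> (xc, yc, w, h, angle): the center is filled in and the angle is 0
print(BoxMode.convert(torch.tensor([[10.0, 20.0, 30.0, 40.0]]), BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS))
# tensor([[25., 40., 30., 40., 0.]])
```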
- h, w = box_size - x1 = self.tensor[:, 0].clamp(min=0, max=w) - y1 = self.tensor[:, 1].clamp(min=0, max=h) - x2 = self.tensor[:, 2].clamp(min=0, max=w) - y2 = self.tensor[:, 3].clamp(min=0, max=h) - self.tensor = torch.stack((x1, y1, x2, y2), dim=-1) - - def nonempty(self, threshold: float = 0.0) -> torch.Tensor: - """ - Find boxes that are non-empty. - A box is considered empty, if either of its side is no larger than threshold. - - Returns: - Tensor: - a binary vector which represents whether each box is empty - (False) or non-empty (True). - """ - box = self.tensor - widths = box[:, 2] - box[:, 0] - heights = box[:, 3] - box[:, 1] - keep = (widths > threshold) & (heights > threshold) - return keep - - def __getitem__(self, item) -> "Boxes": - """ - Args: - item: int, slice, or a BoolTensor - - Returns: - Boxes: Create a new :class:`Boxes` by indexing. - - The following usage are allowed: - - 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. - 2. `new_boxes = boxes[2:10]`: return a slice of boxes. - 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor - with `length = len(boxes)`. Nonzero elements in the vector will be selected. - - Note that the returned Boxes might share storage with this Boxes, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return Boxes(self.tensor[item].view(1, -1)) - b = self.tensor[item] - assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) - return Boxes(b) - - def __len__(self) -> int: - return self.tensor.shape[0] - - def __repr__(self) -> str: - return "Boxes(" + str(self.tensor) + ")" - - def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: - """ - Args: - box_size (height, width): Size of the reference box. - boundary_threshold (int): Boxes that extend beyond the reference box - boundary by more than boundary_threshold are considered "outside". - - Returns: - a binary vector, indicating whether each box is inside the reference box. - """ - height, width = box_size - inds_inside = ( - (self.tensor[..., 0] >= -boundary_threshold) - & (self.tensor[..., 1] >= -boundary_threshold) - & (self.tensor[..., 2] < width + boundary_threshold) - & (self.tensor[..., 3] < height + boundary_threshold) - ) - return inds_inside - - def get_centers(self) -> torch.Tensor: - """ - Returns: - The box centers in a Nx2 array of (x, y). - """ - return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 - - def scale(self, scale_x: float, scale_y: float) -> None: - """ - Scale the box with horizontal and vertical scaling factors - """ - self.tensor[:, 0::2] *= scale_x - self.tensor[:, 1::2] *= scale_y - - @classmethod - def cat(cls, boxes_list: List["Boxes"]) -> "Boxes": - """ - Concatenates a list of Boxes into a single Boxes - - Arguments: - boxes_list (list[Boxes]) - - Returns: - Boxes: the concatenated Boxes - """ - assert isinstance(boxes_list, (list, tuple)) - if len(boxes_list) == 0: - return cls(torch.empty(0)) - assert all([isinstance(box, Boxes) for box in boxes_list]) - - # use torch.cat (v.s. 
layers.cat) so the returned boxes never share storage with input - cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) - return cat_boxes - - @property - def device(self) -> device: - return self.tensor.device - - # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript - # https://github.com/pytorch/pytorch/issues/18627 - @torch.jit.unused - def __iter__(self): - """ - Yield a box as a Tensor of shape (4,) at a time. - """ - yield from self.tensor - - -def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Given two lists of boxes of size N and M, - compute the intersection area between __all__ N x M pairs of boxes. - The box order must be (xmin, ymin, xmax, ymax) - - Args: - boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. - - Returns: - Tensor: intersection, sized [N,M]. - """ - boxes1, boxes2 = boxes1.tensor, boxes2.tensor - width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( - boxes1[:, None, :2], boxes2[:, :2] - ) # [N,M,2] - - width_height.clamp_(min=0) # [N,M,2] - intersection = width_height.prod(dim=2) # [N,M] - return intersection - - -# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py -# with slight modifications -def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Given two lists of boxes of size N and M, compute the IoU - (intersection over union) between **all** N x M pairs of boxes. - The box order must be (xmin, ymin, xmax, ymax). - - Args: - boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. - - Returns: - Tensor: IoU, sized [N,M]. - """ - area1 = boxes1.area() # [N] - area2 = boxes2.area() # [M] - inter = pairwise_intersection(boxes1, boxes2) - - # handle empty boxes - iou = torch.where( - inter > 0, - inter / (area1[:, None] + area2 - inter), - torch.zeros(1, dtype=inter.dtype, device=inter.device), - ) - return iou - - -def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Similar to :func:`pariwise_iou` but compute the IoA (intersection over boxes2 area). - - Args: - boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. - - Returns: - Tensor: IoA, sized [N,M]. - """ - area2 = boxes2.area() # [M] - inter = pairwise_intersection(boxes1, boxes2) - - # handle empty boxes - ioa = torch.where( - inter > 0, inter / area2, torch.zeros(1, dtype=inter.dtype, device=inter.device) - ) - return ioa - - -def pairwise_point_box_distance(points: torch.Tensor, boxes: Boxes): - """ - Pairwise distance between N points and M boxes. The distance between a - point and a box is represented by the distance from the point to 4 edges - of the box. Distances are all positive when the point is inside the box. - - Args: - points: Nx2 coordinates. Each row is (x, y) - boxes: M boxes - - Returns: - Tensor: distances of size (N, M, 4). The 4 values are distances from - the point to the left, top, right, bottom of the box. - """ - x, y = points.unsqueeze(dim=2).unbind(dim=1) # (N, 1) - x0, y0, x1, y1 = boxes.tensor.unsqueeze(dim=0).unbind(dim=2) # (1, M) - return torch.stack([x - x0, y - y0, x1 - x, y1 - y], dim=2) - - -def matched_pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Compute pairwise intersection over union (IOU) of two sets of matched - boxes that have the same number of boxes. - Similar to :func:`pairwise_iou`, but computes only diagonal elements of the matrix. - - Args: - boxes1 (Boxes): bounding boxes, sized [N,4]. 
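A tiny sanity check of `pairwise_iou` above, with one exact match plus one partially overlapping box:

```python
import torch
from detectron2.structures import Boxes, pairwise_iou

a = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0]]))
b = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0],
                        [5.0, 5.0, 15.0, 15.0]]))
print(pairwise_iou(a, b))   # tensor([[1.0000, 0.1429]]), i.e. 25 / (100 + 100 - 25)
```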
- boxes2 (Boxes): same length as boxes1 - Returns: - Tensor: iou, sized [N]. - """ - assert len(boxes1) == len( - boxes2 - ), "boxlists should have the same" "number of entries, got {}, {}".format( - len(boxes1), len(boxes2) - ) - area1 = boxes1.area() # [N] - area2 = boxes2.area() # [N] - box1, box2 = boxes1.tensor, boxes2.tensor - lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] - rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] - wh = (rb - lt).clamp(min=0) # [N,2] - inter = wh[:, 0] * wh[:, 1] # [N] - iou = inter / (area1 + area2 - inter) # [N] - return iou diff --git a/detectron2/detectron2/structures/image_list.py b/detectron2/detectron2/structures/image_list.py deleted file mode 100644 index e4243bb11e8fd95e8732f966f1d840d0560ae4c4..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/image_list.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from __future__ import division -from typing import Any, Dict, List, Optional, Tuple -import torch -from torch import device -from torch.nn import functional as F - -from detectron2.layers.wrappers import move_device_like, shapes_to_tensor - - -class ImageList: - """ - Structure that holds a list of images (of possibly - varying sizes) as a single tensor. - This works by padding the images to the same size. - The original sizes of each image is stored in `image_sizes`. - - Attributes: - image_sizes (list[tuple[int, int]]): each tuple is (h, w). - During tracing, it becomes list[Tensor] instead. - """ - - def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): - """ - Arguments: - tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 - image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can - be smaller than (H, W) due to padding. - """ - self.tensor = tensor - self.image_sizes = image_sizes - - def __len__(self) -> int: - return len(self.image_sizes) - - def __getitem__(self, idx) -> torch.Tensor: - """ - Access the individual image in its original size. - - Args: - idx: int or slice - - Returns: - Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 - """ - size = self.image_sizes[idx] - return self.tensor[idx, ..., : size[0], : size[1]] - - @torch.jit.unused - def to(self, *args: Any, **kwargs: Any) -> "ImageList": - cast_tensor = self.tensor.to(*args, **kwargs) - return ImageList(cast_tensor, self.image_sizes) - - @property - def device(self) -> device: - return self.tensor.device - - @staticmethod - def from_tensors( - tensors: List[torch.Tensor], - size_divisibility: int = 0, - pad_value: float = 0.0, - padding_constraints: Optional[Dict[str, int]] = None, - ) -> "ImageList": - """ - Args: - tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or - (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded - to the same shape with `pad_value`. - size_divisibility (int): If `size_divisibility > 0`, add padding to ensure - the common height and width is divisible by `size_divisibility`. - This depends on the model and many models need a divisibility of 32. - pad_value (float): value to pad. - padding_constraints (optional[Dict]): If given, it would follow the format as - {"size_divisibility": int, "square_size": int}, where `size_divisibility` will - overwrite the above one if presented and `square_size` indicates the - square padding size if `square_size` > 0. - Returns: - an `ImageList`. 
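For reference, a minimal sketch of how the `Boxes` / `pairwise_iou` helpers defined in this removed `boxes.py` are used (values are illustrative; assumes the detectron2 package and PyTorch are importable):

.. code-block:: python

    import torch
    from detectron2.structures import Boxes
    from detectron2.structures.boxes import pairwise_iou

    # Two sets of XYXY boxes: 2 boxes vs. 3 boxes.
    b1 = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0]]))
    b2 = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0],
                             [8.0, 8.0, 12.0, 12.0],
                             [20.0, 20.0, 30.0, 30.0]]))

    iou = pairwise_iou(b1, b2)          # all N x M pairs -> shape (2, 3)
    assert iou.shape == (2, 3)
    assert float(iou[0, 0]) == 1.0      # identical boxes
    assert float(iou[0, 2]) == 0.0      # disjoint boxes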
- """ - assert len(tensors) > 0 - assert isinstance(tensors, (tuple, list)) - for t in tensors: - assert isinstance(t, torch.Tensor), type(t) - assert t.shape[:-2] == tensors[0].shape[:-2], t.shape - - image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] - image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] - max_size = torch.stack(image_sizes_tensor).max(0).values - - if padding_constraints is not None: - square_size = padding_constraints.get("square_size", 0) - if square_size > 0: - # pad to square. - max_size[0] = max_size[1] = square_size - if "size_divisibility" in padding_constraints: - size_divisibility = padding_constraints["size_divisibility"] - if size_divisibility > 1: - stride = size_divisibility - # the last two dims are H,W, both subject to divisibility requirement - max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride - - # handle weirdness of scripting and tracing ... - if torch.jit.is_scripting(): - max_size: List[int] = max_size.to(dtype=torch.long).tolist() - else: - if torch.jit.is_tracing(): - image_sizes = image_sizes_tensor - - if len(tensors) == 1: - # This seems slightly (2%) faster. - # TODO: check whether it's faster for multiple images as well - image_size = image_sizes[0] - padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] - batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) - else: - # max_size can be a tensor in tracing mode, therefore convert to list - batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) - device = ( - None if torch.jit.is_scripting() else ("cpu" if torch.jit.is_tracing() else None) - ) - batched_imgs = tensors[0].new_full(batch_shape, pad_value, device=device) - batched_imgs = move_device_like(batched_imgs, tensors[0]) - for i, img in enumerate(tensors): - # Use `batched_imgs` directly instead of `img, pad_img = zip(tensors, batched_imgs)` - # Tracing mode cannot capture `copy_()` of temporary locals - batched_imgs[i, ..., : img.shape[-2], : img.shape[-1]].copy_(img) - - return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/detectron2/detectron2/structures/instances.py b/detectron2/detectron2/structures/instances.py deleted file mode 100644 index 557bd6a7294c9770068d0908950a7a8f6e476ce4..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/instances.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import itertools -import warnings -from typing import Any, Dict, List, Tuple, Union -import torch - - -class Instances: - """ - This class represents a list of instances in an image. - It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". - All fields must have the same ``__len__`` which is the number of instances. - - All other (non-field) attributes of this class are considered private: - they must start with '_' and are not modifiable by a user. - - Some basic usage: - - 1. Set/get/check a field: - - .. code-block:: python - - instances.gt_boxes = Boxes(...) - print(instances.pred_masks) # a tensor of shape (N, H, W) - print('gt_masks' in instances) - - 2. ``len(instances)`` returns the number of instances - 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields - and returns a new :class:`Instances`. - Typically, ``indices`` is a integer vector of indices, - or a binary mask of length ``num_instances`` - - .. 
code-block:: python - - category_3_detections = instances[instances.pred_classes == 3] - confident_detections = instances[instances.scores > 0.9] - """ - - def __init__(self, image_size: Tuple[int, int], **kwargs: Any): - """ - Args: - image_size (height, width): the spatial size of the image. - kwargs: fields to add to this `Instances`. - """ - self._image_size = image_size - self._fields: Dict[str, Any] = {} - for k, v in kwargs.items(): - self.set(k, v) - - @property - def image_size(self) -> Tuple[int, int]: - """ - Returns: - tuple: height, width - """ - return self._image_size - - def __setattr__(self, name: str, val: Any) -> None: - if name.startswith("_"): - super().__setattr__(name, val) - else: - self.set(name, val) - - def __getattr__(self, name: str) -> Any: - if name == "_fields" or name not in self._fields: - raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) - return self._fields[name] - - def set(self, name: str, value: Any) -> None: - """ - Set the field named `name` to `value`. - The length of `value` must be the number of instances, - and must agree with other existing fields in this object. - """ - with warnings.catch_warnings(record=True): - data_len = len(value) - if len(self._fields): - assert ( - len(self) == data_len - ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) - self._fields[name] = value - - def has(self, name: str) -> bool: - """ - Returns: - bool: whether the field called `name` exists. - """ - return name in self._fields - - def remove(self, name: str) -> None: - """ - Remove the field called `name`. - """ - del self._fields[name] - - def get(self, name: str) -> Any: - """ - Returns the field called `name`. - """ - return self._fields[name] - - def get_fields(self) -> Dict[str, Any]: - """ - Returns: - dict: a dict which maps names (str) to data of the fields - - Modifying the returned dict will modify this instance. - """ - return self._fields - - # Tensor-like methods - def to(self, *args: Any, **kwargs: Any) -> "Instances": - """ - Returns: - Instances: all fields are called with a `to(device)`, if the field has this method. - """ - ret = Instances(self._image_size) - for k, v in self._fields.items(): - if hasattr(v, "to"): - v = v.to(*args, **kwargs) - ret.set(k, v) - return ret - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": - """ - Args: - item: an index-like object and will be used to index all the fields. - - Returns: - If `item` is a string, return the data in the corresponding field. - Otherwise, returns an `Instances` where all fields are indexed by `item`. 
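A small illustration of the padding behaviour described in `ImageList.from_tensors` above (shapes are illustrative; assumes detectron2 is importable):

.. code-block:: python

    import torch
    from detectron2.structures import ImageList

    imgs = [torch.rand(3, 30, 45), torch.rand(3, 25, 50)]
    il = ImageList.from_tensors(imgs, size_divisibility=32)

    # The common (H, W) is the per-dimension max rounded up to a multiple of 32.
    assert il.tensor.shape == (2, 3, 32, 64)
    assert il.image_sizes == [(30, 45), (25, 50)]

    # Indexing recovers each image at its original (unpadded) size.
    assert il[1].shape == (3, 25, 50)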
- """ - if type(item) is int: - if item >= len(self) or item < -len(self): - raise IndexError("Instances index out of range!") - else: - item = slice(item, None, len(self)) - - ret = Instances(self._image_size) - for k, v in self._fields.items(): - ret.set(k, v[item]) - return ret - - def __len__(self) -> int: - for v in self._fields.values(): - # use __len__ because len() has to be int and is not friendly to tracing - return v.__len__() - raise NotImplementedError("Empty Instances does not support __len__!") - - def __iter__(self): - raise NotImplementedError("`Instances` object is not iterable!") - - @staticmethod - def cat(instance_lists: List["Instances"]) -> "Instances": - """ - Args: - instance_lists (list[Instances]) - - Returns: - Instances - """ - assert all(isinstance(i, Instances) for i in instance_lists) - assert len(instance_lists) > 0 - if len(instance_lists) == 1: - return instance_lists[0] - - image_size = instance_lists[0].image_size - if not isinstance(image_size, torch.Tensor): # could be a tensor in tracing - for i in instance_lists[1:]: - assert i.image_size == image_size - ret = Instances(image_size) - for k in instance_lists[0]._fields.keys(): - values = [i.get(k) for i in instance_lists] - v0 = values[0] - if isinstance(v0, torch.Tensor): - values = torch.cat(values, dim=0) - elif isinstance(v0, list): - values = list(itertools.chain(*values)) - elif hasattr(type(v0), "cat"): - values = type(v0).cat(values) - else: - raise ValueError("Unsupported type {} for concatenation".format(type(v0))) - ret.set(k, values) - return ret - - def __str__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={}, ".format(len(self)) - s += "image_height={}, ".format(self._image_size[0]) - s += "image_width={}, ".format(self._image_size[1]) - s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) - return s - - __repr__ = __str__ diff --git a/detectron2/detectron2/structures/keypoints.py b/detectron2/detectron2/structures/keypoints.py deleted file mode 100644 index b93ebed4f6554e67ba9bde8d3af90e8dbb3246b6..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/keypoints.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -from typing import Any, List, Tuple, Union -import torch -from torch.nn import functional as F - - -class Keypoints: - """ - Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property - containing the x,y location and visibility flag of each keypoint. This tensor has shape - (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. - - The visibility flag follows the COCO format and must be one of three integers: - - * v=0: not labeled (in which case x=y=0) - * v=1: labeled but not visible - * v=2: labeled and visible - """ - - def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): - """ - Arguments: - keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. - The shape should be (N, K, 3) where N is the number of - instances, and K is the number of keypoints per instance. 
- """ - device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") - keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) - assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape - self.tensor = keypoints - - def __len__(self) -> int: - return self.tensor.size(0) - - def to(self, *args: Any, **kwargs: Any) -> "Keypoints": - return type(self)(self.tensor.to(*args, **kwargs)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: - """ - Convert keypoint annotations to a heatmap of one-hot labels for training, - as described in :paper:`Mask R-CNN`. - - Arguments: - boxes: Nx4 tensor, the boxes to draw the keypoints to - - Returns: - heatmaps: - A tensor of shape (N, K), each element is integer spatial label - in the range [0, heatmap_size**2 - 1] for each keypoint in the input. - valid: - A tensor of shape (N, K) containing whether each keypoint is in the roi or not. - """ - return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": - """ - Create a new `Keypoints` by indexing on this `Keypoints`. - - The following usage are allowed: - - 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. - 2. `new_kpts = kpts[2:10]`: return a slice of key points. - 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor - with `length = len(kpts)`. Nonzero elements in the vector will be selected. - - Note that the returned Keypoints might share storage with this Keypoints, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return Keypoints([self.tensor[item]]) - return Keypoints(self.tensor[item]) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - @staticmethod - def cat(keypoints_list: List["Keypoints"]) -> "Keypoints": - """ - Concatenates a list of Keypoints into a single Keypoints - - Arguments: - keypoints_list (list[Keypoints]) - - Returns: - Keypoints: the concatenated Keypoints - """ - assert isinstance(keypoints_list, (list, tuple)) - assert len(keypoints_list) > 0 - assert all(isinstance(keypoints, Keypoints) for keypoints in keypoints_list) - - cat_kpts = type(keypoints_list[0])( - torch.cat([kpts.tensor for kpts in keypoints_list], dim=0) - ) - return cat_kpts - - -# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) -def _keypoints_to_heatmap( - keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. - - Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the - closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the - continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): - d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. - - Arguments: - keypoints: tensor of keypoint locations in of shape (N, K, 3). - rois: Nx4 tensor of rois in xyxy format - heatmap_size: integer side length of square heatmap. - - Returns: - heatmaps: A tensor of shape (N, K) containing an integer spatial label - in the range [0, heatmap_size**2 - 1] for each keypoint in the input. 
- valid: A tensor of shape (N, K) containing whether each keypoint is in - the roi or not. - """ - - if rois.numel() == 0: - return rois.new().long(), rois.new().long() - offset_x = rois[:, 0] - offset_y = rois[:, 1] - scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) - scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) - - offset_x = offset_x[:, None] - offset_y = offset_y[:, None] - scale_x = scale_x[:, None] - scale_y = scale_y[:, None] - - x = keypoints[..., 0] - y = keypoints[..., 1] - - x_boundary_inds = x == rois[:, 2][:, None] - y_boundary_inds = y == rois[:, 3][:, None] - - x = (x - offset_x) * scale_x - x = x.floor().long() - y = (y - offset_y) * scale_y - y = y.floor().long() - - x[x_boundary_inds] = heatmap_size - 1 - y[y_boundary_inds] = heatmap_size - 1 - - valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) - vis = keypoints[..., 2] > 0 - valid = (valid_loc & vis).long() - - lin_ind = y * heatmap_size + x - heatmaps = lin_ind * valid - - return heatmaps, valid - - -@torch.jit.script_if_tracing -def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: - """ - Extract predicted keypoint locations from heatmaps. - - Args: - maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for - each ROI and each keypoint. - rois (Tensor): (#ROIs, 4). The box of each ROI. - - Returns: - Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to - (x, y, logit, score) for each keypoint. - - When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, - we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from - Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. 
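A worked example of the keypoint-to-heatmap encoding described above (values are illustrative; assumes detectron2 is importable):

.. code-block:: python

    import torch
    from detectron2.structures import Keypoints

    # One instance, two keypoints in (x, y, visibility) format;
    # v=2 means labeled and visible, v=0 means not labeled (x = y = 0).
    kpts = Keypoints(torch.tensor([[[12.0, 20.0, 2.0], [0.0, 0.0, 0.0]]]))
    rois = torch.tensor([[0.0, 0.0, 56.0, 56.0]])     # one xyxy box

    heatmaps, valid = kpts.to_heatmap(rois, heatmap_size=56)

    # With the box equal to the heatmap size the scale is 1, so the discrete
    # index is simply d = floor(c), flattened as y * heatmap_size + x.
    assert int(heatmaps[0, 0]) == 20 * 56 + 12
    assert int(valid[0, 1]) == 0      # unlabeled keypoint is not a valid target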
- """ - - offset_x = rois[:, 0] - offset_y = rois[:, 1] - - widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) - heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) - widths_ceil = widths.ceil() - heights_ceil = heights.ceil() - - num_rois, num_keypoints = maps.shape[:2] - xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) - - width_corrections = widths / widths_ceil - height_corrections = heights / heights_ceil - - keypoints_idx = torch.arange(num_keypoints, device=maps.device) - - for i in range(num_rois): - outsize = (int(heights_ceil[i]), int(widths_ceil[i])) - roi_map = F.interpolate(maps[[i]], size=outsize, mode="bicubic", align_corners=False) - - # Although semantically equivalent, `reshape` is used instead of `squeeze` due - # to limitation during ONNX export of `squeeze` in scripting mode - roi_map = roi_map.reshape(roi_map.shape[1:]) # keypoints x H x W - - # softmax over the spatial region - max_score, _ = roi_map.view(num_keypoints, -1).max(1) - max_score = max_score.view(num_keypoints, 1, 1) - tmp_full_resolution = (roi_map - max_score).exp_() - tmp_pool_resolution = (maps[i] - max_score).exp_() - # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, - # so that the scores of objects of different absolute sizes will be more comparable - roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) - - w = roi_map.shape[2] - pos = roi_map.view(num_keypoints, -1).argmax(1) - - x_int = pos % w - y_int = (pos - x_int) // w - - assert ( - roi_map_scores[keypoints_idx, y_int, x_int] - == roi_map_scores.view(num_keypoints, -1).max(1)[0] - ).all() - - x = (x_int.float() + 0.5) * width_corrections[i] - y = (y_int.float() + 0.5) * height_corrections[i] - - xy_preds[i, :, 0] = x + offset_x[i] - xy_preds[i, :, 1] = y + offset_y[i] - xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int] - xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] - - return xy_preds diff --git a/detectron2/detectron2/structures/masks.py b/detectron2/detectron2/structures/masks.py deleted file mode 100644 index 899ad8b6ce1557ccc38da58d31814c3ddb9cb737..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/masks.py +++ /dev/null @@ -1,534 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import copy -import itertools -import numpy as np -from typing import Any, Iterator, List, Union -import pycocotools.mask as mask_util -import torch -from torch import device - -from detectron2.layers.roi_align import ROIAlign -from detectron2.utils.memory import retry_if_cuda_oom - -from .boxes import Boxes - - -def polygon_area(x, y): - # Using the shoelace formula - # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) - - -def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: - """ - Args: - polygons (list[ndarray]): each array has shape (Nx2,) - height, width (int) - - Returns: - ndarray: a bool mask of shape (height, width) - """ - if len(polygons) == 0: - # COCOAPI does not support empty polygons - return np.zeros((height, width)).astype(bool) - rles = mask_util.frPyObjects(polygons, height, width) - rle = mask_util.merge(rles) - return mask_util.decode(rle).astype(bool) - - -def rasterize_polygons_within_box( - polygons: List[np.ndarray], box: np.ndarray, mask_size: int -) -> torch.Tensor: - """ - Rasterize the polygons into a mask image and - crop the mask content in the given box. - The cropped mask is resized to (mask_size, mask_size). - - This function is used when generating training targets for mask head in Mask R-CNN. - Given original ground-truth masks for an image, new ground-truth mask - training targets in the size of `mask_size x mask_size` - must be provided for each predicted box. This function will be called to - produce such targets. - - Args: - polygons (list[ndarray[float]]): a list of polygons, which represents an instance. - box: 4-element numpy array - mask_size (int): - - Returns: - Tensor: BoolTensor of shape (mask_size, mask_size) - """ - # 1. Shift the polygons w.r.t the boxes - w, h = box[2] - box[0], box[3] - box[1] - - polygons = copy.deepcopy(polygons) - for p in polygons: - p[0::2] = p[0::2] - box[0] - p[1::2] = p[1::2] - box[1] - - # 2. Rescale the polygons to the new box size - # max() to avoid division by small number - ratio_h = mask_size / max(h, 0.1) - ratio_w = mask_size / max(w, 0.1) - - if ratio_h == ratio_w: - for p in polygons: - p *= ratio_h - else: - for p in polygons: - p[0::2] *= ratio_w - p[1::2] *= ratio_h - - # 3. Rasterize the polygons with coco api - mask = polygons_to_bitmask(polygons, mask_size, mask_size) - mask = torch.from_numpy(mask) - return mask - - -class BitMasks: - """ - This class stores the segmentation masks for all objects in one image, in - the form of bitmaps. - - Attributes: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - - def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): - """ - Args: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - if isinstance(tensor, torch.Tensor): - tensor = tensor.to(torch.bool) - else: - tensor = torch.as_tensor(tensor, dtype=torch.bool, device=torch.device("cpu")) - assert tensor.dim() == 3, tensor.size() - self.image_size = tensor.shape[1:] - self.tensor = tensor - - @torch.jit.unused - def to(self, *args: Any, **kwargs: Any) -> "BitMasks": - return BitMasks(self.tensor.to(*args, **kwargs)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - @torch.jit.unused - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": - """ - Returns: - BitMasks: Create a new :class:`BitMasks` by indexing. 
- - The following usage are allowed: - - 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. - 2. `new_masks = masks[2:10]`: return a slice of masks. - 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor - with `length = len(masks)`. Nonzero elements in the vector will be selected. - - Note that the returned object might share storage with this object, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return BitMasks(self.tensor[item].unsqueeze(0)) - m = self.tensor[item] - assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( - item, m.shape - ) - return BitMasks(m) - - @torch.jit.unused - def __iter__(self) -> torch.Tensor: - yield from self.tensor - - @torch.jit.unused - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - def __len__(self) -> int: - return self.tensor.shape[0] - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: a BoolTensor which represents - whether each mask is empty (False) or non-empty (True). - """ - return self.tensor.flatten(1).any(dim=1) - - @staticmethod - def from_polygon_masks( - polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int - ) -> "BitMasks": - """ - Args: - polygon_masks (list[list[ndarray]] or PolygonMasks) - height, width (int) - """ - if isinstance(polygon_masks, PolygonMasks): - polygon_masks = polygon_masks.polygons - masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] - if len(masks): - return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) - else: - return BitMasks(torch.empty(0, height, width, dtype=torch.bool)) - - @staticmethod - def from_roi_masks(roi_masks: "ROIMasks", height: int, width: int) -> "BitMasks": - """ - Args: - roi_masks: - height, width (int): - """ - return roi_masks.to_bitmasks(height, width) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each bitmask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - It has less reconstruction error compared to rasterization with polygons. - However we observe no difference in accuracy, - but BitMasks requires more memory to store all the masks. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: - A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. - """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - device = self.tensor.device - - batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] - rois = torch.cat([batch_inds, boxes], dim=1) # Nx5 - - bit_masks = self.tensor.to(dtype=torch.float32) - rois = rois.to(device=device) - output = ( - ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) - .forward(bit_masks[:, None, :, :], rois) - .squeeze(1) - ) - output = output >= 0.5 - return output - - def get_bounding_boxes(self) -> Boxes: - """ - Returns: - Boxes: tight bounding boxes around bitmasks. - If a mask is empty, it's bounding box will be all zero. 
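A short sketch of `BitMasks` with its `nonempty` and `get_bounding_boxes` helpers (values are illustrative; assumes detectron2 is importable):

.. code-block:: python

    import torch
    from detectron2.structures import BitMasks

    # Two 8x8 masks: a filled rectangle over rows 2..4, cols 1..4, and an empty mask.
    t = torch.zeros(2, 8, 8, dtype=torch.bool)
    t[0, 2:5, 1:5] = True
    masks = BitMasks(t)

    assert masks.nonempty().tolist() == [True, False]

    # Tight XYXY boxes; the empty mask yields an all-zero box.
    boxes = masks.get_bounding_boxes().tensor
    assert boxes[0].tolist() == [1.0, 2.0, 5.0, 5.0]
    assert boxes[1].tolist() == [0.0, 0.0, 0.0, 0.0]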
- """ - boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32) - x_any = torch.any(self.tensor, dim=1) - y_any = torch.any(self.tensor, dim=2) - for idx in range(self.tensor.shape[0]): - x = torch.where(x_any[idx, :])[0] - y = torch.where(y_any[idx, :])[0] - if len(x) > 0 and len(y) > 0: - boxes[idx, :] = torch.as_tensor( - [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32 - ) - return Boxes(boxes) - - @staticmethod - def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": - """ - Concatenates a list of BitMasks into a single BitMasks - - Arguments: - bitmasks_list (list[BitMasks]) - - Returns: - BitMasks: the concatenated BitMasks - """ - assert isinstance(bitmasks_list, (list, tuple)) - assert len(bitmasks_list) > 0 - assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) - - cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) - return cat_bitmasks - - -class PolygonMasks: - """ - This class stores the segmentation masks for all objects in one image, in the form of polygons. - - Attributes: - polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. - """ - - def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): - """ - Arguments: - polygons (list[list[np.ndarray]]): The first - level of the list correspond to individual instances, - the second level to all the polygons that compose the - instance, and the third level to the polygon coordinates. - The third level array should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - """ - if not isinstance(polygons, list): - raise ValueError( - "Cannot create PolygonMasks: Expect a list of list of polygons per image. " - "Got '{}' instead.".format(type(polygons)) - ) - - def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - # Use float64 for higher precision, because why not? - # Always put polygons on CPU (self.to is a no-op) since they - # are supposed to be small tensors. - # May need to change this assumption if GPU placement becomes useful - if isinstance(t, torch.Tensor): - t = t.cpu().numpy() - return np.asarray(t).astype("float64") - - def process_polygons( - polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] - ) -> List[np.ndarray]: - if not isinstance(polygons_per_instance, list): - raise ValueError( - "Cannot create polygons: Expect a list of polygons per instance. " - "Got '{}' instead.".format(type(polygons_per_instance)) - ) - # transform each polygon to a numpy array - polygons_per_instance = [_make_array(p) for p in polygons_per_instance] - for polygon in polygons_per_instance: - if len(polygon) % 2 != 0 or len(polygon) < 6: - raise ValueError(f"Cannot create a polygon from {len(polygon)} coordinates.") - return polygons_per_instance - - self.polygons: List[List[np.ndarray]] = [ - process_polygons(polygons_per_instance) for polygons_per_instance in polygons - ] - - def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": - return self - - @property - def device(self) -> torch.device: - return torch.device("cpu") - - def get_bounding_boxes(self) -> Boxes: - """ - Returns: - Boxes: tight bounding boxes around polygon masks. 
- """ - boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) - for idx, polygons_per_instance in enumerate(self.polygons): - minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) - maxxy = torch.zeros(2, dtype=torch.float32) - for polygon in polygons_per_instance: - coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) - minxy = torch.min(minxy, torch.min(coords, dim=0).values) - maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) - boxes[idx, :2] = minxy - boxes[idx, 2:] = maxxy - return Boxes(boxes) - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: - a BoolTensor which represents whether each mask is empty (False) or not (True). - """ - keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] - return torch.from_numpy(np.asarray(keep, dtype=bool)) - - def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": - """ - Support indexing over the instances and return a `PolygonMasks` object. - `item` can be: - - 1. An integer. It will return an object with only one instance. - 2. A slice. It will return an object with the selected instances. - 3. A list[int]. It will return an object with the selected instances, - correpsonding to the indices in the list. - 4. A vector mask of type BoolTensor, whose length is num_instances. - It will return an object with the instances whose mask is nonzero. - """ - if isinstance(item, int): - selected_polygons = [self.polygons[item]] - elif isinstance(item, slice): - selected_polygons = self.polygons[item] - elif isinstance(item, list): - selected_polygons = [self.polygons[i] for i in item] - elif isinstance(item, torch.Tensor): - # Polygons is a list, so we have to move the indices back to CPU. - if item.dtype == torch.bool: - assert item.dim() == 1, item.shape - item = item.nonzero().squeeze(1).cpu().numpy().tolist() - elif item.dtype in [torch.int32, torch.int64]: - item = item.cpu().numpy().tolist() - else: - raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) - selected_polygons = [self.polygons[i] for i in item] - return PolygonMasks(selected_polygons) - - def __iter__(self) -> Iterator[List[np.ndarray]]: - """ - Yields: - list[ndarray]: the polygons for one instance. - Each Tensor is a float64 vector representing a polygon. - """ - return iter(self.polygons) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.polygons)) - return s - - def __len__(self) -> int: - return len(self.polygons) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each mask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. 
- """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - - device = boxes.device - # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise - # (several small tensors for representing a single instance mask) - boxes = boxes.to(torch.device("cpu")) - - results = [ - rasterize_polygons_within_box(poly, box.numpy(), mask_size) - for poly, box in zip(self.polygons, boxes) - ] - """ - poly: list[list[float]], the polygons for one instance - box: a tensor of shape (4,) - """ - if len(results) == 0: - return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) - return torch.stack(results, dim=0).to(device=device) - - def area(self): - """ - Computes area of the mask. - Only works with Polygons, using the shoelace formula: - https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - - Returns: - Tensor: a vector, area for each instance - """ - - area = [] - for polygons_per_instance in self.polygons: - area_per_instance = 0 - for p in polygons_per_instance: - area_per_instance += polygon_area(p[0::2], p[1::2]) - area.append(area_per_instance) - - return torch.tensor(area) - - @staticmethod - def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": - """ - Concatenates a list of PolygonMasks into a single PolygonMasks - - Arguments: - polymasks_list (list[PolygonMasks]) - - Returns: - PolygonMasks: the concatenated PolygonMasks - """ - assert isinstance(polymasks_list, (list, tuple)) - assert len(polymasks_list) > 0 - assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) - - cat_polymasks = type(polymasks_list[0])( - list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) - ) - return cat_polymasks - - -class ROIMasks: - """ - Represent masks by N smaller masks defined in some ROIs. Once ROI boxes are given, - full-image bitmask can be obtained by "pasting" the mask on the region defined - by the corresponding ROI box. - """ - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor: (N, M, M) mask tensor that defines the mask within each ROI. - """ - if tensor.dim() != 3: - raise ValueError("ROIMasks must take a masks of 3 dimension.") - self.tensor = tensor - - def to(self, device: torch.device) -> "ROIMasks": - return ROIMasks(self.tensor.to(device)) - - @property - def device(self) -> device: - return self.tensor.device - - def __len__(self): - return self.tensor.shape[0] - - def __getitem__(self, item) -> "ROIMasks": - """ - Returns: - ROIMasks: Create a new :class:`ROIMasks` by indexing. - - The following usage are allowed: - - 1. `new_masks = masks[2:10]`: return a slice of masks. - 2. `new_masks = masks[vector]`, where vector is a torch.BoolTensor - with `length = len(masks)`. Nonzero elements in the vector will be selected. - - Note that the returned object might share storage with this object, - subject to Pytorch's indexing semantics. - """ - t = self.tensor[item] - if t.dim() != 3: - raise ValueError( - f"Indexing on ROIMasks with {item} returns a tensor with shape {t.shape}!" - ) - return ROIMasks(t) - - @torch.jit.unused - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - @torch.jit.unused - def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5): - """ - Args: see documentation of :func:`paste_masks_in_image`. 
- """ - from detectron2.layers.mask_ops import paste_masks_in_image, _paste_masks_tensor_shape - - if torch.jit.is_tracing(): - if isinstance(height, torch.Tensor): - paste_func = _paste_masks_tensor_shape - else: - paste_func = paste_masks_in_image - else: - paste_func = retry_if_cuda_oom(paste_masks_in_image) - bitmasks = paste_func(self.tensor, boxes.tensor, (height, width), threshold=threshold) - return BitMasks(bitmasks) diff --git a/detectron2/detectron2/structures/rotated_boxes.py b/detectron2/detectron2/structures/rotated_boxes.py deleted file mode 100644 index cb65e22036e40c3545f88711aeff4d6a173d875b..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/structures/rotated_boxes.py +++ /dev/null @@ -1,505 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -from typing import List, Tuple -import torch - -from detectron2.layers.rotated_boxes import pairwise_iou_rotated - -from .boxes import Boxes - - -class RotatedBoxes(Boxes): - """ - This structure stores a list of rotated boxes as a Nx5 torch.Tensor. - It supports some common methods about boxes - (`area`, `clip`, `nonempty`, etc), - and also behaves like a Tensor - (support indexing, `to(device)`, `.device`, and iteration over all boxes) - """ - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor (Tensor[float]): a Nx5 matrix. Each row is - (x_center, y_center, width, height, angle), - in which angle is represented in degrees. - While there's no strict range restriction for it, - the recommended principal range is between [-180, 180) degrees. - - Assume we have a horizontal box B = (x_center, y_center, width, height), - where width is along the x-axis and height is along the y-axis. - The rotated box B_rot (x_center, y_center, width, height, angle) - can be seen as: - - 1. When angle == 0: - B_rot == B - 2. When angle > 0: - B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; - 3. When angle < 0: - B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. - - Mathematically, since the right-handed coordinate system for image space - is (y, x), where y is top->down and x is left->right, the 4 vertices of the - rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from - the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4) - in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, - :math:`(y_c, x_c)` is the center of the rectangle): - - .. math:: - - yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, - - xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, - - which is the standard rigid-body rotation transformation. - - Intuitively, the angle is - (1) the rotation angle from y-axis in image space - to the height vector (top->down in the box's local coordinate system) - of the box in CCW, and - (2) the rotation angle from x-axis in image space - to the width vector (left->right in the box's local coordinate system) - of the box in CCW. - - More intuitively, consider the following horizontal box ABCD represented - in (x1, y1, x2, y2): (3, 2, 7, 4), - covering the [3, 7] x [2, 4] region of the continuous coordinate system - which looks like this: - - .. code:: none - - O--------> x - | - | A---B - | | | - | D---C - | - v y - - Note that each capital letter represents one 0-dimensional geometric point - instead of a 'square pixel' here. - - In the example above, using (x, y) to represent a point we have: - - .. 
math:: - - O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) - - We name vector AB = vector DC as the width vector in box's local coordinate system, and - vector AD = vector BC as the height vector in box's local coordinate system. Initially, - when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis - in the image space, respectively. - - For better illustration, we denote the center of the box as E, - - .. code:: none - - O--------> x - | - | A---B - | | E | - | D---C - | - v y - - where the center E = ((3+7)/2, (2+4)/2) = (5, 3). - - Also, - - .. math:: - - width = |AB| = |CD| = 7 - 3 = 4, - height = |AD| = |BC| = 4 - 2 = 2. - - Therefore, the corresponding representation for the same shape in rotated box in - (x_center, y_center, width, height, angle) format is: - - (5, 3, 4, 2, 0), - - Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees - CCW (counter-clockwise) by definition. It looks like this: - - .. code:: none - - O--------> x - | B-C - | | | - | |E| - | | | - | A-D - v y - - The center E is still located at the same point (5, 3), while the vertices - ABCD are rotated by 90 degrees CCW with regard to E: - A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) - - Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to - vector AD or vector BC (the top->down height vector in box's local coordinate system), - or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right - width vector in box's local coordinate system). - - .. math:: - - width = |AB| = |CD| = 5 - 1 = 4, - height = |AD| = |BC| = 6 - 4 = 2. - - Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) - by definition? It looks like this: - - .. code:: none - - O--------> x - | D-A - | | | - | |E| - | | | - | C-B - v y - - The center E is still located at the same point (5, 3), while the vertices - ABCD are rotated by 90 degrees CW with regard to E: - A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) - - .. math:: - - width = |AB| = |CD| = 5 - 1 = 4, - height = |AD| = |BC| = 6 - 4 = 2. - - This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU - will be 1. However, these two will generate different RoI Pooling results and - should not be treated as an identical box. - - On the other hand, it's easy to see that (X, Y, W, H, A) is identical to - (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be - identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is - equivalent to rotating the same shape 90 degrees CW. - - We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): - - .. code:: none - - O--------> x - | - | C---D - | | E | - | B---A - | - v y - - .. math:: - - A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), - - width = |AB| = |CD| = 7 - 3 = 4, - height = |AD| = |BC| = 4 - 2 = 2. - - Finally, this is a very inaccurate (heavily quantized) illustration of - how (5, 3, 4, 2, 60) looks like in case anyone wonders: - - .. code:: none - - O--------> x - | B\ - | / C - | /E / - | A / - | `D - v y - - It's still a rectangle with center of (5, 3), width of 4 and height of 2, - but its angle (and thus orientation) is somewhere between - (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). 
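A runnable illustration of the angle conventions above (values are illustrative; the IoU check assumes a detectron2 build whose compiled `pairwise_iou_rotated` op is available):

.. code-block:: python

    import torch
    from detectron2.structures import RotatedBoxes
    from detectron2.structures.rotated_boxes import pairwise_iou

    # (x_center, y_center, width, height, angle in degrees).
    b90 = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 90.0]]))
    b270 = RotatedBoxes(torch.tensor([[5.0, 3.0, 4.0, 2.0, 270.0]]))

    # Angles are equivalent modulo 360: 270 degrees normalizes to -90.
    b270.normalize_angles()
    assert float(b270.tensor[0, 4]) == -90.0

    # +90 and -90 cover the same region, so their rotated IoU is 1,
    # even though they remain distinct boxes for RoI pooling purposes.
    iou = pairwise_iou(b90, b270)
    assert abs(float(iou[0, 0]) - 1.0) < 1e-3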
- """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) - if tensor.numel() == 0: - # Use reshape, so we don't end up creating a new tensor that does not depend on - # the inputs (and consequently confuses jit) - tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) - assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() - - self.tensor = tensor - - def clone(self) -> "RotatedBoxes": - """ - Clone the RotatedBoxes. - - Returns: - RotatedBoxes - """ - return RotatedBoxes(self.tensor.clone()) - - def to(self, device: torch.device, non_blocking: bool = False): - # Boxes are assumed float32 and does not support to(dtype) - return RotatedBoxes(self.tensor.to(device=device, non_blocking=non_blocking)) - - def area(self) -> torch.Tensor: - """ - Computes the area of all the boxes. - - Returns: - torch.Tensor: a vector with areas of each box. - """ - box = self.tensor - area = box[:, 2] * box[:, 3] - return area - - # Avoid in-place operations so that we can torchscript; NOTE: this creates a new tensor - def normalize_angles(self) -> None: - """ - Restrict angles to the range of [-180, 180) degrees - """ - angle_tensor = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 - self.tensor = torch.cat((self.tensor[:, :4], angle_tensor[:, None]), dim=1) - - def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None: - """ - Clip (in place) the boxes by limiting x coordinates to the range [0, width] - and y coordinates to the range [0, height]. - - For RRPN: - Only clip boxes that are almost horizontal with a tolerance of - clip_angle_threshold to maintain backward compatibility. - - Rotated boxes beyond this threshold are not clipped for two reasons: - - 1. There are potentially multiple ways to clip a rotated box to make it - fit within the image. - 2. It's tricky to make the entire rectangular box fit within the image - and still be able to not leave out pixels of interest. - - Therefore we rely on ops like RoIAlignRotated to safely handle this. - - Args: - box_size (height, width): The clipping box's size. - clip_angle_threshold: - Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), - we do the clipping as horizontal boxes. - """ - h, w = box_size - - # normalize angles to be within (-180, 180] degrees - self.normalize_angles() - - idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] - - # convert to (x1, y1, x2, y2) - x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 - y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 - x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 - y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 - - # clip - x1.clamp_(min=0, max=w) - y1.clamp_(min=0, max=h) - x2.clamp_(min=0, max=w) - y2.clamp_(min=0, max=h) - - # convert back to (xc, yc, w, h) - self.tensor[idx, 0] = (x1 + x2) / 2.0 - self.tensor[idx, 1] = (y1 + y2) / 2.0 - # make sure widths and heights do not increase due to numerical errors - self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) - self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) - - def nonempty(self, threshold: float = 0.0) -> torch.Tensor: - """ - Find boxes that are non-empty. - A box is considered empty, if either of its side is no larger than threshold. - - Returns: - Tensor: a binary vector which represents - whether each box is empty (False) or non-empty (True). 
- """ - box = self.tensor - widths = box[:, 2] - heights = box[:, 3] - keep = (widths > threshold) & (heights > threshold) - return keep - - def __getitem__(self, item) -> "RotatedBoxes": - """ - Returns: - RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. - - The following usage are allowed: - - 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. - 2. `new_boxes = boxes[2:10]`: return a slice of boxes. - 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor - with `length = len(boxes)`. Nonzero elements in the vector will be selected. - - Note that the returned RotatedBoxes might share storage with this RotatedBoxes, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return RotatedBoxes(self.tensor[item].view(1, -1)) - b = self.tensor[item] - assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( - item - ) - return RotatedBoxes(b) - - def __len__(self) -> int: - return self.tensor.shape[0] - - def __repr__(self) -> str: - return "RotatedBoxes(" + str(self.tensor) + ")" - - def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor: - """ - Args: - box_size (height, width): Size of the reference box covering - [0, width] x [0, height] - boundary_threshold (int): Boxes that extend beyond the reference box - boundary by more than boundary_threshold are considered "outside". - - For RRPN, it might not be necessary to call this function since it's common - for rotated box to extend to outside of the image boundaries - (the clip function only clips the near-horizontal boxes) - - Returns: - a binary vector, indicating whether each box is inside the reference box. - """ - height, width = box_size - - cnt_x = self.tensor[..., 0] - cnt_y = self.tensor[..., 1] - half_w = self.tensor[..., 2] / 2.0 - half_h = self.tensor[..., 3] / 2.0 - a = self.tensor[..., 4] - c = torch.abs(torch.cos(a * math.pi / 180.0)) - s = torch.abs(torch.sin(a * math.pi / 180.0)) - # This basically computes the horizontal bounding rectangle of the rotated box - max_rect_dx = c * half_w + s * half_h - max_rect_dy = c * half_h + s * half_w - - inds_inside = ( - (cnt_x - max_rect_dx >= -boundary_threshold) - & (cnt_y - max_rect_dy >= -boundary_threshold) - & (cnt_x + max_rect_dx < width + boundary_threshold) - & (cnt_y + max_rect_dy < height + boundary_threshold) - ) - - return inds_inside - - def get_centers(self) -> torch.Tensor: - """ - Returns: - The box centers in a Nx2 array of (x, y). - """ - return self.tensor[:, :2] - - def scale(self, scale_x: float, scale_y: float) -> None: - """ - Scale the rotated box with horizontal and vertical scaling factors - Note: when scale_factor_x != scale_factor_y, - the rotated box does not preserve the rectangular shape when the angle - is not a multiple of 90 degrees under resize transformation. - Instead, the shape is a parallelogram (that has skew) - Here we make an approximation by fitting a rotated rectangle to the parallelogram. 
- """ - self.tensor[:, 0] *= scale_x - self.tensor[:, 1] *= scale_y - theta = self.tensor[:, 4] * math.pi / 180.0 - c = torch.cos(theta) - s = torch.sin(theta) - - # In image space, y is top->down and x is left->right - # Consider the local coordintate system for the rotated box, - # where the box center is located at (0, 0), and the four vertices ABCD are - # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) - # the midpoint of the left edge AD of the rotated box E is: - # E = (A+D)/2 = (-w / 2, 0) - # the midpoint of the top edge AB of the rotated box F is: - # F(0, -h / 2) - # To get the old coordinates in the global system, apply the rotation transformation - # (Note: the right-handed coordinate system for image space is yOx): - # (old_x, old_y) = (s * y + c * x, c * y - s * x) - # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) - # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) - # After applying the scaling factor (sfx, sfy): - # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) - # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) - # The new width after scaling tranformation becomes: - - # w(new) = |E(new) - O| * 2 - # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 - # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w - # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] - # - # For example, - # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; - # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y - self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) - - # h(new) = |F(new) - O| * 2 - # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 - # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h - # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] - # - # For example, - # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; - # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x - self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) - - # The angle is the rotation angle from y-axis in image space to the height - # vector (top->down in the box's local coordinate system) of the box in CCW. - # - # angle(new) = angle_yOx(O - F(new)) - # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) - # = atan2(sfx * s * h / 2, sfy * c * h / 2) - # = atan2(sfx * s, sfy * c) - # - # For example, - # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) - self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi - - @classmethod - def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes": - """ - Concatenates a list of RotatedBoxes into a single RotatedBoxes - - Arguments: - boxes_list (list[RotatedBoxes]) - - Returns: - RotatedBoxes: the concatenated RotatedBoxes - """ - assert isinstance(boxes_list, (list, tuple)) - if len(boxes_list) == 0: - return cls(torch.empty(0)) - assert all([isinstance(box, RotatedBoxes) for box in boxes_list]) - - # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input - cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) - return cat_boxes - - @property - def device(self) -> torch.device: - return self.tensor.device - - @torch.jit.unused - def __iter__(self): - """ - Yield a box as a Tensor of shape (5,) at a time. 
- """ - yield from self.tensor - - -def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: - """ - Given two lists of rotated boxes of size N and M, - compute the IoU (intersection over union) - between **all** N x M pairs of boxes. - The box order must be (x_center, y_center, width, height, angle). - - Args: - boxes1, boxes2 (RotatedBoxes): - two `RotatedBoxes`. Contains N & M rotated boxes, respectively. - - Returns: - Tensor: IoU, sized [N,M]. - """ - - return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/detectron2/detectron2/tracking/__init__.py b/detectron2/detectron2/tracking/__init__.py deleted file mode 100644 index 21078ae822b04b71dbd8b056b5993d173eaf6bff..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .base_tracker import ( # noqa - BaseTracker, - build_tracker_head, - TRACKER_HEADS_REGISTRY, -) -from .bbox_iou_tracker import BBoxIOUTracker # noqa -from .hungarian_tracker import BaseHungarianTracker # noqa -from .iou_weighted_hungarian_bbox_iou_tracker import ( # noqa - IOUWeightedHungarianBBoxIOUTracker, -) -from .utils import create_prediction_pairs # noqa -from .vanilla_hungarian_bbox_iou_tracker import VanillaHungarianBBoxIOUTracker # noqa - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/detectron2/tracking/base_tracker.py b/detectron2/detectron2/tracking/base_tracker.py deleted file mode 100644 index f2f20455c1841324292e9b9d8f42669c8ba61825..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/base_tracker.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2004-present Facebook. All Rights Reserved. -from detectron2.config import configurable -from detectron2.utils.registry import Registry - -from ..config.config import CfgNode as CfgNode_ -from ..structures import Instances - -TRACKER_HEADS_REGISTRY = Registry("TRACKER_HEADS") -TRACKER_HEADS_REGISTRY.__doc__ = """ -Registry for tracking classes. -""" - - -class BaseTracker: - """ - A parent class for all trackers - """ - - @configurable - def __init__(self, **kwargs): - self._prev_instances = None # (D2)instances for previous frame - self._matched_idx = set() # indices in prev_instances found matching - self._matched_ID = set() # idendities in prev_instances found matching - self._untracked_prev_idx = set() # indices in prev_instances not found matching - self._id_count = 0 # used to assign new id - - @classmethod - def from_config(cls, cfg: CfgNode_): - raise NotImplementedError("Calling BaseTracker::from_config") - - def update(self, predictions: Instances) -> Instances: - """ - Args: - predictions: D2 Instances for predictions of the current frame - Return: - D2 Instances for predictions of the current frame with ID assigned - - _prev_instances and instances will have the following fields: - .pred_boxes (shape=[N, 4]) - .scores (shape=[N,]) - .pred_classes (shape=[N,]) - .pred_keypoints (shape=[N, M, 3], Optional) - .pred_masks (shape=List[2D_MASK], Optional) 2D_MASK: shape=[H, W] - .ID (shape=[N,]) - - N: # of detected bboxes - H and W: height and width of 2D mask - """ - raise NotImplementedError("Calling BaseTracker::update") - - -def build_tracker_head(cfg: CfgNode_) -> BaseTracker: - """ - Build a tracker head from `cfg.TRACKER_HEADS.TRACKER_NAME`. 
- - Args: - cfg: D2 CfgNode, config file with tracker information - Return: - tracker object - """ - name = cfg.TRACKER_HEADS.TRACKER_NAME - tracker_class = TRACKER_HEADS_REGISTRY.get(name) - return tracker_class(cfg) diff --git a/detectron2/detectron2/tracking/bbox_iou_tracker.py b/detectron2/detectron2/tracking/bbox_iou_tracker.py deleted file mode 100644 index 598081cb542ce64dd1d100c0d3e12a59f57b8e0e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/bbox_iou_tracker.py +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2004-present Facebook. All Rights Reserved. -import copy -import numpy as np -from typing import List -import torch - -from detectron2.config import configurable -from detectron2.structures import Boxes, Instances -from detectron2.structures.boxes import pairwise_iou - -from ..config.config import CfgNode as CfgNode_ -from .base_tracker import TRACKER_HEADS_REGISTRY, BaseTracker - - -@TRACKER_HEADS_REGISTRY.register() -class BBoxIOUTracker(BaseTracker): - """ - A bounding box tracker to assign ID based on IoU between current and previous instances - """ - - @configurable - def __init__( - self, - *, - video_height: int, - video_width: int, - max_num_instances: int = 200, - max_lost_frame_count: int = 0, - min_box_rel_dim: float = 0.02, - min_instance_period: int = 1, - track_iou_threshold: float = 0.5, - **kwargs, - ): - """ - Args: - video_height: height the video frame - video_width: width of the video frame - max_num_instances: maximum number of id allowed to be tracked - max_lost_frame_count: maximum number of frame an id can lost tracking - exceed this number, an id is considered as lost - forever - min_box_rel_dim: a percentage, smaller than this dimension, a bbox is - removed from tracking - min_instance_period: an instance will be shown after this number of period - since its first showing up in the video - track_iou_threshold: iou threshold, below this number a bbox pair is removed - from tracking - """ - super().__init__(**kwargs) - self._video_height = video_height - self._video_width = video_width - self._max_num_instances = max_num_instances - self._max_lost_frame_count = max_lost_frame_count - self._min_box_rel_dim = min_box_rel_dim - self._min_instance_period = min_instance_period - self._track_iou_threshold = track_iou_threshold - - @classmethod - def from_config(cls, cfg: CfgNode_): - """ - Old style initialization using CfgNode - - Args: - cfg: D2 CfgNode, config file - Return: - dictionary storing arguments for __init__ method - """ - assert "VIDEO_HEIGHT" in cfg.TRACKER_HEADS - assert "VIDEO_WIDTH" in cfg.TRACKER_HEADS - video_height = cfg.TRACKER_HEADS.get("VIDEO_HEIGHT") - video_width = cfg.TRACKER_HEADS.get("VIDEO_WIDTH") - max_num_instances = cfg.TRACKER_HEADS.get("MAX_NUM_INSTANCES", 200) - max_lost_frame_count = cfg.TRACKER_HEADS.get("MAX_LOST_FRAME_COUNT", 0) - min_box_rel_dim = cfg.TRACKER_HEADS.get("MIN_BOX_REL_DIM", 0.02) - min_instance_period = cfg.TRACKER_HEADS.get("MIN_INSTANCE_PERIOD", 1) - track_iou_threshold = cfg.TRACKER_HEADS.get("TRACK_IOU_THRESHOLD", 0.5) - return { - "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker", - "video_height": video_height, - "video_width": video_width, - "max_num_instances": max_num_instances, - "max_lost_frame_count": max_lost_frame_count, - "min_box_rel_dim": min_box_rel_dim, - "min_instance_period": min_instance_period, - "track_iou_threshold": track_iou_threshold, - } - - def update(self, instances: Instances) -> Instances: - """ - See 
BaseTracker description - """ - instances = self._initialize_extra_fields(instances) - if self._prev_instances is not None: - # calculate IoU of all bbox pairs - iou_all = pairwise_iou( - boxes1=instances.pred_boxes, - boxes2=self._prev_instances.pred_boxes, - ) - # sort IoU in descending order - bbox_pairs = self._create_prediction_pairs(instances, iou_all) - # assign previous ID to current bbox if IoU > track_iou_threshold - self._reset_fields() - for bbox_pair in bbox_pairs: - idx = bbox_pair["idx"] - prev_id = bbox_pair["prev_id"] - if ( - idx in self._matched_idx - or prev_id in self._matched_ID - or bbox_pair["IoU"] < self._track_iou_threshold - ): - continue - instances.ID[idx] = prev_id - instances.ID_period[idx] = bbox_pair["prev_period"] + 1 - instances.lost_frame_count[idx] = 0 - self._matched_idx.add(idx) - self._matched_ID.add(prev_id) - self._untracked_prev_idx.remove(bbox_pair["prev_idx"]) - instances = self._assign_new_id(instances) - instances = self._merge_untracked_instances(instances) - self._prev_instances = copy.deepcopy(instances) - return instances - - def _create_prediction_pairs(self, instances: Instances, iou_all: np.ndarray) -> List: - """ - For all instances in previous and current frames, create pairs. For each - pair, store index of the instance in current frame predcitions, index in - previous predictions, ID in previous predictions, IoU of the bboxes in this - pair, period in previous predictions. - - Args: - instances: D2 Instances, for predictions of the current frame - iou_all: IoU for all bboxes pairs - Return: - A list of IoU for all pairs - """ - bbox_pairs = [] - for i in range(len(instances)): - for j in range(len(self._prev_instances)): - bbox_pairs.append( - { - "idx": i, - "prev_idx": j, - "prev_id": self._prev_instances.ID[j], - "IoU": iou_all[i, j], - "prev_period": self._prev_instances.ID_period[j], - } - ) - return bbox_pairs - - def _initialize_extra_fields(self, instances: Instances) -> Instances: - """ - If input instances don't have ID, ID_period, lost_frame_count fields, - this method is used to initialize these fields. 
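The association step above boils down to a greedy pass over IoU-sorted pairs, with each current and previous index matched at most once. A self-contained sketch of that idea (simplified, numpy only, illustrative threshold; not the exact code path above):

import numpy as np

def greedy_iou_match(iou_all: np.ndarray, threshold: float = 0.5):
    # Collect every (current, previous) pair above the threshold, highest IoU first.
    pairs = [
        (iou_all[i, j], i, j)
        for i in range(iou_all.shape[0])
        for j in range(iou_all.shape[1])
        if iou_all[i, j] >= threshold
    ]
    pairs.sort(reverse=True)
    matched_cur, matched_prev, matches = set(), set(), []
    for iou, i, j in pairs:
        if i in matched_cur or j in matched_prev:
            continue  # each box keeps at most one match
        matched_cur.add(i)
        matched_prev.add(j)
        matches.append((i, j, float(iou)))
    return matches

iou = np.array([[0.8, 0.1], [0.2, 0.6], [0.0, 0.55]])
print(greedy_iou_match(iou))  # -> [(0, 0, 0.8), (1, 1, 0.6)]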
- - Args: - instances: D2 Instances, for predictions of the current frame - Return: - D2 Instances with extra fields added - """ - if not instances.has("ID"): - instances.set("ID", [None] * len(instances)) - if not instances.has("ID_period"): - instances.set("ID_period", [None] * len(instances)) - if not instances.has("lost_frame_count"): - instances.set("lost_frame_count", [None] * len(instances)) - if self._prev_instances is None: - instances.ID = list(range(len(instances))) - self._id_count += len(instances) - instances.ID_period = [1] * len(instances) - instances.lost_frame_count = [0] * len(instances) - return instances - - def _reset_fields(self): - """ - Before each uodate call, reset fields first - """ - self._matched_idx = set() - self._matched_ID = set() - self._untracked_prev_idx = set(range(len(self._prev_instances))) - - def _assign_new_id(self, instances: Instances) -> Instances: - """ - For each untracked instance, assign a new id - - Args: - instances: D2 Instances, for predictions of the current frame - Return: - D2 Instances with new ID assigned - """ - untracked_idx = set(range(len(instances))).difference(self._matched_idx) - for idx in untracked_idx: - instances.ID[idx] = self._id_count - self._id_count += 1 - instances.ID_period[idx] = 1 - instances.lost_frame_count[idx] = 0 - return instances - - def _merge_untracked_instances(self, instances: Instances) -> Instances: - """ - For untracked previous instances, under certain condition, still keep them - in tracking and merge with the current instances. - - Args: - instances: D2 Instances, for predictions of the current frame - Return: - D2 Instances merging current instances and instances from previous - frame decided to keep tracking - """ - untracked_instances = Instances( - image_size=instances.image_size, - pred_boxes=[], - pred_classes=[], - scores=[], - ID=[], - ID_period=[], - lost_frame_count=[], - ) - prev_bboxes = list(self._prev_instances.pred_boxes) - prev_classes = list(self._prev_instances.pred_classes) - prev_scores = list(self._prev_instances.scores) - prev_ID_period = self._prev_instances.ID_period - if instances.has("pred_masks"): - untracked_instances.set("pred_masks", []) - prev_masks = list(self._prev_instances.pred_masks) - if instances.has("pred_keypoints"): - untracked_instances.set("pred_keypoints", []) - prev_keypoints = list(self._prev_instances.pred_keypoints) - if instances.has("pred_keypoint_heatmaps"): - untracked_instances.set("pred_keypoint_heatmaps", []) - prev_keypoint_heatmaps = list(self._prev_instances.pred_keypoint_heatmaps) - for idx in self._untracked_prev_idx: - x_left, y_top, x_right, y_bot = prev_bboxes[idx] - if ( - (1.0 * (x_right - x_left) / self._video_width < self._min_box_rel_dim) - or (1.0 * (y_bot - y_top) / self._video_height < self._min_box_rel_dim) - or self._prev_instances.lost_frame_count[idx] >= self._max_lost_frame_count - or prev_ID_period[idx] <= self._min_instance_period - ): - continue - untracked_instances.pred_boxes.append(list(prev_bboxes[idx].numpy())) - untracked_instances.pred_classes.append(int(prev_classes[idx])) - untracked_instances.scores.append(float(prev_scores[idx])) - untracked_instances.ID.append(self._prev_instances.ID[idx]) - untracked_instances.ID_period.append(self._prev_instances.ID_period[idx]) - untracked_instances.lost_frame_count.append( - self._prev_instances.lost_frame_count[idx] + 1 - ) - if instances.has("pred_masks"): - untracked_instances.pred_masks.append(prev_masks[idx].numpy().astype(np.uint8)) - if 
instances.has("pred_keypoints"): - untracked_instances.pred_keypoints.append( - prev_keypoints[idx].numpy().astype(np.uint8) - ) - if instances.has("pred_keypoint_heatmaps"): - untracked_instances.pred_keypoint_heatmaps.append( - prev_keypoint_heatmaps[idx].numpy().astype(np.float32) - ) - untracked_instances.pred_boxes = Boxes(torch.FloatTensor(untracked_instances.pred_boxes)) - untracked_instances.pred_classes = torch.IntTensor(untracked_instances.pred_classes) - untracked_instances.scores = torch.FloatTensor(untracked_instances.scores) - if instances.has("pred_masks"): - untracked_instances.pred_masks = torch.IntTensor(untracked_instances.pred_masks) - if instances.has("pred_keypoints"): - untracked_instances.pred_keypoints = torch.IntTensor(untracked_instances.pred_keypoints) - if instances.has("pred_keypoint_heatmaps"): - untracked_instances.pred_keypoint_heatmaps = torch.FloatTensor( - untracked_instances.pred_keypoint_heatmaps - ) - - return Instances.cat( - [ - instances, - untracked_instances, - ] - ) diff --git a/detectron2/detectron2/tracking/hungarian_tracker.py b/detectron2/detectron2/tracking/hungarian_tracker.py deleted file mode 100644 index 5b3ce884d80d9cdc2e0da07194693dd1bf16dd61..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/hungarian_tracker.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2004-present Facebook. All Rights Reserved. -import copy -import numpy as np -from typing import Dict -import torch -from scipy.optimize import linear_sum_assignment - -from detectron2.config import configurable -from detectron2.structures import Boxes, Instances - -from ..config.config import CfgNode as CfgNode_ -from .base_tracker import BaseTracker - - -class BaseHungarianTracker(BaseTracker): - """ - A base class for all Hungarian trackers - """ - - @configurable - def __init__( - self, - video_height: int, - video_width: int, - max_num_instances: int = 200, - max_lost_frame_count: int = 0, - min_box_rel_dim: float = 0.02, - min_instance_period: int = 1, - **kwargs - ): - """ - Args: - video_height: height the video frame - video_width: width of the video frame - max_num_instances: maximum number of id allowed to be tracked - max_lost_frame_count: maximum number of frame an id can lost tracking - exceed this number, an id is considered as lost - forever - min_box_rel_dim: a percentage, smaller than this dimension, a bbox is - removed from tracking - min_instance_period: an instance will be shown after this number of period - since its first showing up in the video - """ - super().__init__(**kwargs) - self._video_height = video_height - self._video_width = video_width - self._max_num_instances = max_num_instances - self._max_lost_frame_count = max_lost_frame_count - self._min_box_rel_dim = min_box_rel_dim - self._min_instance_period = min_instance_period - - @classmethod - def from_config(cls, cfg: CfgNode_) -> Dict: - raise NotImplementedError("Calling HungarianTracker::from_config") - - def build_cost_matrix(self, instances: Instances, prev_instances: Instances) -> np.ndarray: - raise NotImplementedError("Calling HungarianTracker::build_matrix") - - def update(self, instances: Instances) -> Instances: - if instances.has("pred_keypoints"): - raise NotImplementedError("Need to add support for keypoints") - instances = self._initialize_extra_fields(instances) - if self._prev_instances is not None: - self._untracked_prev_idx = set(range(len(self._prev_instances))) - cost_matrix = self.build_cost_matrix(instances, 
self._prev_instances) - matched_idx, matched_prev_idx = linear_sum_assignment(cost_matrix) - instances = self._process_matched_idx(instances, matched_idx, matched_prev_idx) - instances = self._process_unmatched_idx(instances, matched_idx) - instances = self._process_unmatched_prev_idx(instances, matched_prev_idx) - self._prev_instances = copy.deepcopy(instances) - return instances - - def _initialize_extra_fields(self, instances: Instances) -> Instances: - """ - If input instances don't have ID, ID_period, lost_frame_count fields, - this method is used to initialize these fields. - - Args: - instances: D2 Instances, for predictions of the current frame - Return: - D2 Instances with extra fields added - """ - if not instances.has("ID"): - instances.set("ID", [None] * len(instances)) - if not instances.has("ID_period"): - instances.set("ID_period", [None] * len(instances)) - if not instances.has("lost_frame_count"): - instances.set("lost_frame_count", [None] * len(instances)) - if self._prev_instances is None: - instances.ID = list(range(len(instances))) - self._id_count += len(instances) - instances.ID_period = [1] * len(instances) - instances.lost_frame_count = [0] * len(instances) - return instances - - def _process_matched_idx( - self, instances: Instances, matched_idx: np.ndarray, matched_prev_idx: np.ndarray - ) -> Instances: - assert matched_idx.size == matched_prev_idx.size - for i in range(matched_idx.size): - instances.ID[matched_idx[i]] = self._prev_instances.ID[matched_prev_idx[i]] - instances.ID_period[matched_idx[i]] = ( - self._prev_instances.ID_period[matched_prev_idx[i]] + 1 - ) - instances.lost_frame_count[matched_idx[i]] = 0 - return instances - - def _process_unmatched_idx(self, instances: Instances, matched_idx: np.ndarray) -> Instances: - untracked_idx = set(range(len(instances))).difference(set(matched_idx)) - for idx in untracked_idx: - instances.ID[idx] = self._id_count - self._id_count += 1 - instances.ID_period[idx] = 1 - instances.lost_frame_count[idx] = 0 - return instances - - def _process_unmatched_prev_idx( - self, instances: Instances, matched_prev_idx: np.ndarray - ) -> Instances: - untracked_instances = Instances( - image_size=instances.image_size, - pred_boxes=[], - pred_masks=[], - pred_classes=[], - scores=[], - ID=[], - ID_period=[], - lost_frame_count=[], - ) - prev_bboxes = list(self._prev_instances.pred_boxes) - prev_classes = list(self._prev_instances.pred_classes) - prev_scores = list(self._prev_instances.scores) - prev_ID_period = self._prev_instances.ID_period - if instances.has("pred_masks"): - prev_masks = list(self._prev_instances.pred_masks) - untracked_prev_idx = set(range(len(self._prev_instances))).difference(set(matched_prev_idx)) - for idx in untracked_prev_idx: - x_left, y_top, x_right, y_bot = prev_bboxes[idx] - if ( - (1.0 * (x_right - x_left) / self._video_width < self._min_box_rel_dim) - or (1.0 * (y_bot - y_top) / self._video_height < self._min_box_rel_dim) - or self._prev_instances.lost_frame_count[idx] >= self._max_lost_frame_count - or prev_ID_period[idx] <= self._min_instance_period - ): - continue - untracked_instances.pred_boxes.append(list(prev_bboxes[idx].numpy())) - untracked_instances.pred_classes.append(int(prev_classes[idx])) - untracked_instances.scores.append(float(prev_scores[idx])) - untracked_instances.ID.append(self._prev_instances.ID[idx]) - untracked_instances.ID_period.append(self._prev_instances.ID_period[idx]) - untracked_instances.lost_frame_count.append( - self._prev_instances.lost_frame_count[idx] + 1 - ) 
- if instances.has("pred_masks"): - untracked_instances.pred_masks.append(prev_masks[idx].numpy().astype(np.uint8)) - - untracked_instances.pred_boxes = Boxes(torch.FloatTensor(untracked_instances.pred_boxes)) - untracked_instances.pred_classes = torch.IntTensor(untracked_instances.pred_classes) - untracked_instances.scores = torch.FloatTensor(untracked_instances.scores) - if instances.has("pred_masks"): - untracked_instances.pred_masks = torch.IntTensor(untracked_instances.pred_masks) - else: - untracked_instances.remove("pred_masks") - - return Instances.cat( - [ - instances, - untracked_instances, - ] - ) diff --git a/detectron2/detectron2/tracking/iou_weighted_hungarian_bbox_iou_tracker.py b/detectron2/detectron2/tracking/iou_weighted_hungarian_bbox_iou_tracker.py deleted file mode 100644 index b3b4d1c5663fb49b2fc40752d6b7a42eddd58e75..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/iou_weighted_hungarian_bbox_iou_tracker.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2004-present Facebook. All Rights Reserved. - -import numpy as np -from typing import List - -from detectron2.config import CfgNode as CfgNode_ -from detectron2.config import configurable - -from .base_tracker import TRACKER_HEADS_REGISTRY -from .vanilla_hungarian_bbox_iou_tracker import VanillaHungarianBBoxIOUTracker - - -@TRACKER_HEADS_REGISTRY.register() -class IOUWeightedHungarianBBoxIOUTracker(VanillaHungarianBBoxIOUTracker): - """ - A tracker using IoU as weight in Hungarian algorithm, also known - as Munkres or Kuhn-Munkres algorithm - """ - - @configurable - def __init__( - self, - *, - video_height: int, - video_width: int, - max_num_instances: int = 200, - max_lost_frame_count: int = 0, - min_box_rel_dim: float = 0.02, - min_instance_period: int = 1, - track_iou_threshold: float = 0.5, - **kwargs, - ): - """ - Args: - video_height: height the video frame - video_width: width of the video frame - max_num_instances: maximum number of id allowed to be tracked - max_lost_frame_count: maximum number of frame an id can lost tracking - exceed this number, an id is considered as lost - forever - min_box_rel_dim: a percentage, smaller than this dimension, a bbox is - removed from tracking - min_instance_period: an instance will be shown after this number of period - since its first showing up in the video - track_iou_threshold: iou threshold, below this number a bbox pair is removed - from tracking - """ - super().__init__( - video_height=video_height, - video_width=video_width, - max_num_instances=max_num_instances, - max_lost_frame_count=max_lost_frame_count, - min_box_rel_dim=min_box_rel_dim, - min_instance_period=min_instance_period, - track_iou_threshold=track_iou_threshold, - ) - - @classmethod - def from_config(cls, cfg: CfgNode_): - """ - Old style initialization using CfgNode - - Args: - cfg: D2 CfgNode, config file - Return: - dictionary storing arguments for __init__ method - """ - assert "VIDEO_HEIGHT" in cfg.TRACKER_HEADS - assert "VIDEO_WIDTH" in cfg.TRACKER_HEADS - video_height = cfg.TRACKER_HEADS.get("VIDEO_HEIGHT") - video_width = cfg.TRACKER_HEADS.get("VIDEO_WIDTH") - max_num_instances = cfg.TRACKER_HEADS.get("MAX_NUM_INSTANCES", 200) - max_lost_frame_count = cfg.TRACKER_HEADS.get("MAX_LOST_FRAME_COUNT", 0) - min_box_rel_dim = cfg.TRACKER_HEADS.get("MIN_BOX_REL_DIM", 0.02) - min_instance_period = cfg.TRACKER_HEADS.get("MIN_INSTANCE_PERIOD", 1) - track_iou_threshold = cfg.TRACKER_HEADS.get("TRACK_IOU_THRESHOLD", 0.5) - return { - "_target_": 
"detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": video_height, - "video_width": video_width, - "max_num_instances": max_num_instances, - "max_lost_frame_count": max_lost_frame_count, - "min_box_rel_dim": min_box_rel_dim, - "min_instance_period": min_instance_period, - "track_iou_threshold": track_iou_threshold, - } - - def assign_cost_matrix_values(self, cost_matrix: np.ndarray, bbox_pairs: List) -> np.ndarray: - """ - Based on IoU for each pair of bbox, assign the associated value in cost matrix - - Args: - cost_matrix: np.ndarray, initialized 2D array with target dimensions - bbox_pairs: list of bbox pair, in each pair, iou value is stored - Return: - np.ndarray, cost_matrix with assigned values - """ - for pair in bbox_pairs: - # assign (-1 * IoU) for above threshold pairs, algorithms will minimize cost - cost_matrix[pair["idx"]][pair["prev_idx"]] = -1 * pair["IoU"] - return cost_matrix diff --git a/detectron2/detectron2/tracking/utils.py b/detectron2/detectron2/tracking/utils.py deleted file mode 100644 index 92634c5cfe0c18eda00ce6c8bfe767ed20470a80..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/utils.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 -import numpy as np -from typing import List - -from detectron2.structures import Instances - - -def create_prediction_pairs( - instances: Instances, - prev_instances: Instances, - iou_all: np.ndarray, - threshold: float = 0.5, -) -> List: - """ - Args: - instances: predictions from current frame - prev_instances: predictions from previous frame - iou_all: 2D numpy array containing iou for each bbox pair - threshold: below the threshold, doesn't consider the pair of bbox is valid - Return: - List of bbox pairs - """ - bbox_pairs = [] - for i in range(len(instances)): - for j in range(len(prev_instances)): - if iou_all[i, j] < threshold: - continue - bbox_pairs.append( - { - "idx": i, - "prev_idx": j, - "prev_id": prev_instances.ID[j], - "IoU": iou_all[i, j], - "prev_period": prev_instances.ID_period[j], - } - ) - return bbox_pairs - - -LARGE_COST_VALUE = 100000 diff --git a/detectron2/detectron2/tracking/vanilla_hungarian_bbox_iou_tracker.py b/detectron2/detectron2/tracking/vanilla_hungarian_bbox_iou_tracker.py deleted file mode 100644 index 5629f7383adcafeaa1ebdae1f38f968437149652..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/tracking/vanilla_hungarian_bbox_iou_tracker.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2004-present Facebook. All Rights Reserved. 
- -import numpy as np -from typing import List - -from detectron2.config import CfgNode as CfgNode_ -from detectron2.config import configurable -from detectron2.structures import Instances -from detectron2.structures.boxes import pairwise_iou -from detectron2.tracking.utils import LARGE_COST_VALUE, create_prediction_pairs - -from .base_tracker import TRACKER_HEADS_REGISTRY -from .hungarian_tracker import BaseHungarianTracker - - -@TRACKER_HEADS_REGISTRY.register() -class VanillaHungarianBBoxIOUTracker(BaseHungarianTracker): - """ - Hungarian algo based tracker using bbox iou as metric - """ - - @configurable - def __init__( - self, - *, - video_height: int, - video_width: int, - max_num_instances: int = 200, - max_lost_frame_count: int = 0, - min_box_rel_dim: float = 0.02, - min_instance_period: int = 1, - track_iou_threshold: float = 0.5, - **kwargs, - ): - """ - Args: - video_height: height the video frame - video_width: width of the video frame - max_num_instances: maximum number of id allowed to be tracked - max_lost_frame_count: maximum number of frame an id can lost tracking - exceed this number, an id is considered as lost - forever - min_box_rel_dim: a percentage, smaller than this dimension, a bbox is - removed from tracking - min_instance_period: an instance will be shown after this number of period - since its first showing up in the video - track_iou_threshold: iou threshold, below this number a bbox pair is removed - from tracking - """ - super().__init__( - video_height=video_height, - video_width=video_width, - max_num_instances=max_num_instances, - max_lost_frame_count=max_lost_frame_count, - min_box_rel_dim=min_box_rel_dim, - min_instance_period=min_instance_period, - ) - self._track_iou_threshold = track_iou_threshold - - @classmethod - def from_config(cls, cfg: CfgNode_): - """ - Old style initialization using CfgNode - - Args: - cfg: D2 CfgNode, config file - Return: - dictionary storing arguments for __init__ method - """ - assert "VIDEO_HEIGHT" in cfg.TRACKER_HEADS - assert "VIDEO_WIDTH" in cfg.TRACKER_HEADS - video_height = cfg.TRACKER_HEADS.get("VIDEO_HEIGHT") - video_width = cfg.TRACKER_HEADS.get("VIDEO_WIDTH") - max_num_instances = cfg.TRACKER_HEADS.get("MAX_NUM_INSTANCES", 200) - max_lost_frame_count = cfg.TRACKER_HEADS.get("MAX_LOST_FRAME_COUNT", 0) - min_box_rel_dim = cfg.TRACKER_HEADS.get("MIN_BOX_REL_DIM", 0.02) - min_instance_period = cfg.TRACKER_HEADS.get("MIN_INSTANCE_PERIOD", 1) - track_iou_threshold = cfg.TRACKER_HEADS.get("TRACK_IOU_THRESHOLD", 0.5) - return { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": video_height, - "video_width": video_width, - "max_num_instances": max_num_instances, - "max_lost_frame_count": max_lost_frame_count, - "min_box_rel_dim": min_box_rel_dim, - "min_instance_period": min_instance_period, - "track_iou_threshold": track_iou_threshold, - } - - def build_cost_matrix(self, instances: Instances, prev_instances: Instances) -> np.ndarray: - """ - Build the cost matrix for assignment problem - (https://en.wikipedia.org/wiki/Assignment_problem) - - Args: - instances: D2 Instances, for current frame predictions - prev_instances: D2 Instances, for previous frame predictions - - Return: - the cost matrix in numpy array - """ - assert instances is not None and prev_instances is not None - # calculate IoU of all bbox pairs - iou_all = pairwise_iou( - boxes1=instances.pred_boxes, - boxes2=self._prev_instances.pred_boxes, - ) - bbox_pairs = 
create_prediction_pairs( - instances, self._prev_instances, iou_all, threshold=self._track_iou_threshold - ) - # assign large cost value to make sure pair below IoU threshold won't be matched - cost_matrix = np.full((len(instances), len(prev_instances)), LARGE_COST_VALUE) - return self.assign_cost_matrix_values(cost_matrix, bbox_pairs) - - def assign_cost_matrix_values(self, cost_matrix: np.ndarray, bbox_pairs: List) -> np.ndarray: - """ - Based on IoU for each pair of bbox, assign the associated value in cost matrix - - Args: - cost_matrix: np.ndarray, initialized 2D array with target dimensions - bbox_pairs: list of bbox pair, in each pair, iou value is stored - Return: - np.ndarray, cost_matrix with assigned values - """ - for pair in bbox_pairs: - # assign -1 for IoU above threshold pairs, algorithms will minimize cost - cost_matrix[pair["idx"]][pair["prev_idx"]] = -1 - return cost_matrix diff --git a/detectron2/detectron2/utils/README.md b/detectron2/detectron2/utils/README.md deleted file mode 100644 index 9765b24a730b77556104187ac3ef5439ab0859fd..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Utility functions - -This folder contain utility functions that are not used in the -core library, but are useful for building models or training -code using the config system. diff --git a/detectron2/detectron2/utils/__init__.py b/detectron2/detectron2/utils/__init__.py deleted file mode 100644 index 9020c2df23e2af280b7bb168b996ae9eaf312eb8..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/detectron2/detectron2/utils/analysis.py b/detectron2/detectron2/utils/analysis.py deleted file mode 100644 index 178da7968cc08c29ec61b823bba8b74e8d97e1d6..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/analysis.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -*- coding: utf-8 -*- - -import typing -from typing import Any, List -import fvcore -from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table -from torch import nn - -from detectron2.export import TracingAdapter - -__all__ = [ - "activation_count_operators", - "flop_count_operators", - "parameter_count_table", - "parameter_count", - "FlopCountAnalysis", -] - -FLOPS_MODE = "flops" -ACTIVATIONS_MODE = "activations" - - -# Some extra ops to ignore from counting, including elementwise and reduction ops -_IGNORED_OPS = { - "aten::add", - "aten::add_", - "aten::argmax", - "aten::argsort", - "aten::batch_norm", - "aten::constant_pad_nd", - "aten::div", - "aten::div_", - "aten::exp", - "aten::log2", - "aten::max_pool2d", - "aten::meshgrid", - "aten::mul", - "aten::mul_", - "aten::neg", - "aten::nonzero_numpy", - "aten::reciprocal", - "aten::repeat_interleave", - "aten::rsub", - "aten::sigmoid", - "aten::sigmoid_", - "aten::softmax", - "aten::sort", - "aten::sqrt", - "aten::sub", - "torchvision::nms", # TODO estimate flop for nms -} - - -class FlopCountAnalysis(fvcore.nn.FlopCountAnalysis): - """ - Same as :class:`fvcore.nn.FlopCountAnalysis`, but supports detectron2 models. - """ - - def __init__(self, model, inputs): - """ - Args: - model (nn.Module): - inputs (Any): inputs of the given model. Does not have to be tuple of tensors. 
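Taken together, the Hungarian trackers above turn pairwise IoU into an assignment problem: pairs below the threshold keep LARGE_COST_VALUE, valid pairs are written as -1 (vanilla) or -IoU (IoU-weighted), and scipy's linear_sum_assignment minimizes the total cost. A small sketch with an illustrative 3x2 IoU matrix:

import numpy as np
from scipy.optimize import linear_sum_assignment

LARGE_COST_VALUE = 100000  # same constant as in tracking/utils.py
iou_all = np.array([[0.8, 0.1], [0.2, 0.6], [0.0, 0.55]])  # 3 current x 2 previous boxes

cost = np.full(iou_all.shape, LARGE_COST_VALUE, dtype=np.float64)
valid = iou_all >= 0.5
cost[valid] = -iou_all[valid]   # the IoU-weighted variant; the vanilla tracker writes -1

rows, cols = linear_sum_assignment(cost)
for i, j in zip(rows, cols):
    if cost[i, j] < 0:  # skip assignments forced onto high-cost (invalid) cells
        print(f"current box {i} keeps the ID of previous box {j}")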
- """ - wrapper = TracingAdapter(model, inputs, allow_non_tensor=True) - super().__init__(wrapper, wrapper.flattened_inputs) - self.set_op_handle(**{k: None for k in _IGNORED_OPS}) - - -def flop_count_operators(model: nn.Module, inputs: list) -> typing.DefaultDict[str, float]: - """ - Implement operator-level flops counting using jit. - This is a wrapper of :func:`fvcore.nn.flop_count` and adds supports for standard - detection models in detectron2. - Please use :class:`FlopCountAnalysis` for more advanced functionalities. - - Note: - The function runs the input through the model to compute flops. - The flops of a detection model is often input-dependent, for example, - the flops of box & mask head depends on the number of proposals & - the number of detected objects. - Therefore, the flops counting using a single input may not accurately - reflect the computation cost of a model. It's recommended to average - across a number of inputs. - - Args: - model: a detectron2 model that takes `list[dict]` as input. - inputs (list[dict]): inputs to model, in detectron2's standard format. - Only "image" key will be used. - supported_ops (dict[str, Handle]): see documentation of :func:`fvcore.nn.flop_count` - - Returns: - Counter: Gflop count per operator - """ - old_train = model.training - model.eval() - ret = FlopCountAnalysis(model, inputs).by_operator() - model.train(old_train) - return {k: v / 1e9 for k, v in ret.items()} - - -def activation_count_operators( - model: nn.Module, inputs: list, **kwargs -) -> typing.DefaultDict[str, float]: - """ - Implement operator-level activations counting using jit. - This is a wrapper of fvcore.nn.activation_count, that supports standard detection models - in detectron2. - - Note: - The function runs the input through the model to compute activations. - The activations of a detection model is often input-dependent, for example, - the activations of box & mask head depends on the number of proposals & - the number of detected objects. - - Args: - model: a detectron2 model that takes `list[dict]` as input. - inputs (list[dict]): inputs to model, in detectron2's standard format. - Only "image" key will be used. 
- - Returns: - Counter: activation count per operator - """ - return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) - - -def _wrapper_count_operators( - model: nn.Module, inputs: list, mode: str, **kwargs -) -> typing.DefaultDict[str, float]: - # ignore some ops - supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} - supported_ops.update(kwargs.pop("supported_ops", {})) - kwargs["supported_ops"] = supported_ops - - assert len(inputs) == 1, "Please use batch size=1" - tensor_input = inputs[0]["image"] - inputs = [{"image": tensor_input}] # remove other keys, in case there are any - - old_train = model.training - if isinstance(model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)): - model = model.module - wrapper = TracingAdapter(model, inputs) - wrapper.eval() - if mode == FLOPS_MODE: - ret = flop_count(wrapper, (tensor_input,), **kwargs) - elif mode == ACTIVATIONS_MODE: - ret = activation_count(wrapper, (tensor_input,), **kwargs) - else: - raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) - # compatible with change in fvcore - if isinstance(ret, tuple): - ret = ret[0] - model.train(old_train) - return ret - - -def find_unused_parameters(model: nn.Module, inputs: Any) -> List[str]: - """ - Given a model, find parameters that do not contribute - to the loss. - - Args: - model: a model in training mode that returns losses - inputs: argument or a tuple of arguments. Inputs of the model - - Returns: - list[str]: the name of unused parameters - """ - assert model.training - for _, prm in model.named_parameters(): - prm.grad = None - - if isinstance(inputs, tuple): - losses = model(*inputs) - else: - losses = model(inputs) - - if isinstance(losses, dict): - losses = sum(losses.values()) - losses.backward() - - unused: List[str] = [] - for name, prm in model.named_parameters(): - if prm.grad is None: - unused.append(name) - prm.grad = None - return unused diff --git a/detectron2/detectron2/utils/collect_env.py b/detectron2/detectron2/utils/collect_env.py deleted file mode 100644 index a4c0b1f91855c46fbcbcf19ffb9b285cac9471df..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/collect_env.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
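find_unused_parameters() above is typically used as a debugging aid before enabling DistributedDataParallel. A sketch, assuming a hypothetical `model` in training mode and a `batched_inputs` batch for which it returns a loss dict:

from detectron2.utils.analysis import find_unused_parameters

model.train()
unused = find_unused_parameters(model, batched_inputs)
if unused:
    print("Parameters that received no gradient:")
    for name in unused:
        print(" ", name)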
-import importlib -import numpy as np -import os -import re -import subprocess -import sys -from collections import defaultdict -import PIL -import torch -import torchvision -from tabulate import tabulate - -__all__ = ["collect_env_info"] - - -def collect_torch_env(): - try: - import torch.__config__ - - return torch.__config__.show() - except ImportError: - # compatible with older versions of pytorch - from torch.utils.collect_env import get_pretty_env_info - - return get_pretty_env_info() - - -def get_env_module(): - var_name = "DETECTRON2_ENV_MODULE" - return var_name, os.environ.get(var_name, "") - - -def detect_compute_compatibility(CUDA_HOME, so_file): - try: - cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") - if os.path.isfile(cuobjdump): - output = subprocess.check_output( - "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True - ) - output = output.decode("utf-8").strip().split("\n") - arch = [] - for line in output: - line = re.findall(r"\.sm_([0-9]*)\.", line)[0] - arch.append(".".join(line)) - arch = sorted(set(arch)) - return ", ".join(arch) - else: - return so_file + "; cannot find cuobjdump" - except Exception: - # unhandled failure - return so_file - - -def collect_env_info(): - has_gpu = torch.cuda.is_available() # true for both CUDA & ROCM - torch_version = torch.__version__ - - # NOTE that CUDA_HOME/ROCM_HOME could be None even when CUDA runtime libs are functional - from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME - - has_rocm = False - if (getattr(torch.version, "hip", None) is not None) and (ROCM_HOME is not None): - has_rocm = True - has_cuda = has_gpu and (not has_rocm) - - data = [] - data.append(("sys.platform", sys.platform)) # check-template.yml depends on it - data.append(("Python", sys.version.replace("\n", ""))) - data.append(("numpy", np.__version__)) - - try: - import detectron2 # noqa - - data.append( - ( - "detectron2", - detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__), - ) - ) - except ImportError: - data.append(("detectron2", "failed to import")) - except AttributeError: - data.append(("detectron2", "imported a wrong installation")) - - try: - import detectron2._C as _C - except ImportError as e: - data.append(("detectron2._C", f"not built correctly: {e}")) - - # print system compilers when extension fails to build - if sys.platform != "win32": # don't know what to do for windows - try: - # this is how torch/utils/cpp_extensions.py choose compiler - cxx = os.environ.get("CXX", "c++") - cxx = subprocess.check_output("'{}' --version".format(cxx), shell=True) - cxx = cxx.decode("utf-8").strip().split("\n")[0] - except subprocess.SubprocessError: - cxx = "Not found" - data.append(("Compiler ($CXX)", cxx)) - - if has_cuda and CUDA_HOME is not None: - try: - nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") - nvcc = subprocess.check_output("'{}' -V".format(nvcc), shell=True) - nvcc = nvcc.decode("utf-8").strip().split("\n")[-1] - except subprocess.SubprocessError: - nvcc = "Not found" - data.append(("CUDA compiler", nvcc)) - if has_cuda and sys.platform != "win32": - try: - so_file = importlib.util.find_spec("detectron2._C").origin - except (ImportError, AttributeError): - pass - else: - data.append( - ( - "detectron2 arch flags", - detect_compute_compatibility(CUDA_HOME, so_file), - ) - ) - else: - # print compilers that are used to build extension - data.append(("Compiler", _C.get_compiler_version())) - data.append(("CUDA compiler", _C.get_cuda_version())) # cuda or hip - if has_cuda and getattr(_C, "has_cuda", 
lambda: True)(): - data.append( - ( - "detectron2 arch flags", - detect_compute_compatibility(CUDA_HOME, _C.__file__), - ) - ) - - data.append(get_env_module()) - data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__))) - data.append(("PyTorch debug build", torch.version.debug)) - try: - data.append(("torch._C._GLIBCXX_USE_CXX11_ABI", torch._C._GLIBCXX_USE_CXX11_ABI)) - except Exception: - pass - - if not has_gpu: - has_gpu_text = "No: torch.cuda.is_available() == False" - else: - has_gpu_text = "Yes" - data.append(("GPU available", has_gpu_text)) - if has_gpu: - devices = defaultdict(list) - for k in range(torch.cuda.device_count()): - cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k))) - name = torch.cuda.get_device_name(k) + f" (arch={cap})" - devices[name].append(str(k)) - for name, devids in devices.items(): - data.append(("GPU " + ",".join(devids), name)) - - if has_rocm: - msg = " - invalid!" if not (ROCM_HOME and os.path.isdir(ROCM_HOME)) else "" - data.append(("ROCM_HOME", str(ROCM_HOME) + msg)) - else: - try: - from torch.utils.collect_env import ( - get_nvidia_driver_version, - run as _run, - ) - - data.append(("Driver version", get_nvidia_driver_version(_run))) - except Exception: - pass - msg = " - invalid!" if not (CUDA_HOME and os.path.isdir(CUDA_HOME)) else "" - data.append(("CUDA_HOME", str(CUDA_HOME) + msg)) - - cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) - if cuda_arch_list: - data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) - data.append(("Pillow", PIL.__version__)) - - try: - data.append( - ( - "torchvision", - str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), - ) - ) - if has_cuda: - try: - torchvision_C = importlib.util.find_spec("torchvision._C").origin - msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) - data.append(("torchvision arch flags", msg)) - except (ImportError, AttributeError): - data.append(("torchvision._C", "Not found")) - except AttributeError: - data.append(("torchvision", "unknown")) - - try: - import fvcore - - data.append(("fvcore", fvcore.__version__)) - except (ImportError, AttributeError): - pass - - try: - import iopath - - data.append(("iopath", iopath.__version__)) - except (ImportError, AttributeError): - pass - - try: - import cv2 - - data.append(("cv2", cv2.__version__)) - except (ImportError, AttributeError): - data.append(("cv2", "Not found")) - env_str = tabulate(data) + "\n" - env_str += collect_torch_env() - return env_str - - -def test_nccl_ops(): - num_gpu = torch.cuda.device_count() - if os.access("/tmp", os.W_OK): - import torch.multiprocessing as mp - - dist_url = "file:///tmp/nccl_tmp_file" - print("Testing NCCL connectivity ... this should not hang.") - mp.spawn(_test_nccl_worker, nprocs=num_gpu, args=(num_gpu, dist_url), daemon=False) - print("NCCL succeeded.") - - -def _test_nccl_worker(rank, num_gpu, dist_url): - import torch.distributed as dist - - dist.init_process_group(backend="NCCL", init_method=dist_url, rank=rank, world_size=num_gpu) - dist.barrier(device_ids=[rank]) - - -def main() -> None: - global x - try: - from detectron2.utils.collect_env import collect_env_info as f - - print(f()) - except ImportError: - print(collect_env_info()) - - if torch.cuda.is_available(): - num_gpu = torch.cuda.device_count() - for k in range(num_gpu): - device = f"cuda:{k}" - try: - x = torch.tensor([1, 2.0], dtype=torch.float32) - x = x.to(device) - except Exception as e: - print( - f"Unable to copy tensor to device={device}: {e}. 
" - "Your CUDA environment is broken." - ) - if num_gpu > 1: - test_nccl_ops() - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/detectron2/utils/colormap.py b/detectron2/detectron2/utils/colormap.py deleted file mode 100644 index 14ded1659b40b161358c4aaf9cc84ffe0ffafe64..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/colormap.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -An awesome colormap for really neat visualizations. -Copied from Detectron, and removed gray colors. -""" - -import numpy as np -import random - -__all__ = ["colormap", "random_color", "random_colors"] - -# fmt: off -# RGB: -_COLORS = np.array( - [ - 0.000, 0.447, 0.741, - 0.850, 0.325, 0.098, - 0.929, 0.694, 0.125, - 0.494, 0.184, 0.556, - 0.466, 0.674, 0.188, - 0.301, 0.745, 0.933, - 0.635, 0.078, 0.184, - 0.300, 0.300, 0.300, - 0.600, 0.600, 0.600, - 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, - 0.749, 0.749, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 1.000, - 0.667, 0.000, 1.000, - 0.333, 0.333, 0.000, - 0.333, 0.667, 0.000, - 0.333, 1.000, 0.000, - 0.667, 0.333, 0.000, - 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, - 1.000, 0.333, 0.000, - 1.000, 0.667, 0.000, - 1.000, 1.000, 0.000, - 0.000, 0.333, 0.500, - 0.000, 0.667, 0.500, - 0.000, 1.000, 0.500, - 0.333, 0.000, 0.500, - 0.333, 0.333, 0.500, - 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, - 0.667, 0.000, 0.500, - 0.667, 0.333, 0.500, - 0.667, 0.667, 0.500, - 0.667, 1.000, 0.500, - 1.000, 0.000, 0.500, - 1.000, 0.333, 0.500, - 1.000, 0.667, 0.500, - 1.000, 1.000, 0.500, - 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, - 0.000, 1.000, 1.000, - 0.333, 0.000, 1.000, - 0.333, 0.333, 1.000, - 0.333, 0.667, 1.000, - 0.333, 1.000, 1.000, - 0.667, 0.000, 1.000, - 0.667, 0.333, 1.000, - 0.667, 0.667, 1.000, - 0.667, 1.000, 1.000, - 1.000, 0.000, 1.000, - 1.000, 0.333, 1.000, - 1.000, 0.667, 1.000, - 0.333, 0.000, 0.000, - 0.500, 0.000, 0.000, - 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, - 1.000, 0.000, 0.000, - 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, - 0.000, 0.500, 0.000, - 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 0.167, - 0.000, 0.000, 0.333, - 0.000, 0.000, 0.500, - 0.000, 0.000, 0.667, - 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, - 0.000, 0.000, 0.000, - 0.143, 0.143, 0.143, - 0.857, 0.857, 0.857, - 1.000, 1.000, 1.000 - ] -).astype(np.float32).reshape(-1, 3) -# fmt: on - - -def colormap(rgb=False, maximum=255): - """ - Args: - rgb (bool): whether to return RGB colors or BGR colors. - maximum (int): either 255 or 1 - - Returns: - ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] - """ - assert maximum in [255, 1], maximum - c = _COLORS * maximum - if not rgb: - c = c[:, ::-1] - return c - - -def random_color(rgb=False, maximum=255): - """ - Args: - rgb (bool): whether to return RGB colors or BGR colors. - maximum (int): either 255 or 1 - - Returns: - ndarray: a vector of 3 numbers - """ - idx = np.random.randint(0, len(_COLORS)) - ret = _COLORS[idx] * maximum - if not rgb: - ret = ret[::-1] - return ret - - -def random_colors(N, rgb=False, maximum=255): - """ - Args: - N (int): number of unique colors needed - rgb (bool): whether to return RGB colors or BGR colors. 
- maximum (int): either 255 or 1 - - Returns: - ndarray: a list of random_color - """ - indices = random.sample(range(len(_COLORS)), N) - ret = [_COLORS[i] * maximum for i in indices] - if not rgb: - ret = [x[::-1] for x in ret] - return ret - - -if __name__ == "__main__": - import cv2 - - size = 100 - H, W = 10, 10 - canvas = np.random.rand(H * size, W * size, 3).astype("float32") - for h in range(H): - for w in range(W): - idx = h * W + w - if idx >= len(_COLORS): - break - canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] - cv2.imshow("a", canvas) - cv2.waitKey(0) diff --git a/detectron2/detectron2/utils/comm.py b/detectron2/detectron2/utils/comm.py deleted file mode 100644 index a9ea9a9f578c5704d1e7ff563ef156e9133ab465..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/comm.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -""" -This file contains primitives for multi-gpu communication. -This is useful when doing distributed training. -""" - -import functools -import numpy as np -import torch -import torch.distributed as dist - -_LOCAL_PROCESS_GROUP = None -_MISSING_LOCAL_PG_ERROR = ( - "Local process group is not yet created! Please use detectron2's `launch()` " - "to start processes and initialize pytorch process group. If you need to start " - "processes in other ways, please call comm.create_local_process_group(" - "num_workers_per_machine) after calling torch.distributed.init_process_group()." -) - - -def get_world_size() -> int: - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank() -> int: - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - return dist.get_rank() - - -@functools.lru_cache() -def create_local_process_group(num_workers_per_machine: int) -> None: - """ - Create a process group that contains ranks within the same machine. - - Detectron2's launch() in engine/launch.py will call this function. If you start - workers without launch(), you'll have to also call this. Otherwise utilities - like `get_local_rank()` will not work. - - This function contains a barrier. All processes must call it together. - - Args: - num_workers_per_machine: the number of worker processes per machine. Typically - the number of GPUs. - """ - global _LOCAL_PROCESS_GROUP - assert _LOCAL_PROCESS_GROUP is None - assert get_world_size() % num_workers_per_machine == 0 - num_machines = get_world_size() // num_workers_per_machine - machine_rank = get_rank() // num_workers_per_machine - for i in range(num_machines): - ranks_on_i = list(range(i * num_workers_per_machine, (i + 1) * num_workers_per_machine)) - pg = dist.new_group(ranks_on_i) - if i == machine_rank: - _LOCAL_PROCESS_GROUP = pg - - -def get_local_process_group(): - """ - Returns: - A torch process group which only includes processes that are on the same - machine as the current process. This group can be useful for communication - within a machine, e.g. a per-machine SyncBN. - """ - assert _LOCAL_PROCESS_GROUP is not None, _MISSING_LOCAL_PG_ERROR - return _LOCAL_PROCESS_GROUP - - -def get_local_rank() -> int: - """ - Returns: - The rank of the current process within the local (per-machine) process group. 
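As the create_local_process_group() docstring notes, workers started outside detectron2's launch() must create the local group themselves. A hedged sketch; the backend and init_method are placeholders for whatever your launcher provides:

import torch
import torch.distributed as dist

from detectron2.utils import comm

dist.init_process_group(backend="nccl", init_method="env://")  # placeholder launcher setup
comm.create_local_process_group(num_workers_per_machine=torch.cuda.device_count())

print(comm.get_rank(), comm.get_local_rank(), comm.get_local_size())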
- """ - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - assert _LOCAL_PROCESS_GROUP is not None, _MISSING_LOCAL_PG_ERROR - return dist.get_rank(group=_LOCAL_PROCESS_GROUP) - - -def get_local_size() -> int: - """ - Returns: - The size of the per-machine process group, - i.e. the number of processes per machine. - """ - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - assert _LOCAL_PROCESS_GROUP is not None, _MISSING_LOCAL_PG_ERROR - return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) - - -def is_main_process() -> bool: - return get_rank() == 0 - - -def synchronize(): - """ - Helper function to synchronize (barrier) among all processes when - using distributed training - """ - if not dist.is_available(): - return - if not dist.is_initialized(): - return - world_size = dist.get_world_size() - if world_size == 1: - return - if dist.get_backend() == dist.Backend.NCCL: - # This argument is needed to avoid warnings. - # It's valid only for NCCL backend. - dist.barrier(device_ids=[torch.cuda.current_device()]) - else: - dist.barrier() - - -@functools.lru_cache() -def _get_global_gloo_group(): - """ - Return a process group based on gloo backend, containing all the ranks - The result is cached. - """ - if dist.get_backend() == "nccl": - return dist.new_group(backend="gloo") - else: - return dist.group.WORLD - - -def all_gather(data, group=None): - """ - Run all_gather on arbitrary picklable data (not necessarily tensors). - - Args: - data: any picklable object - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - - Returns: - list[data]: list of data gathered from each rank - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() # use CPU group by default, to reduce GPU RAM usage. - world_size = dist.get_world_size(group) - if world_size == 1: - return [data] - - output = [None for _ in range(world_size)] - dist.all_gather_object(output, data, group=group) - return output - - -def gather(data, dst=0, group=None): - """ - Run gather on arbitrary picklable data (not necessarily tensors). - - Args: - data: any picklable object - dst (int): destination rank - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - - Returns: - list[data]: on dst, a list of data gathered from each rank. Otherwise, - an empty list. - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() - world_size = dist.get_world_size(group=group) - if world_size == 1: - return [data] - rank = dist.get_rank(group=group) - - if rank == dst: - output = [None for _ in range(world_size)] - dist.gather_object(data, output, dst=dst, group=group) - return output - else: - dist.gather_object(data, None, dst=dst, group=group) - return [] - - -def shared_random_seed(): - """ - Returns: - int: a random number that is the same across all workers. - If workers need a shared RNG, they can use this shared seed to - create one. - - All workers must call this function, otherwise it will deadlock. - """ - ints = np.random.randint(2**31) - all_ints = all_gather(ints) - return all_ints[0] - - -def reduce_dict(input_dict, average=True): - """ - Reduce the values in the dictionary from all processes so that process with rank - 0 has the reduced results. - - Args: - input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. 
- average (bool): whether to do average or sum - - Returns: - a dict with the same keys as input_dict, after reduction. - """ - world_size = get_world_size() - if world_size < 2: - return input_dict - with torch.no_grad(): - names = [] - values = [] - # sort the keys so that they are consistent across processes - for k in sorted(input_dict.keys()): - names.append(k) - values.append(input_dict[k]) - values = torch.stack(values, dim=0) - dist.reduce(values, dst=0) - if dist.get_rank() == 0 and average: - # only main process gets accumulated, so only divide by - # world_size in this case - values /= world_size - reduced_dict = {k: v for k, v in zip(names, values)} - return reduced_dict diff --git a/detectron2/detectron2/utils/develop.py b/detectron2/detectron2/utils/develop.py deleted file mode 100644 index e8416984954f7b32fc269100620e3c0d0d0f9585..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/develop.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -""" Utilities for developers only. -These are not visible to users (not automatically imported). And should not -appeared in docs.""" -# adapted from https://github.com/tensorpack/tensorpack/blob/master/tensorpack/utils/develop.py - - -def create_dummy_class(klass, dependency, message=""): - """ - When a dependency of a class is not available, create a dummy class which throws ImportError - when used. - - Args: - klass (str): name of the class. - dependency (str): name of the dependency. - message: extra message to print - Returns: - class: a class object - """ - err = "Cannot import '{}', therefore '{}' is not available.".format(dependency, klass) - if message: - err = err + " " + message - - class _DummyMetaClass(type): - # throw error on class attribute access - def __getattr__(_, __): # noqa: B902 - raise ImportError(err) - - class _Dummy(object, metaclass=_DummyMetaClass): - # throw error on constructor - def __init__(self, *args, **kwargs): - raise ImportError(err) - - return _Dummy - - -def create_dummy_func(func, dependency, message=""): - """ - When a dependency of a function is not available, create a dummy function which throws - ImportError when used. - - Args: - func (str): name of the function. - dependency (str or list[str]): name(s) of the dependency. - message: extra message to print - Returns: - function: a function object - """ - err = "Cannot import '{}', therefore '{}' is not available.".format(dependency, func) - if message: - err = err + " " + message - - if isinstance(dependency, (list, tuple)): - dependency = ",".join(dependency) - - def _dummy(*args, **kwargs): - raise ImportError(err) - - return _dummy diff --git a/detectron2/detectron2/utils/env.py b/detectron2/detectron2/utils/env.py deleted file mode 100644 index 2e292d43c6217647b186d55b704b312a160d2c89..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/env.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import importlib -import importlib.util -import logging -import numpy as np -import os -import random -import sys -from datetime import datetime -import torch - -__all__ = ["seed_all_rng"] - - -TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) -""" -PyTorch version as a tuple of 2 ints. Useful for comparison. -""" - - -DOC_BUILDING = os.getenv("_DOC_BUILDING", False) # set in docs/conf.py -""" -Whether we're building documentation. 
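create_dummy_class() and create_dummy_func() above implement the optional-dependency pattern: imports never fail, but the first use raises a clear ImportError naming the missing package. A sketch with arbitrary example dependencies (shapely and psutil are illustrative, not requirements):

from detectron2.utils.develop import create_dummy_class, create_dummy_func

try:
    from shapely.geometry import Polygon  # hypothetical optional dependency
except ImportError:
    Polygon = create_dummy_class("Polygon", "shapely")

try:
    import psutil

    def memory_in_use():
        return psutil.virtual_memory().used
except ImportError:
    memory_in_use = create_dummy_func("memory_in_use", "psutil")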
-""" - - -def seed_all_rng(seed=None): - """ - Set the random seed for the RNG in torch, numpy and python. - - Args: - seed (int): if None, will use a strong random seed. - """ - if seed is None: - seed = ( - os.getpid() - + int(datetime.now().strftime("%S%f")) - + int.from_bytes(os.urandom(2), "big") - ) - logger = logging.getLogger(__name__) - logger.info("Using a generated random seed {}".format(seed)) - np.random.seed(seed) - torch.manual_seed(seed) - random.seed(seed) - torch.cuda.manual_seed_all(str(seed)) - os.environ["PYTHONHASHSEED"] = str(seed) - - -# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path -def _import_file(module_name, file_path, make_importable=False): - spec = importlib.util.spec_from_file_location(module_name, file_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - if make_importable: - sys.modules[module_name] = module - return module - - -def _configure_libraries(): - """ - Configurations for some libraries. - """ - # An environment option to disable `import cv2` globally, - # in case it leads to negative performance impact - disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) - if disable_cv2: - sys.modules["cv2"] = None - else: - # Disable opencl in opencv since its interaction with cuda often has negative effects - # This envvar is supported after OpenCV 3.4.0 - os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" - try: - import cv2 - - if int(cv2.__version__.split(".")[0]) >= 3: - cv2.ocl.setUseOpenCL(False) - except ModuleNotFoundError: - # Other types of ImportError, if happened, should not be ignored. - # Because a failed opencv import could mess up address space - # https://github.com/skvark/opencv-python/issues/381 - pass - - def get_version(module, digit=2): - return tuple(map(int, module.__version__.split(".")[:digit])) - - # fmt: off - assert get_version(torch) >= (1, 4), "Requires torch>=1.4" - import fvcore - assert get_version(fvcore, 3) >= (0, 1, 2), "Requires fvcore>=0.1.2" - import yaml - assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" - # fmt: on - - -_ENV_SETUP_DONE = False - - -def setup_environment(): - """Perform environment setup work. The default setup is a no-op, but this - function allows the user to specify a Python source file or a module in - the $DETECTRON2_ENV_MODULE environment variable, that performs - custom setup work that may be necessary to their computing environment. - """ - global _ENV_SETUP_DONE - if _ENV_SETUP_DONE: - return - _ENV_SETUP_DONE = True - - _configure_libraries() - - custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") - - if custom_module_path: - setup_custom_environment(custom_module_path) - else: - # The default setup is a no-op - pass - - -def setup_custom_environment(custom_module): - """ - Load custom environment setup by importing a Python source file or a - module, and run the setup function. - """ - if custom_module.endswith(".py"): - module = _import_file("detectron2.utils.env.custom_module", custom_module) - else: - module = importlib.import_module(custom_module) - assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( - "Custom environment module defined in {} does not have the " - "required callable attribute 'setup_environment'." 
- ).format(custom_module) - module.setup_environment() - - -def fixup_module_metadata(module_name, namespace, keys=None): - """ - Fix the __qualname__ of module members to be their exported api name, so - when they are referenced in docs, sphinx can find them. Reference: - https://github.com/python-trio/trio/blob/6754c74eacfad9cc5c92d5c24727a2f3b620624e/trio/_util.py#L216-L241 - """ - if not DOC_BUILDING: - return - seen_ids = set() - - def fix_one(qualname, name, obj): - # avoid infinite recursion (relevant when using - # typing.Generic, for example) - if id(obj) in seen_ids: - return - seen_ids.add(id(obj)) - - mod = getattr(obj, "__module__", None) - if mod is not None and (mod.startswith(module_name) or mod.startswith("fvcore.")): - obj.__module__ = module_name - # Modules, unlike everything else in Python, put fully-qualitied - # names into their __name__ attribute. We check for "." to avoid - # rewriting these. - if hasattr(obj, "__name__") and "." not in obj.__name__: - obj.__name__ = name - obj.__qualname__ = qualname - if isinstance(obj, type): - for attr_name, attr_value in obj.__dict__.items(): - fix_one(objname + "." + attr_name, attr_name, attr_value) - - if keys is None: - keys = namespace.keys() - for objname in keys: - if not objname.startswith("_"): - obj = namespace[objname] - fix_one(objname, objname, obj) diff --git a/detectron2/detectron2/utils/events.py b/detectron2/detectron2/utils/events.py deleted file mode 100644 index c4f9dadfd87bab25c3d25bcf3a65bc7191e52cf1..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/events.py +++ /dev/null @@ -1,557 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import datetime -import json -import logging -import os -import time -from collections import defaultdict -from contextlib import contextmanager -from functools import cached_property -from typing import Optional -import torch -from fvcore.common.history_buffer import HistoryBuffer - -from detectron2.utils.file_io import PathManager - -__all__ = [ - "get_event_storage", - "has_event_storage", - "JSONWriter", - "TensorboardXWriter", - "CommonMetricPrinter", - "EventStorage", -] - -_CURRENT_STORAGE_STACK = [] - - -def get_event_storage(): - """ - Returns: - The :class:`EventStorage` object that's currently being used. - Throws an error if no :class:`EventStorage` is currently enabled. - """ - assert len( - _CURRENT_STORAGE_STACK - ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" - return _CURRENT_STORAGE_STACK[-1] - - -def has_event_storage(): - """ - Returns: - Check if there are EventStorage() context existed. - """ - return len(_CURRENT_STORAGE_STACK) > 0 - - -class EventWriter: - """ - Base class for writers that obtain events from :class:`EventStorage` and process them. - """ - - def write(self): - raise NotImplementedError - - def close(self): - pass - - -class JSONWriter(EventWriter): - """ - Write scalars to a json file. - - It saves scalars as one json per line (instead of a big json) for easy parsing. 
- - Examples parsing such a json file: - :: - $ cat metrics.json | jq -s '.[0:2]' - [ - { - "data_time": 0.008433341979980469, - "iteration": 19, - "loss": 1.9228371381759644, - "loss_box_reg": 0.050025828182697296, - "loss_classifier": 0.5316952466964722, - "loss_mask": 0.7236229181289673, - "loss_rpn_box": 0.0856662318110466, - "loss_rpn_cls": 0.48198649287223816, - "lr": 0.007173333333333333, - "time": 0.25401854515075684 - }, - { - "data_time": 0.007216215133666992, - "iteration": 39, - "loss": 1.282649278640747, - "loss_box_reg": 0.06222952902317047, - "loss_classifier": 0.30682939291000366, - "loss_mask": 0.6970193982124329, - "loss_rpn_box": 0.038663312792778015, - "loss_rpn_cls": 0.1471673548221588, - "lr": 0.007706666666666667, - "time": 0.2490077018737793 - } - ] - - $ cat metrics.json | jq '.loss_mask' - 0.7126231789588928 - 0.689423680305481 - 0.6776131987571716 - ... - - """ - - def __init__(self, json_file, window_size=20): - """ - Args: - json_file (str): path to the json file. New data will be appended if the file exists. - window_size (int): the window size of median smoothing for the scalars whose - `smoothing_hint` are True. - """ - self._file_handle = PathManager.open(json_file, "a") - self._window_size = window_size - self._last_write = -1 - - def write(self): - storage = get_event_storage() - to_save = defaultdict(dict) - - for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): - # keep scalars that have not been written - if iter <= self._last_write: - continue - to_save[iter][k] = v - if len(to_save): - all_iters = sorted(to_save.keys()) - self._last_write = max(all_iters) - - for itr, scalars_per_iter in to_save.items(): - scalars_per_iter["iteration"] = itr - self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") - self._file_handle.flush() - try: - os.fsync(self._file_handle.fileno()) - except AttributeError: - pass - - def close(self): - self._file_handle.close() - - -class TensorboardXWriter(EventWriter): - """ - Write all scalars to a tensorboard file. - """ - - def __init__(self, log_dir: str, window_size: int = 20, **kwargs): - """ - Args: - log_dir (str): the directory to save the output events - window_size (int): the scalars will be median-smoothed by this window size - - kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` - """ - self._window_size = window_size - self._writer_args = {"log_dir": log_dir, **kwargs} - self._last_write = -1 - - @cached_property - def _writer(self): - from torch.utils.tensorboard import SummaryWriter - - return SummaryWriter(**self._writer_args) - - def write(self): - storage = get_event_storage() - new_last_write = self._last_write - for k, (v, iter) in storage.latest_with_smoothing_hint(self._window_size).items(): - if iter > self._last_write: - self._writer.add_scalar(k, v, iter) - new_last_write = max(new_last_write, iter) - self._last_write = new_last_write - - # storage.put_{image,histogram} is only meant to be used by - # tensorboard writer. So we access its internal fields directly from here. - if len(storage._vis_data) >= 1: - for img_name, img, step_num in storage._vis_data: - self._writer.add_image(img_name, img, step_num) - # Storage stores all image data and rely on this writer to clear them. - # As a result it assumes only one writer will use its image data. - # An alternative design is to let storage store limited recent - # data (e.g. only the most recent image) that all writers can access. 
- # In that case a writer may not see all image data if its period is long. - storage.clear_images() - - if len(storage._histograms) >= 1: - for params in storage._histograms: - self._writer.add_histogram_raw(**params) - storage.clear_histograms() - - def close(self): - if "_writer" in self.__dict__: - self._writer.close() - - -class CommonMetricPrinter(EventWriter): - """ - Print **common** metrics to the terminal, including - iteration time, ETA, memory, all losses, and the learning rate. - It also applies smoothing using a window of 20 elements. - - It's meant to print common metrics in common ways. - To print something in more customized ways, please implement a similar printer by yourself. - """ - - def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): - """ - Args: - max_iter: the maximum number of iterations to train. - Used to compute ETA. If not given, ETA will not be printed. - window_size (int): the losses will be median-smoothed by this window size - """ - self.logger = logging.getLogger("detectron2.utils.events") - self._max_iter = max_iter - self._window_size = window_size - self._last_write = None # (step, time) of last call to write(). Used to compute ETA - - def _get_eta(self, storage) -> Optional[str]: - if self._max_iter is None: - return "" - iteration = storage.iter - try: - eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration - 1) - storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) - return str(datetime.timedelta(seconds=int(eta_seconds))) - except KeyError: - # estimate eta on our own - more noisy - eta_string = None - if self._last_write is not None: - estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( - iteration - self._last_write[0] - ) - eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - self._last_write = (iteration, time.perf_counter()) - return eta_string - - def write(self): - storage = get_event_storage() - iteration = storage.iter - if iteration == self._max_iter: - # This hook only reports training progress (loss, ETA, etc) but not other data, - # therefore do not write anything after training succeeds, even if this method - # is called. 
- return - - try: - avg_data_time = storage.history("data_time").avg( - storage.count_samples("data_time", self._window_size) - ) - last_data_time = storage.history("data_time").latest() - except KeyError: - # they may not exist in the first few iterations (due to warmup) - # or when SimpleTrainer is not used - avg_data_time = None - last_data_time = None - try: - avg_iter_time = storage.history("time").global_avg() - last_iter_time = storage.history("time").latest() - except KeyError: - avg_iter_time = None - last_iter_time = None - try: - lr = "{:.5g}".format(storage.history("lr").latest()) - except KeyError: - lr = "N/A" - - eta_string = self._get_eta(storage) - - if torch.cuda.is_available(): - max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 - else: - max_mem_mb = None - - # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" - self.logger.info( - str.format( - " {eta}iter: {iter} {losses} {non_losses} {avg_time}{last_time}" - + "{avg_data_time}{last_data_time} lr: {lr} {memory}", - eta=f"eta: {eta_string} " if eta_string else "", - iter=iteration, - losses=" ".join( - [ - "{}: {:.4g}".format( - k, v.median(storage.count_samples(k, self._window_size)) - ) - for k, v in storage.histories().items() - if "loss" in k - ] - ), - non_losses=" ".join( - [ - "{}: {:.4g}".format( - k, v.median(storage.count_samples(k, self._window_size)) - ) - for k, v in storage.histories().items() - if "[metric]" in k - ] - ), - avg_time=( - "time: {:.4f} ".format(avg_iter_time) if avg_iter_time is not None else "" - ), - last_time=( - "last_time: {:.4f} ".format(last_iter_time) - if last_iter_time is not None - else "" - ), - avg_data_time=( - "data_time: {:.4f} ".format(avg_data_time) if avg_data_time is not None else "" - ), - last_data_time=( - "last_data_time: {:.4f} ".format(last_data_time) - if last_data_time is not None - else "" - ), - lr=lr, - memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", - ) - ) - - -class EventStorage: - """ - The user-facing class that provides metric storage functionalities. - - In the future we may add support for storing / logging other types of data if needed. - """ - - def __init__(self, start_iter=0): - """ - Args: - start_iter (int): the iteration number to start with - """ - self._history = defaultdict(HistoryBuffer) - self._smoothing_hints = {} - self._latest_scalars = {} - self._iter = start_iter - self._current_prefix = "" - self._vis_data = [] - self._histograms = [] - - def put_image(self, img_name, img_tensor): - """ - Add an `img_tensor` associated with `img_name`, to be shown on - tensorboard. - - Args: - img_name (str): The name of the image to put into tensorboard. - img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` - Tensor of shape `[channel, height, width]` where `channel` is - 3. The image format should be RGB. The elements in img_tensor - can either have values in [0, 1] (float32) or [0, 255] (uint8). - The `img_tensor` will be visualized in tensorboard. - """ - self._vis_data.append((img_name, img_tensor, self._iter)) - - def put_scalar(self, name, value, smoothing_hint=True, cur_iter=None): - """ - Add a scalar `value` to the `HistoryBuffer` associated with `name`. - - Args: - smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be - smoothed when logged. The hint will be accessible through - :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint - and apply custom smoothing rule. 
- - It defaults to True because most scalars we save need to be smoothed to - provide any useful signal. - cur_iter (int): an iteration number to set explicitly instead of current iteration - """ - name = self._current_prefix + name - cur_iter = self._iter if cur_iter is None else cur_iter - history = self._history[name] - value = float(value) - history.update(value, cur_iter) - self._latest_scalars[name] = (value, cur_iter) - - existing_hint = self._smoothing_hints.get(name) - - if existing_hint is not None: - assert ( - existing_hint == smoothing_hint - ), "Scalar {} was put with a different smoothing_hint!".format(name) - else: - self._smoothing_hints[name] = smoothing_hint - - def put_scalars(self, *, smoothing_hint=True, cur_iter=None, **kwargs): - """ - Put multiple scalars from keyword arguments. - - Examples: - - storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) - """ - for k, v in kwargs.items(): - self.put_scalar(k, v, smoothing_hint=smoothing_hint, cur_iter=cur_iter) - - def put_histogram(self, hist_name, hist_tensor, bins=1000): - """ - Create a histogram from a tensor. - - Args: - hist_name (str): The name of the histogram to put into tensorboard. - hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted - into a histogram. - bins (int): Number of histogram bins. - """ - ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() - - # Create a histogram with PyTorch - hist_counts = torch.histc(hist_tensor, bins=bins) - hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) - - # Parameter for the add_histogram_raw function of SummaryWriter - hist_params = dict( - tag=hist_name, - min=ht_min, - max=ht_max, - num=len(hist_tensor), - sum=float(hist_tensor.sum()), - sum_squares=float(torch.sum(hist_tensor**2)), - bucket_limits=hist_edges[1:].tolist(), - bucket_counts=hist_counts.tolist(), - global_step=self._iter, - ) - self._histograms.append(hist_params) - - def history(self, name): - """ - Returns: - HistoryBuffer: the scalar history for name - """ - ret = self._history.get(name, None) - if ret is None: - raise KeyError("No history metric available for {}!".format(name)) - return ret - - def histories(self): - """ - Returns: - dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars - """ - return self._history - - def latest(self): - """ - Returns: - dict[str -> (float, int)]: mapping from the name of each scalar to the most - recent value and the iteration number its added. - """ - return self._latest_scalars - - def latest_with_smoothing_hint(self, window_size=20): - """ - Similar to :meth:`latest`, but the returned values - are either the un-smoothed original latest value, - or a median of the given window_size, - depend on whether the smoothing_hint is True. - - This provides a default behavior that other writers can use. - - Note: All scalars saved in the past `window_size` iterations are used for smoothing. - This is different from the `window_size` definition in HistoryBuffer. - Use :meth:`get_history_window_size` to get the `window_size` used in HistoryBuffer. - """ - result = {} - for k, (v, itr) in self._latest_scalars.items(): - result[k] = ( - ( - self._history[k].median(self.count_samples(k, window_size)) - if self._smoothing_hints[k] - else v - ), - itr, - ) - return result - - def count_samples(self, name, window_size=20): - """ - Return the number of samples logged in the past `window_size` iterations. 
- """ - samples = 0 - data = self._history[name].values() - for _, iter_ in reversed(data): - if iter_ > data[-1][1] - window_size: - samples += 1 - else: - break - return samples - - def smoothing_hints(self): - """ - Returns: - dict[name -> bool]: the user-provided hint on whether the scalar - is noisy and needs smoothing. - """ - return self._smoothing_hints - - def step(self): - """ - User should either: (1) Call this function to increment storage.iter when needed. Or - (2) Set `storage.iter` to the correct iteration number before each iteration. - - The storage will then be able to associate the new data with an iteration number. - """ - self._iter += 1 - - @property - def iter(self): - """ - Returns: - int: The current iteration number. When used together with a trainer, - this is ensured to be the same as trainer.iter. - """ - return self._iter - - @iter.setter - def iter(self, val): - self._iter = int(val) - - @property - def iteration(self): - # for backward compatibility - return self._iter - - def __enter__(self): - _CURRENT_STORAGE_STACK.append(self) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - assert _CURRENT_STORAGE_STACK[-1] == self - _CURRENT_STORAGE_STACK.pop() - - @contextmanager - def name_scope(self, name): - """ - Yields: - A context within which all the events added to this storage - will be prefixed by the name scope. - """ - old_prefix = self._current_prefix - self._current_prefix = name.rstrip("/") + "/" - yield - self._current_prefix = old_prefix - - def clear_images(self): - """ - Delete all the stored images for visualization. This should be called - after images are written to tensorboard. - """ - self._vis_data = [] - - def clear_histograms(self): - """ - Delete all the stored histograms for visualization. - This should be called after histograms are written to tensorboard. - """ - self._histograms = [] diff --git a/detectron2/detectron2/utils/file_io.py b/detectron2/detectron2/utils/file_io.py deleted file mode 100644 index 09f7dffdb36199350bba57bd3b4e9e8babb40594..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/file_io.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from iopath.common.file_io import HTTPURLHandler, OneDrivePathHandler, PathHandler -from iopath.common.file_io import PathManager as PathManagerBase - -__all__ = ["PathManager", "PathHandler"] - - -PathManager = PathManagerBase() -""" -This is a detectron2 project-specific PathManager. -We try to stay away from global PathManager in fvcore as it -introduces potential conflicts among other libraries. -""" - - -class Detectron2Handler(PathHandler): - """ - Resolve anything that's hosted under detectron2's namespace. 
- """ - - PREFIX = "detectron2://" - S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path, **kwargs): - name = path[len(self.PREFIX) :] - return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name, **kwargs) - - def _open(self, path, mode="r", **kwargs): - return PathManager.open( - self.S3_DETECTRON2_PREFIX + path[len(self.PREFIX) :], mode, **kwargs - ) - - -PathManager.register_handler(HTTPURLHandler()) -PathManager.register_handler(OneDrivePathHandler()) -PathManager.register_handler(Detectron2Handler()) diff --git a/detectron2/detectron2/utils/logger.py b/detectron2/detectron2/utils/logger.py deleted file mode 100644 index 85be03cb174a8802ff775842395fd30b4b5db61b..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/logger.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import atexit -import functools -import logging -import os -import sys -import time -from collections import Counter -import torch -from tabulate import tabulate -from termcolor import colored - -from detectron2.utils.file_io import PathManager - -__all__ = ["setup_logger", "log_first_n", "log_every_n", "log_every_n_seconds"] - -D2_LOG_BUFFER_SIZE_KEY: str = "D2_LOG_BUFFER_SIZE" - -DEFAULT_LOG_BUFFER_SIZE: int = 1024 * 1024 # 1MB - - -class _ColorfulFormatter(logging.Formatter): - def __init__(self, *args, **kwargs): - self._root_name = kwargs.pop("root_name") + "." - self._abbrev_name = kwargs.pop("abbrev_name", "") - if len(self._abbrev_name): - self._abbrev_name = self._abbrev_name + "." - super(_ColorfulFormatter, self).__init__(*args, **kwargs) - - def formatMessage(self, record): - record.name = record.name.replace(self._root_name, self._abbrev_name) - log = super(_ColorfulFormatter, self).formatMessage(record) - if record.levelno == logging.WARNING: - prefix = colored("WARNING", "red", attrs=["blink"]) - elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: - prefix = colored("ERROR", "red", attrs=["blink", "underline"]) - else: - return log - return prefix + " " + log - - -@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers -def setup_logger( - output=None, - distributed_rank=0, - *, - color=True, - name="detectron2", - abbrev_name=None, - enable_propagation: bool = False, - configure_stdout: bool = True -): - """ - Initialize the detectron2 logger and set its verbosity level to "DEBUG". - - Args: - output (str): a file name or a directory to save log. If None, will not save log file. - If ends with ".txt" or ".log", assumed to be a file name. - Otherwise, logs will be saved to `output/log.txt`. - name (str): the root module name of this logger - abbrev_name (str): an abbreviation of the module, to avoid long names in logs. - Set to "" to not log the root module in logs. - By default, will abbreviate "detectron2" to "d2" and leave other - modules unchanged. - enable_propagation (bool): whether to propagate logs to the parent logger. - configure_stdout (bool): whether to configure logging to stdout. 
- - - Returns: - logging.Logger: a logger - """ - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - logger.propagate = enable_propagation - - if abbrev_name is None: - abbrev_name = "d2" if name == "detectron2" else name - - plain_formatter = logging.Formatter( - "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" - ) - # stdout logging: master only - if configure_stdout and distributed_rank == 0: - ch = logging.StreamHandler(stream=sys.stdout) - ch.setLevel(logging.DEBUG) - if color: - formatter = _ColorfulFormatter( - colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", - datefmt="%m/%d %H:%M:%S", - root_name=name, - abbrev_name=str(abbrev_name), - ) - else: - formatter = plain_formatter - ch.setFormatter(formatter) - logger.addHandler(ch) - - # file logging: all workers - if output is not None: - if output.endswith(".txt") or output.endswith(".log"): - filename = output - else: - filename = os.path.join(output, "log.txt") - if distributed_rank > 0: - filename = filename + ".rank{}".format(distributed_rank) - PathManager.mkdirs(os.path.dirname(filename)) - - fh = logging.StreamHandler(_cached_log_stream(filename)) - fh.setLevel(logging.DEBUG) - fh.setFormatter(plain_formatter) - logger.addHandler(fh) - - return logger - - -# cache the opened file object, so that different calls to `setup_logger` -# with the same file name can safely write to the same file. -@functools.lru_cache(maxsize=None) -def _cached_log_stream(filename): - # use 1K buffer if writing to cloud storage - io = PathManager.open(filename, "a", buffering=_get_log_stream_buffer_size(filename)) - atexit.register(io.close) - return io - - -def _get_log_stream_buffer_size(filename: str) -> int: - if "://" not in filename: - # Local file, no extra caching is necessary - return -1 - # Remote file requires a larger cache to avoid many small writes. - if D2_LOG_BUFFER_SIZE_KEY in os.environ: - return int(os.environ[D2_LOG_BUFFER_SIZE_KEY]) - return DEFAULT_LOG_BUFFER_SIZE - - -""" -Below are some other convenient logging methods. -They are mainly adopted from -https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py -""" - - -def _find_caller(): - """ - Returns: - str: module name of the caller - tuple: a hashable key to be used to identify different callers - """ - frame = sys._getframe(2) - while frame: - code = frame.f_code - if os.path.join("utils", "logger.") not in code.co_filename: - mod_name = frame.f_globals["__name__"] - if mod_name == "__main__": - mod_name = "detectron2" - return mod_name, (code.co_filename, frame.f_lineno, code.co_name) - frame = frame.f_back - - -_LOG_COUNTER = Counter() -_LOG_TIMER = {} - - -def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): - """ - Log only for the first n times. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - key (str or tuple[str]): the string(s) can be one of "caller" or - "message", which defines how to identify duplicated logs. - For example, if called with `n=1, key="caller"`, this function - will only log the first call from the same caller, regardless of - the message content. - If called with `n=1, key="message"`, this function will log the - same content only once, even if they are called from different places. - If called with `n=1, key=("caller", "message")`, this function - will not log only if the same caller has logged the same message before. 
- """ - if isinstance(key, str): - key = (key,) - assert len(key) > 0 - - caller_module, caller_key = _find_caller() - hash_key = () - if "caller" in key: - hash_key = hash_key + caller_key - if "message" in key: - hash_key = hash_key + (msg,) - - _LOG_COUNTER[hash_key] += 1 - if _LOG_COUNTER[hash_key] <= n: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def log_every_n(lvl, msg, n=1, *, name=None): - """ - Log once per n times. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - """ - caller_module, key = _find_caller() - _LOG_COUNTER[key] += 1 - if n == 1 or _LOG_COUNTER[key] % n == 1: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def log_every_n_seconds(lvl, msg, n=1, *, name=None): - """ - Log no more than once per n seconds. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - """ - caller_module, key = _find_caller() - last_logged = _LOG_TIMER.get(key, None) - current_time = time.time() - if last_logged is None or current_time - last_logged >= n: - logging.getLogger(name or caller_module).log(lvl, msg) - _LOG_TIMER[key] = current_time - - -def create_small_table(small_dict): - """ - Create a small table using the keys of small_dict as headers. This is only - suitable for small dictionaries. - - Args: - small_dict (dict): a result dictionary of only a few items. - - Returns: - str: the table as a string. - """ - keys, values = tuple(zip(*small_dict.items())) - table = tabulate( - [values], - headers=keys, - tablefmt="pipe", - floatfmt=".3f", - stralign="center", - numalign="center", - ) - return table - - -def _log_api_usage(identifier: str): - """ - Internal function used to log the usage of different detectron2 components - inside facebook's infra. - """ - torch._C._log_api_usage_once("detectron2." + identifier) diff --git a/detectron2/detectron2/utils/memory.py b/detectron2/detectron2/utils/memory.py deleted file mode 100644 index bd494780b9dbbd1571688cd270bb9b53d113c13e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/memory.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import logging -from contextlib import contextmanager -from functools import wraps -import torch - -__all__ = ["retry_if_cuda_oom"] - - -@contextmanager -def _ignore_torch_cuda_oom(): - """ - A context which ignores CUDA OOM exception from pytorch. - """ - try: - yield - except RuntimeError as e: - # NOTE: the string may change? - if "CUDA out of memory. " in str(e): - pass - else: - raise - - -def retry_if_cuda_oom(func): - """ - Makes a function retry itself after encountering - pytorch's CUDA OOM error. - It will first retry after calling `torch.cuda.empty_cache()`. - - If that still fails, it will then retry by trying to convert inputs to CPUs. - In this case, it expects the function to dispatch to CPU implementation. - The return values may become CPU tensors as well and it's user's - responsibility to convert it back to CUDA tensor if needed. - - Args: - func: a stateless callable that takes tensor-like objects as arguments - - Returns: - a callable which retries `func` if OOM is encountered. - - Examples: - :: - output = retry_if_cuda_oom(some_torch_function)(input1, input2) - # output may be on CPU even if inputs are on GPU - - Note: - 1. 
When converting inputs to CPU, it will only look at each argument and check - if it has `.device` and `.to` for conversion. Nested structures of tensors - are not supported. - - 2. Since the function might be called more than once, it has to be - stateless. - """ - - def maybe_to_cpu(x): - try: - like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") - except AttributeError: - like_gpu_tensor = False - if like_gpu_tensor: - return x.to(device="cpu") - else: - return x - - @wraps(func) - def wrapped(*args, **kwargs): - with _ignore_torch_cuda_oom(): - return func(*args, **kwargs) - - # Clear cache and retry - torch.cuda.empty_cache() - with _ignore_torch_cuda_oom(): - return func(*args, **kwargs) - - # Try on CPU. This slows down the code significantly, therefore print a notice. - logger = logging.getLogger(__name__) - logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) - new_args = (maybe_to_cpu(x) for x in args) - new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} - return func(*new_args, **new_kwargs) - - return wrapped diff --git a/detectron2/detectron2/utils/registry.py b/detectron2/detectron2/utils/registry.py deleted file mode 100644 index 4b01e9007c2578a7b5ae555c926cc06c8a3010f9..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/registry.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -from typing import Any -import pydoc -from fvcore.common.registry import Registry # for backward compatibility. - -""" -``Registry`` and `locate` provide ways to map a string (typically found -in config files) to callable objects. -""" - -__all__ = ["Registry", "locate"] - - -def _convert_target_to_string(t: Any) -> str: - """ - Inverse of ``locate()``. - - Args: - t: any object with ``__module__`` and ``__qualname__`` - """ - module, qualname = t.__module__, t.__qualname__ - - # Compress the path to this object, e.g. ``module.submodule._impl.class`` - # may become ``module.submodule.class``, if the later also resolves to the same - # object. This simplifies the string, and also is less affected by moving the - # class implementation. - module_parts = module.split(".") - for k in range(1, len(module_parts)): - prefix = ".".join(module_parts[:k]) - candidate = f"{prefix}.{qualname}" - try: - if locate(candidate) is t: - return candidate - except ImportError: - pass - return f"{module}.{qualname}" - - -def locate(name: str) -> Any: - """ - Locate and return an object ``x`` using an input string ``{x.__module__}.{x.__qualname__}``, - such as "module.submodule.class_name". - - Raise Exception if it cannot be found. - """ - obj = pydoc.locate(name) - - # Some cases (e.g. torch.optim.sgd.SGD) not handled correctly - # by pydoc.locate. Try a private function from hydra. - if obj is None: - try: - # from hydra.utils import get_method - will print many errors - from hydra.utils import _locate - except ImportError as e: - raise ImportError(f"Cannot dynamically locate object {name}!") from e - else: - obj = _locate(name) # it raises if fails - - return obj diff --git a/detectron2/detectron2/utils/serialize.py b/detectron2/detectron2/utils/serialize.py deleted file mode 100644 index 611903d287c6ccd4195f391bfb134ac2a7b5ddec..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/serialize.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import cloudpickle - - -class PicklableWrapper: - """ - Wrap an object to make it more picklable, note that it uses - heavy weight serialization libraries that are slower than pickle. - It's best to use it only on closures (which are usually not picklable). - - This is a simplified version of - https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py - """ - - def __init__(self, obj): - while isinstance(obj, PicklableWrapper): - # Wrapping an object twice is no-op - obj = obj._obj - self._obj = obj - - def __reduce__(self): - s = cloudpickle.dumps(self._obj) - return cloudpickle.loads, (s,) - - def __call__(self, *args, **kwargs): - return self._obj(*args, **kwargs) - - def __getattr__(self, attr): - # Ensure that the wrapped object can be used seamlessly as the previous object. - if attr not in ["_obj"]: - return getattr(self._obj, attr) - return getattr(self, attr) diff --git a/detectron2/detectron2/utils/testing.py b/detectron2/detectron2/utils/testing.py deleted file mode 100644 index 3f5b9dbe4438e1f5c6976b45bafed8966aee2dd9..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/testing.py +++ /dev/null @@ -1,478 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import io -import numpy as np -import os -import re -import tempfile -import unittest -from typing import Callable -import torch -import torch.onnx.symbolic_helper as sym_help -from packaging import version -from torch._C import ListType -from torch.onnx import register_custom_op_symbolic - -from detectron2 import model_zoo -from detectron2.config import CfgNode, LazyConfig, instantiate -from detectron2.data import DatasetCatalog -from detectron2.data.detection_utils import read_image -from detectron2.modeling import build_model -from detectron2.structures import Boxes, Instances, ROIMasks -from detectron2.utils.file_io import PathManager - - -""" -Internal utilities for tests. Don't use except for writing tests. -""" - - -def get_model_no_weights(config_path): - """ - Like model_zoo.get, but do not load any weights (even pretrained) - """ - cfg = model_zoo.get_config(config_path) - if isinstance(cfg, CfgNode): - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - return build_model(cfg) - else: - return instantiate(cfg.model) - - -def random_boxes(num_boxes, max_coord=100, device="cpu"): - """ - Create a random Nx4 boxes tensor, with coordinates < max_coord. - """ - boxes = torch.rand(num_boxes, 4, device=device) * (max_coord * 0.5) - boxes.clamp_(min=1.0) # tiny boxes cause numerical instability in box regression - # Note: the implementation of this function in torchvision is: - # boxes[:, 2:] += torch.rand(N, 2) * 100 - # but it does not guarantee non-negative widths/heights constraints: - # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: - boxes[:, 2:] += boxes[:, :2] - return boxes - - -def get_sample_coco_image(tensor=True): - """ - Args: - tensor (bool): if True, returns 3xHxW tensor. - else, returns a HxWx3 numpy array. - - Returns: - an image, in BGR color. 
- """ - try: - file_name = DatasetCatalog.get("coco_2017_val_100")[0]["file_name"] - if not PathManager.exists(file_name): - raise FileNotFoundError() - except IOError: - # for public CI to run - file_name = PathManager.get_local_path( - "http://images.cocodataset.org/train2017/000000000009.jpg" - ) - ret = read_image(file_name, format="BGR") - if tensor: - ret = torch.from_numpy(np.ascontiguousarray(ret.transpose(2, 0, 1))) - return ret - - -def convert_scripted_instances(instances): - """ - Convert a scripted Instances object to a regular :class:`Instances` object - """ - assert hasattr( - instances, "image_size" - ), f"Expect an Instances object, but got {type(instances)}!" - ret = Instances(instances.image_size) - for name in instances._field_names: - val = getattr(instances, "_" + name, None) - if val is not None: - ret.set(name, val) - return ret - - -def assert_instances_allclose(input, other, *, rtol=1e-5, msg="", size_as_tensor=False): - """ - Args: - input, other (Instances): - size_as_tensor: compare image_size of the Instances as tensors (instead of tuples). - Useful for comparing outputs of tracing. - """ - if not isinstance(input, Instances): - input = convert_scripted_instances(input) - if not isinstance(other, Instances): - other = convert_scripted_instances(other) - - if not msg: - msg = "Two Instances are different! " - else: - msg = msg.rstrip() + " " - - size_error_msg = msg + f"image_size is {input.image_size} vs. {other.image_size}!" - if size_as_tensor: - assert torch.equal( - torch.tensor(input.image_size), torch.tensor(other.image_size) - ), size_error_msg - else: - assert input.image_size == other.image_size, size_error_msg - fields = sorted(input.get_fields().keys()) - fields_other = sorted(other.get_fields().keys()) - assert fields == fields_other, msg + f"Fields are {fields} vs {fields_other}!" - - for f in fields: - val1, val2 = input.get(f), other.get(f) - if isinstance(val1, (Boxes, ROIMasks)): - # boxes in the range of O(100) and can have a larger tolerance - assert torch.allclose(val1.tensor, val2.tensor, atol=100 * rtol), ( - msg + f"Field {f} differs too much!" - ) - elif isinstance(val1, torch.Tensor): - if val1.dtype.is_floating_point: - mag = torch.abs(val1).max().cpu().item() - assert torch.allclose(val1, val2, atol=mag * rtol), ( - msg + f"Field {f} differs too much!" - ) - else: - assert torch.equal(val1, val2), msg + f"Field {f} is different!" - else: - raise ValueError(f"Don't know how to compare type {type(val1)}") - - -def reload_script_model(module): - """ - Save a jit module and load it back. - Similar to the `getExportImportCopy` function in torch/testing/ - """ - buffer = io.BytesIO() - torch.jit.save(module, buffer) - buffer.seek(0) - return torch.jit.load(buffer) - - -def reload_lazy_config(cfg): - """ - Save an object by LazyConfig.save and load it back. - This is used to test that a config still works the same after - serialization/deserialization. - """ - with tempfile.TemporaryDirectory(prefix="detectron2") as d: - fname = os.path.join(d, "d2_cfg_test.yaml") - LazyConfig.save(cfg, fname) - return LazyConfig.load(fname) - - -def min_torch_version(min_version: str) -> bool: - """ - Returns True when torch's version is at least `min_version`. 
- """ - try: - import torch - except ImportError: - return False - - installed_version = version.parse(torch.__version__.split("+")[0]) - min_version = version.parse(min_version) - return installed_version >= min_version - - -def has_dynamic_axes(onnx_model): - """ - Return True when all ONNX input/output have only dynamic axes for all ranks - """ - return all( - not dim.dim_param.isnumeric() - for inp in onnx_model.graph.input - for dim in inp.type.tensor_type.shape.dim - ) and all( - not dim.dim_param.isnumeric() - for out in onnx_model.graph.output - for dim in out.type.tensor_type.shape.dim - ) - - -def register_custom_op_onnx_export( - opname: str, symbolic_fn: Callable, opset_version: int, min_version: str -) -> None: - """ - Register `symbolic_fn` as PyTorch's symbolic `opname`-`opset_version` for ONNX export. - The registration is performed only when current PyTorch's version is < `min_version.` - IMPORTANT: symbolic must be manually unregistered after the caller function returns - """ - if min_torch_version(min_version): - return - register_custom_op_symbolic(opname, symbolic_fn, opset_version) - print(f"_register_custom_op_onnx_export({opname}, {opset_version}) succeeded.") - - -def unregister_custom_op_onnx_export(opname: str, opset_version: int, min_version: str) -> None: - """ - Unregister PyTorch's symbolic `opname`-`opset_version` for ONNX export. - The un-registration is performed only when PyTorch's version is < `min_version` - IMPORTANT: The symbolic must have been manually registered by the caller, otherwise - the incorrect symbolic may be unregistered instead. - """ - - # TODO: _unregister_custom_op_symbolic is introduced PyTorch>=1.10 - # Remove after PyTorch 1.10+ is used by ALL detectron2's CI - try: - from torch.onnx import unregister_custom_op_symbolic as _unregister_custom_op_symbolic - except ImportError: - - def _unregister_custom_op_symbolic(symbolic_name, opset_version): - import torch.onnx.symbolic_registry as sym_registry - from torch.onnx.symbolic_helper import _onnx_main_opset, _onnx_stable_opsets - - def _get_ns_op_name_from_custom_op(symbolic_name): - try: - from torch.onnx.utils import get_ns_op_name_from_custom_op - - ns, op_name = get_ns_op_name_from_custom_op(symbolic_name) - except ImportError as import_error: - if not bool( - re.match(r"^[a-zA-Z0-9-_]*::[a-zA-Z-_]+[a-zA-Z0-9-_]*$", symbolic_name) - ): - raise ValueError( - f"Invalid symbolic name {symbolic_name}. Must be `domain::name`" - ) from import_error - - ns, op_name = symbolic_name.split("::") - if ns == "onnx": - raise ValueError(f"{ns} domain cannot be modified.") from import_error - - if ns == "aten": - ns = "" - - return ns, op_name - - def _unregister_op(opname: str, domain: str, version: int): - try: - sym_registry.unregister_op(op_name, ns, ver) - except AttributeError as attribute_error: - if sym_registry.is_registered_op(opname, domain, version): - del sym_registry._registry[(domain, version)][opname] - if not sym_registry._registry[(domain, version)]: - del sym_registry._registry[(domain, version)] - else: - raise RuntimeError( - f"The opname {opname} is not registered." 
- ) from attribute_error - - ns, op_name = _get_ns_op_name_from_custom_op(symbolic_name) - for ver in _onnx_stable_opsets + [_onnx_main_opset]: - if ver >= opset_version: - _unregister_op(op_name, ns, ver) - - if min_torch_version(min_version): - return - _unregister_custom_op_symbolic(opname, opset_version) - print(f"_unregister_custom_op_onnx_export({opname}, {opset_version}) succeeded.") - - -skipIfOnCPUCI = unittest.skipIf( - os.environ.get("CI") and not torch.cuda.is_available(), - "The test is too slow on CPUs and will be executed on CircleCI's GPU jobs.", -) - - -def skipIfUnsupportedMinOpsetVersion(min_opset_version, current_opset_version=None): - """ - Skips tests for ONNX Opset versions older than min_opset_version. - """ - - def skip_dec(func): - def wrapper(self): - try: - opset_version = self.opset_version - except AttributeError: - opset_version = current_opset_version - if opset_version < min_opset_version: - raise unittest.SkipTest( - f"Unsupported opset_version {opset_version}" - f", required is {min_opset_version}" - ) - return func(self) - - return wrapper - - return skip_dec - - -def skipIfUnsupportedMinTorchVersion(min_version): - """ - Skips tests for PyTorch versions older than min_version. - """ - reason = f"module 'torch' has __version__ {torch.__version__}" f", required is: {min_version}" - return unittest.skipIf(not min_torch_version(min_version), reason) - - -# TODO: Remove after PyTorch 1.11.1+ is used by detectron2's CI -def _pytorch1111_symbolic_opset9_to(g, self, *args): - """aten::to() symbolic that must be used for testing with PyTorch < 1.11.1.""" - - def is_aten_to_device_only(args): - if len(args) == 4: - # aten::to(Tensor, Device, bool, bool, memory_format) - return ( - args[0].node().kind() == "prim::device" - or args[0].type().isSubtypeOf(ListType.ofInts()) - or ( - sym_help._is_value(args[0]) - and args[0].node().kind() == "onnx::Constant" - and isinstance(args[0].node()["value"], str) - ) - ) - elif len(args) == 5: - # aten::to(Tensor, Device, ScalarType, bool, bool, memory_format) - # When dtype is None, this is a aten::to(device) call - dtype = sym_help._get_const(args[1], "i", "dtype") - return dtype is None - elif len(args) in (6, 7): - # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, memory_format) - # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, bool, memory_format) - # When dtype is None, this is a aten::to(device) call - dtype = sym_help._get_const(args[0], "i", "dtype") - return dtype is None - return False - - # ONNX doesn't have a concept of a device, so we ignore device-only casts - if is_aten_to_device_only(args): - return self - - if len(args) == 4: - # TestONNXRuntime::test_ones_bool shows args[0] of aten::to can be onnx::Constant[Tensor] - # In this case, the constant value is a tensor not int, - # so sym_help._maybe_get_const(args[0], 'i') would not work. 
- dtype = args[0] - if sym_help._is_value(args[0]) and args[0].node().kind() == "onnx::Constant": - tval = args[0].node()["value"] - if isinstance(tval, torch.Tensor): - if len(tval.shape) == 0: - tval = tval.item() - dtype = int(tval) - else: - dtype = tval - - if sym_help._is_value(dtype) or isinstance(dtype, torch.Tensor): - # aten::to(Tensor, Tensor, bool, bool, memory_format) - dtype = args[0].type().scalarType() - return g.op("Cast", self, to_i=sym_help.cast_pytorch_to_onnx[dtype]) - else: - # aten::to(Tensor, ScalarType, bool, bool, memory_format) - # memory_format is ignored - return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) - elif len(args) == 5: - # aten::to(Tensor, Device, ScalarType, bool, bool, memory_format) - dtype = sym_help._get_const(args[1], "i", "dtype") - # memory_format is ignored - return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) - elif len(args) == 6: - # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, memory_format) - dtype = sym_help._get_const(args[0], "i", "dtype") - # Layout, device and memory_format are ignored - return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) - elif len(args) == 7: - # aten::to(Tensor, ScalarType, Layout, Device, bool, bool, bool, memory_format) - dtype = sym_help._get_const(args[0], "i", "dtype") - # Layout, device and memory_format are ignored - return g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[dtype]) - else: - return sym_help._onnx_unsupported("Unknown aten::to signature") - - -# TODO: Remove after PyTorch 1.11.1+ is used by detectron2's CI -def _pytorch1111_symbolic_opset9_repeat_interleave(g, self, repeats, dim=None, output_size=None): - - # from torch.onnx.symbolic_helper import ScalarType - from torch.onnx.symbolic_opset9 import expand, unsqueeze - - input = self - # if dim is None flatten - # By default, use the flattened input array, and return a flat output array - if sym_help._is_none(dim): - input = sym_help._reshape_helper(g, self, g.op("Constant", value_t=torch.tensor([-1]))) - dim = 0 - else: - dim = sym_help._maybe_get_scalar(dim) - - repeats_dim = sym_help._get_tensor_rank(repeats) - repeats_sizes = sym_help._get_tensor_sizes(repeats) - input_sizes = sym_help._get_tensor_sizes(input) - if repeats_dim is None: - raise RuntimeError( - "Unsupported: ONNX export of repeat_interleave for unknown " "repeats rank." - ) - if repeats_sizes is None: - raise RuntimeError( - "Unsupported: ONNX export of repeat_interleave for unknown " "repeats size." - ) - if input_sizes is None: - raise RuntimeError( - "Unsupported: ONNX export of repeat_interleave for unknown " "input size." 
- ) - - input_sizes_temp = input_sizes.copy() - for idx, input_size in enumerate(input_sizes): - if input_size is None: - input_sizes[idx], input_sizes_temp[idx] = 0, -1 - - # Cases where repeats is an int or single value tensor - if repeats_dim == 0 or (repeats_dim == 1 and repeats_sizes[0] == 1): - if not sym_help._is_tensor(repeats): - repeats = g.op("Constant", value_t=torch.LongTensor(repeats)) - if input_sizes[dim] == 0: - return sym_help._onnx_opset_unsupported_detailed( - "repeat_interleave", - 9, - 13, - "Unsupported along dimension with unknown input size", - ) - else: - reps = input_sizes[dim] - repeats = expand(g, repeats, g.op("Constant", value_t=torch.tensor([reps])), None) - - # Cases where repeats is a 1 dim Tensor - elif repeats_dim == 1: - if input_sizes[dim] == 0: - return sym_help._onnx_opset_unsupported_detailed( - "repeat_interleave", - 9, - 13, - "Unsupported along dimension with unknown input size", - ) - if repeats_sizes[0] is None: - return sym_help._onnx_opset_unsupported_detailed( - "repeat_interleave", 9, 13, "Unsupported for cases with dynamic repeats" - ) - assert ( - repeats_sizes[0] == input_sizes[dim] - ), "repeats must have the same size as input along dim" - reps = repeats_sizes[0] - else: - raise RuntimeError("repeats must be 0-dim or 1-dim tensor") - - final_splits = list() - r_splits = sym_help._repeat_interleave_split_helper(g, repeats, reps, 0) - if isinstance(r_splits, torch._C.Value): - r_splits = [r_splits] - i_splits = sym_help._repeat_interleave_split_helper(g, input, reps, dim) - if isinstance(i_splits, torch._C.Value): - i_splits = [i_splits] - input_sizes[dim], input_sizes_temp[dim] = -1, 1 - for idx, r_split in enumerate(r_splits): - i_split = unsqueeze(g, i_splits[idx], dim + 1) - r_concat = [ - g.op("Constant", value_t=torch.LongTensor(input_sizes_temp[: dim + 1])), - r_split, - g.op("Constant", value_t=torch.LongTensor(input_sizes_temp[dim + 1 :])), - ] - r_concat = g.op("Concat", *r_concat, axis_i=0) - i_split = expand(g, i_split, r_concat, None) - i_split = sym_help._reshape_helper( - g, - i_split, - g.op("Constant", value_t=torch.LongTensor(input_sizes)), - allowzero=0, - ) - final_splits.append(i_split) - return g.op("Concat", *final_splits, axis_i=dim) diff --git a/detectron2/detectron2/utils/tracing.py b/detectron2/detectron2/utils/tracing.py deleted file mode 100644 index 002c0a3ee4892be23dfbcd103028d99911a32461..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/tracing.py +++ /dev/null @@ -1,73 +0,0 @@ -import inspect -import torch - -from detectron2.utils.env import TORCH_VERSION - -try: - from torch.fx._symbolic_trace import is_fx_tracing as is_fx_tracing_current - - tracing_current_exists = True -except ImportError: - tracing_current_exists = False - -try: - from torch.fx._symbolic_trace import _orig_module_call - - tracing_legacy_exists = True -except ImportError: - tracing_legacy_exists = False - - -@torch.jit.ignore -def is_fx_tracing_legacy() -> bool: - """ - Returns a bool indicating whether torch.fx is currently symbolically tracing a module. - Can be useful for gating module logic that is incompatible with symbolic tracing. 
- """ - return torch.nn.Module.__call__ is not _orig_module_call - - -def is_fx_tracing() -> bool: - """Returns whether execution is currently in - Torch FX tracing mode""" - if torch.jit.is_scripting(): - return False - if TORCH_VERSION >= (1, 10) and tracing_current_exists: - return is_fx_tracing_current() - elif tracing_legacy_exists: - return is_fx_tracing_legacy() - else: - # Can't find either current or legacy tracing indication code. - # Enabling this assert_fx_safe() call regardless of tracing status. - return False - - -def assert_fx_safe(condition: bool, message: str) -> torch.Tensor: - """An FX-tracing safe version of assert. - Avoids erroneous type assertion triggering when types are masked inside - an fx.proxy.Proxy object during tracing. - Args: condition - either a boolean expression or a string representing - the condition to test. If this assert triggers an exception when tracing - due to dynamic control flow, try encasing the expression in quotation - marks and supplying it as a string.""" - # Must return a concrete tensor for compatibility with PyTorch <=1.8. - # If <=1.8 compatibility is not needed, return type can be converted to None - if torch.jit.is_scripting() or is_fx_tracing(): - return torch.zeros(1) - return _do_assert_fx_safe(condition, message) - - -def _do_assert_fx_safe(condition: bool, message: str) -> torch.Tensor: - try: - if isinstance(condition, str): - caller_frame = inspect.currentframe().f_back - torch._assert(eval(condition, caller_frame.f_globals, caller_frame.f_locals), message) - return torch.ones(1) - else: - torch._assert(condition, message) - return torch.ones(1) - except torch.fx.proxy.TraceError as e: - print( - "Found a non-FX compatible assertion. Skipping the check. Failure is shown below" - + str(e) - ) diff --git a/detectron2/detectron2/utils/video_visualizer.py b/detectron2/detectron2/utils/video_visualizer.py deleted file mode 100644 index 42685be53bb09bab8420b1bcd4d63d8dc6ba7cab..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/video_visualizer.py +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -from typing import List -import pycocotools.mask as mask_util - -from detectron2.structures import Instances -from detectron2.utils.visualizer import ( - ColorMode, - Visualizer, - _create_text_labels, - _PanopticPrediction, -) - -from .colormap import random_color, random_colors - - -class _DetectedInstance: - """ - Used to store data about detected objects in video frame, - in order to transfer color to objects in the future frames. - - Attributes: - label (int): - bbox (tuple[float]): - mask_rle (dict): - color (tuple[float]): RGB colors in range (0, 1) - ttl (int): time-to-live for the instance. For example, if ttl=2, - the instance color can be transferred to objects in the next two frames. - """ - - __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] - - def __init__(self, label, bbox, mask_rle, color, ttl): - self.label = label - self.bbox = bbox - self.mask_rle = mask_rle - self.color = color - self.ttl = ttl - - -class VideoVisualizer: - def __init__(self, metadata, instance_mode=ColorMode.IMAGE): - """ - Args: - metadata (MetadataCatalog): image metadata. - """ - self.metadata = metadata - self._old_instances = [] - assert instance_mode in [ - ColorMode.IMAGE, - ColorMode.IMAGE_BW, - ], "Other mode not supported yet." 
- self._instance_mode = instance_mode - self._max_num_instances = self.metadata.get("max_num_instances", 74) - self._assigned_colors = {} - self._color_pool = random_colors(self._max_num_instances, rgb=True, maximum=1) - self._color_idx_set = set(range(len(self._color_pool))) - - def draw_instance_predictions(self, frame, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. - """ - frame_visualizer = Visualizer(frame, self.metadata) - num_instances = len(predictions) - if num_instances == 0: - return frame_visualizer.output - - boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - colors = predictions.COLOR if predictions.has("COLOR") else [None] * len(predictions) - periods = predictions.ID_period if predictions.has("ID_period") else None - period_threshold = self.metadata.get("period_threshold", 0) - visibilities = ( - [True] * len(predictions) - if periods is None - else [x > period_threshold for x in periods] - ) - - if predictions.has("pred_masks"): - masks = predictions.pred_masks - # mask IOU is not yet enabled - # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) - # assert len(masks_rles) == num_instances - else: - masks = None - - if not predictions.has("COLOR"): - if predictions.has("ID"): - colors = self._assign_colors_by_id(predictions) - else: - # ToDo: clean old assign color method and use a default tracker to assign id - detected = [ - _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=colors[i], ttl=8) - for i in range(num_instances) - ] - colors = self._assign_colors(detected) - - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - - if self._instance_mode == ColorMode.IMAGE_BW: - # any() returns uint8 tensor - frame_visualizer.output.reset_image( - frame_visualizer._create_grayscale_image( - (masks.any(dim=0) > 0).numpy() if masks is not None else None - ) - ) - alpha = 0.3 - else: - alpha = 0.5 - - labels = ( - None - if labels is None - else [y[0] for y in filter(lambda x: x[1], zip(labels, visibilities))] - ) # noqa - assigned_colors = ( - None - if colors is None - else [y[0] for y in filter(lambda x: x[1], zip(colors, visibilities))] - ) # noqa - frame_visualizer.overlay_instances( - boxes=None if masks is not None else boxes[visibilities], # boxes are a bit distracting - masks=None if masks is None else masks[visibilities], - labels=labels, - keypoints=None if keypoints is None else keypoints[visibilities], - assigned_colors=assigned_colors, - alpha=alpha, - ) - - return frame_visualizer.output - - def draw_sem_seg(self, frame, sem_seg, area_threshold=None): - """ - Args: - sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), - each value is the integer label. 
- area_threshold (Optional[int]): only draw segmentations larger than the threshold - """ - # don't need to do anything special - frame_visualizer = Visualizer(frame, self.metadata) - frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) - return frame_visualizer.output - - def draw_panoptic_seg_predictions( - self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 - ): - frame_visualizer = Visualizer(frame, self.metadata) - pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) - - if self._instance_mode == ColorMode.IMAGE_BW: - frame_visualizer.output.reset_image( - frame_visualizer._create_grayscale_image(pred.non_empty_mask()) - ) - - # draw mask for all semantic segments first i.e. "stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - frame_visualizer.draw_binary_mask( - mask, - color=mask_color, - text=self.metadata.stuff_classes[category_idx], - alpha=alpha, - area_threshold=area_threshold, - ) - - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return frame_visualizer.output - # draw mask for all instances second - masks, sinfo = list(zip(*all_instances)) - num_instances = len(masks) - masks_rles = mask_util.encode( - np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") - ) - assert len(masks_rles) == num_instances - - category_ids = [x["category_id"] for x in sinfo] - detected = [ - _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) - for i in range(num_instances) - ] - colors = self._assign_colors(detected) - labels = [self.metadata.thing_classes[k] for k in category_ids] - - frame_visualizer.overlay_instances( - boxes=None, - masks=masks, - labels=labels, - keypoints=None, - assigned_colors=colors, - alpha=alpha, - ) - return frame_visualizer.output - - def _assign_colors(self, instances): - """ - Naive tracking heuristics to assign same color to the same instance, - will update the internal state of tracked instances. - - Returns: - list[tuple[float]]: list of colors. 
- """ - - # Compute iou with either boxes or masks: - is_crowd = np.zeros((len(instances),), dtype=bool) - if instances[0].bbox is None: - assert instances[0].mask_rle is not None - # use mask iou only when box iou is None - # because box seems good enough - rles_old = [x.mask_rle for x in self._old_instances] - rles_new = [x.mask_rle for x in instances] - ious = mask_util.iou(rles_old, rles_new, is_crowd) - threshold = 0.5 - else: - boxes_old = [x.bbox for x in self._old_instances] - boxes_new = [x.bbox for x in instances] - ious = mask_util.iou(boxes_old, boxes_new, is_crowd) - threshold = 0.6 - if len(ious) == 0: - ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") - - # Only allow matching instances of the same label: - for old_idx, old in enumerate(self._old_instances): - for new_idx, new in enumerate(instances): - if old.label != new.label: - ious[old_idx, new_idx] = 0 - - matched_new_per_old = np.asarray(ious).argmax(axis=1) - max_iou_per_old = np.asarray(ious).max(axis=1) - - # Try to find match for each old instance: - extra_instances = [] - for idx, inst in enumerate(self._old_instances): - if max_iou_per_old[idx] > threshold: - newidx = matched_new_per_old[idx] - if instances[newidx].color is None: - instances[newidx].color = inst.color - continue - # If an old instance does not match any new instances, - # keep it for the next frame in case it is just missed by the detector - inst.ttl -= 1 - if inst.ttl > 0: - extra_instances.append(inst) - - # Assign random color to newly-detected instances: - for inst in instances: - if inst.color is None: - inst.color = random_color(rgb=True, maximum=1) - self._old_instances = instances[:] + extra_instances - return [d.color for d in instances] - - def _assign_colors_by_id(self, instances: Instances) -> List: - colors = [] - untracked_ids = set(self._assigned_colors.keys()) - for id in instances.ID: - if id in self._assigned_colors: - colors.append(self._color_pool[self._assigned_colors[id]]) - untracked_ids.remove(id) - else: - assert ( - len(self._color_idx_set) >= 1 - ), f"Number of id exceeded maximum, \ - max = {self._max_num_instances}" - idx = self._color_idx_set.pop() - color = self._color_pool[idx] - self._assigned_colors[id] = idx - colors.append(color) - for id in untracked_ids: - self._color_idx_set.add(self._assigned_colors[id]) - del self._assigned_colors[id] - return colors diff --git a/detectron2/detectron2/utils/visualizer.py b/detectron2/detectron2/utils/visualizer.py deleted file mode 100644 index bb6c24ee971c616e03bc55cb6359260d530dfc4e..0000000000000000000000000000000000000000 --- a/detectron2/detectron2/utils/visualizer.py +++ /dev/null @@ -1,1281 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import colorsys -import logging -import math -import numpy as np -from enum import Enum, unique -import cv2 -import matplotlib as mpl -import matplotlib.colors as mplc -import matplotlib.figure as mplfigure -import pycocotools.mask as mask_util -import torch -from matplotlib.backends.backend_agg import FigureCanvasAgg -from PIL import Image - -from detectron2.data import MetadataCatalog -from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes -from detectron2.utils.file_io import PathManager - -from .colormap import random_color - -logger = logging.getLogger(__name__) - -__all__ = ["ColorMode", "VisImage", "Visualizer"] - - -_SMALL_OBJECT_AREA_THRESH = 1000 -_LARGE_MASK_AREA_THRESH = 120000 -_OFF_WHITE = (1.0, 1.0, 240.0 / 255) -_BLACK = (0, 0, 0) -_RED = (1.0, 0, 0) - -_KEYPOINT_THRESHOLD = 0.05 - - -@unique -class ColorMode(Enum): - """ - Enum of different color modes to use for instance visualizations. - """ - - IMAGE = 0 - """ - Picks a random color for every instance and overlay segmentations with low opacity. - """ - SEGMENTATION = 1 - """ - Let instances of the same category have similar colors - (from metadata.thing_colors), and overlay them with - high opacity. This provides more attention on the quality of segmentation. - """ - IMAGE_BW = 2 - """ - Same as IMAGE, but convert all areas without masks to gray-scale. - Only available for drawing per-instance mask predictions. - """ - - -class GenericMask: - """ - Attribute: - polygons (list[ndarray]): list[ndarray]: polygons for this mask. - Each ndarray has format [x, y, x, y, ...] - mask (ndarray): a binary mask - """ - - def __init__(self, mask_or_polygons, height, width): - self._mask = self._polygons = self._has_holes = None - self.height = height - self.width = width - - m = mask_or_polygons - if isinstance(m, dict): - # RLEs - assert "counts" in m and "size" in m - if isinstance(m["counts"], list): # uncompressed RLEs - h, w = m["size"] - assert h == height and w == width - m = mask_util.frPyObjects(m, h, w) - self._mask = mask_util.decode(m)[:, :] - return - - if isinstance(m, list): # list[ndarray] - self._polygons = [np.asarray(x).reshape(-1) for x in m] - return - - if isinstance(m, np.ndarray): # assumed to be a binary mask - assert m.shape[1] != 2, m.shape - assert m.shape == ( - height, - width, - ), f"mask shape: {m.shape}, target dims: {height}, {width}" - self._mask = m.astype("uint8") - return - - raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) - - @property - def mask(self): - if self._mask is None: - self._mask = self.polygons_to_mask(self._polygons) - return self._mask - - @property - def polygons(self): - if self._polygons is None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - return self._polygons - - @property - def has_holes(self): - if self._has_holes is None: - if self._mask is not None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - else: - self._has_holes = False # if original format is polygon, does not have holes - return self._has_holes - - def mask_to_polygons(self, mask): - # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level - # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. - # Internal contours (holes) are placed in hierarchy-2. - # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. 
- mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr - res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) - hierarchy = res[-1] - if hierarchy is None: # empty mask - return [], False - has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 - res = res[-2] - res = [x.flatten() for x in res] - # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. - # We add 0.5 to turn them into real-value coordinate space. A better solution - # would be to first +0.5 and then dilate the returned polygon by 0.5. - res = [x + 0.5 for x in res if len(x) >= 6] - return res, has_holes - - def polygons_to_mask(self, polygons): - rle = mask_util.frPyObjects(polygons, self.height, self.width) - rle = mask_util.merge(rle) - return mask_util.decode(rle)[:, :] - - def area(self): - return self.mask.sum() - - def bbox(self): - p = mask_util.frPyObjects(self.polygons, self.height, self.width) - p = mask_util.merge(p) - bbox = mask_util.toBbox(p) - bbox[2] += bbox[0] - bbox[3] += bbox[1] - return bbox - - -class _PanopticPrediction: - """ - Unify different panoptic annotation/prediction formats - """ - - def __init__(self, panoptic_seg, segments_info, metadata=None): - if segments_info is None: - assert metadata is not None - # If "segments_info" is None, we assume "panoptic_img" is a - # H*W int32 image storing the panoptic_id in the format of - # category_id * label_divisor + instance_id. We reserve -1 for - # VOID label. - label_divisor = metadata.label_divisor - segments_info = [] - for panoptic_label in np.unique(panoptic_seg.numpy()): - if panoptic_label == -1: - # VOID region. - continue - pred_class = panoptic_label // label_divisor - isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() - segments_info.append( - { - "id": int(panoptic_label), - "category_id": int(pred_class), - "isthing": bool(isthing), - } - ) - del metadata - - self._seg = panoptic_seg - - self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info - segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) - areas = areas.numpy() - sorted_idxs = np.argsort(-areas) - self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] - self._seg_ids = self._seg_ids.tolist() - for sid, area in zip(self._seg_ids, self._seg_areas): - if sid in self._sinfo: - self._sinfo[sid]["area"] = float(area) - - def non_empty_mask(self): - """ - Returns: - (H, W) array, a mask for all pixels that have a prediction - """ - empty_ids = [] - for id in self._seg_ids: - if id not in self._sinfo: - empty_ids.append(id) - if len(empty_ids) == 0: - return np.zeros(self._seg.shape, dtype=np.uint8) - assert ( - len(empty_ids) == 1 - ), ">1 ids corresponds to no labels. This is currently not supported" - return (self._seg != empty_ids[0]).numpy().astype(bool) - - def semantic_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or sinfo["isthing"]: - # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. 
- continue - yield (self._seg == sid).numpy().astype(bool), sinfo - - def instance_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or not sinfo["isthing"]: - continue - mask = (self._seg == sid).numpy().astype(bool) - if mask.sum() > 0: - yield mask, sinfo - - -def _create_text_labels(classes, scores, class_names, is_crowd=None): - """ - Args: - classes (list[int] or None): - scores (list[float] or None): - class_names (list[str] or None): - is_crowd (list[bool] or None): - - Returns: - list[str] or None - """ - labels = None - if classes is not None: - if class_names is not None and len(class_names) > 0: - labels = [class_names[i] for i in classes] - else: - labels = [str(i) for i in classes] - if scores is not None: - if labels is None: - labels = ["{:.0f}%".format(s * 100) for s in scores] - else: - labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] - if labels is not None and is_crowd is not None: - labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)] - return labels - - -class VisImage: - def __init__(self, img, scale=1.0): - """ - Args: - img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255]. - scale (float): scale the input image - """ - self.img = img - self.scale = scale - self.width, self.height = img.shape[1], img.shape[0] - self._setup_figure(img) - - def _setup_figure(self, img): - """ - Args: - Same as in :meth:`__init__()`. - - Returns: - fig (matplotlib.pyplot.figure): top level container for all the image plot elements. - ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. - """ - fig = mplfigure.Figure(frameon=False) - self.dpi = fig.get_dpi() - # add a small 1e-2 to avoid precision lost due to matplotlib's truncation - # (https://github.com/matplotlib/matplotlib/issues/15363) - fig.set_size_inches( - (self.width * self.scale + 1e-2) / self.dpi, - (self.height * self.scale + 1e-2) / self.dpi, - ) - self.canvas = FigureCanvasAgg(fig) - # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) - ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) - ax.axis("off") - self.fig = fig - self.ax = ax - self.reset_image(img) - - def reset_image(self, img): - """ - Args: - img: same as in __init__ - """ - img = img.astype("uint8") - self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") - - def save(self, filepath): - """ - Args: - filepath (str): a string that contains the absolute path, including the file name, where - the visualized image will be saved. - """ - self.fig.savefig(filepath) - - def get_image(self): - """ - Returns: - ndarray: - the visualized image of shape (H, W, 3) (RGB) in uint8 type. - The shape is scaled w.r.t the input image using the given `scale` argument. - """ - canvas = self.canvas - s, (width, height) = canvas.print_to_buffer() - # buf = io.BytesIO() # works for cairo backend - # canvas.print_rgba(buf) - # width, height = self.width, self.height - # s = buf.getvalue() - - buffer = np.frombuffer(s, dtype="uint8") - - img_rgba = buffer.reshape(height, width, 4) - rgb, alpha = np.split(img_rgba, [3], axis=2) - return rgb.astype("uint8") - - -class Visualizer: - """ - Visualizer that draws data about detection/segmentation on images. 
- - It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` - that draw primitive objects to images, as well as high-level wrappers like - `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` - that draw composite data in some pre-defined style. - - Note that the exact visualization style for the high-level wrappers are subject to change. - Style such as color, opacity, label contents, visibility of labels, or even the visibility - of objects themselves (e.g. when the object is too small) may change according - to different heuristics, as long as the results still look visually reasonable. - - To obtain a consistent style, you can implement custom drawing functions with the - abovementioned primitive methods instead. If you need more customized visualization - styles, you can process the data yourself following their format documented in - tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not - intend to satisfy everyone's preference on drawing styles. - - This visualizer focuses on high rendering quality rather than performance. It is not - designed to be used for real-time applications. - """ - - # TODO implement a fast, rasterized version using OpenCV - - def __init__( - self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE, font_size_scale=1.0 - ): - """ - Args: - img_rgb: a numpy array of shape (H, W, C), where H and W correspond to - the height and width of the image respectively. C is the number of - color channels. The image is required to be in RGB format since that - is a requirement of the Matplotlib library. The image is also expected - to be in the range [0, 255]. - metadata (Metadata): dataset metadata (e.g. class names and colors) - instance_mode (ColorMode): defines one of the pre-defined style for drawing - instances on an image. - font_size_scale: extra scaling of font size on top of default font size - """ - self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) - if metadata is None: - metadata = MetadataCatalog.get("__nonexist__") - self.metadata = metadata - self.output = VisImage(self.img, scale=scale) - self.cpu_device = torch.device("cpu") - - # too small texts are useless, therefore clamp to 9 - self._default_font_size = ( - max(np.sqrt(self.output.height * self.output.width) // 90, 10 // scale) - * font_size_scale - ) - self._instance_mode = instance_mode - self.keypoint_threshold = _KEYPOINT_THRESHOLD - - def draw_instance_predictions(self, predictions, jittering: bool = True): - """ - Draw instance-level prediction results on an image. - - Args: - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - jittering: if True, in color mode SEGMENTATION, randomly jitter the colors per class - to distinguish instances from the same class - - Returns: - output (VisImage): image object with visualizations. 
- """ - boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - if predictions.has("pred_masks"): - masks = np.asarray(predictions.pred_masks) - masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] - else: - masks = None - - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = ( - [self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes] - if jittering - else [ - tuple(mplc.to_rgb([x / 255 for x in self.metadata.thing_colors[c]])) - for c in classes - ] - ) - - alpha = 0.8 - else: - colors = None - alpha = 0.5 - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.reset_image( - self._create_grayscale_image( - (predictions.pred_masks.any(dim=0) > 0).numpy() - if predictions.has("pred_masks") - else None - ) - ) - alpha = 0.3 - - self.overlay_instances( - masks=masks, - boxes=boxes, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - return self.output - - def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): - """ - Draw semantic segmentation predictions/labels. - - Args: - sem_seg (Tensor or ndarray): the segmentation of shape (H, W). - Each value is the integer label of the pixel. - area_threshold (int): segments with less than `area_threshold` are not drawn. - alpha (float): the larger it is, the more opaque the segmentations are. - - Returns: - output (VisImage): image object with visualizations. - """ - if isinstance(sem_seg, torch.Tensor): - sem_seg = sem_seg.numpy() - labels, areas = np.unique(sem_seg, return_counts=True) - sorted_idxs = np.argsort(-areas).tolist() - labels = labels[sorted_idxs] - for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] - except (AttributeError, IndexError): - mask_color = None - - binary_mask = (sem_seg == label).astype(np.uint8) - text = self.metadata.stuff_classes[label] - self.draw_binary_mask( - binary_mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - return self.output - - def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7): - """ - Draw panoptic prediction annotations or results. - - Args: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each - segment. - segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. - If it is a ``list[dict]``, each dict contains keys "id", "category_id". - If None, category id of each pixel is computed by - ``pixel // metadata.label_divisor``. - area_threshold (int): stuff segments with less than `area_threshold` are not drawn. - - Returns: - output (VisImage): image object with visualizations. - """ - pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) - - # draw mask for all semantic segments first i.e. 
"stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - text = self.metadata.stuff_classes[category_idx] - self.draw_binary_mask( - mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - - # draw mask for all instances second - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return self.output - masks, sinfo = list(zip(*all_instances)) - category_ids = [x["category_id"] for x in sinfo] - - try: - scores = [x["score"] for x in sinfo] - except KeyError: - scores = None - labels = _create_text_labels( - category_ids, scores, self.metadata.thing_classes, [x.get("iscrowd", 0) for x in sinfo] - ) - - try: - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids - ] - except AttributeError: - colors = None - self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) - - return self.output - - draw_panoptic_seg_predictions = draw_panoptic_seg # backward compatibility - - def draw_dataset_dict(self, dic): - """ - Draw annotations/segmentations in Detectron2 Dataset format. - - Args: - dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. - - Returns: - output (VisImage): image object with visualizations. - """ - annos = dic.get("annotations", None) - if annos: - if "segmentation" in annos[0]: - masks = [x["segmentation"] for x in annos] - else: - masks = None - if "keypoints" in annos[0]: - keypts = [x["keypoints"] for x in annos] - keypts = np.array(keypts).reshape(len(annos), -1, 3) - else: - keypts = None - - boxes = [ - ( - BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) - if len(x["bbox"]) == 4 - else x["bbox"] - ) - for x in annos - ] - - colors = None - category_ids = [x["category_id"] for x in annos] - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) - for c in category_ids - ] - names = self.metadata.get("thing_classes", None) - labels = _create_text_labels( - category_ids, - scores=None, - class_names=names, - is_crowd=[x.get("iscrowd", 0) for x in annos], - ) - self.overlay_instances( - labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors - ) - - sem_seg = dic.get("sem_seg", None) - if sem_seg is None and "sem_seg_file_name" in dic: - with PathManager.open(dic["sem_seg_file_name"], "rb") as f: - sem_seg = Image.open(f) - sem_seg = np.asarray(sem_seg, dtype="uint8") - if sem_seg is not None: - self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) - - pan_seg = dic.get("pan_seg", None) - if pan_seg is None and "pan_seg_file_name" in dic: - with PathManager.open(dic["pan_seg_file_name"], "rb") as f: - pan_seg = Image.open(f) - pan_seg = np.asarray(pan_seg) - from panopticapi.utils import rgb2id - - pan_seg = rgb2id(pan_seg) - if pan_seg is not None: - segments_info = dic["segments_info"] - pan_seg = torch.tensor(pan_seg) - self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.5) - return self.output - - def overlay_instances( - self, - *, - boxes=None, - labels=None, - masks=None, - keypoints=None, - assigned_colors=None, - alpha=0.5, - ): - """ - Args: - boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, - or an Nx4 numpy array of XYXY_ABS format 
for the N objects in a single image, - or a :class:`RotatedBoxes`, - or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image, - labels (list[str]): the text to be displayed for each instance. - masks (masks-like object): Supported types are: - - * :class:`detectron2.structures.PolygonMasks`, - :class:`detectron2.structures.BitMasks`. - * list[list[ndarray]]: contains the segmentation masks for all objects in one image. - The first level of the list corresponds to individual instances. The second - level to all the polygon that compose the instance, and the third level - to the polygon coordinates. The third level should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - * list[ndarray]: each ndarray is a binary mask of shape (H, W). - * list[dict]: each dict is a COCO-style RLE. - keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), - where the N is the number of instances and K is the number of keypoints. - The last dimension corresponds to (x, y, visibility or score). - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = 0 - if boxes is not None: - boxes = self._convert_boxes(boxes) - num_instances = len(boxes) - if masks is not None: - masks = self._convert_masks(masks) - if num_instances: - assert len(masks) == num_instances - else: - num_instances = len(masks) - if keypoints is not None: - if num_instances: - assert len(keypoints) == num_instances - else: - num_instances = len(keypoints) - keypoints = self._convert_keypoints(keypoints) - if labels is not None: - assert len(labels) == num_instances - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - if boxes is not None and boxes.shape[1] == 5: - return self.overlay_rotated_instances( - boxes=boxes, labels=labels, assigned_colors=assigned_colors - ) - - # Display in largest to smallest order to reduce occlusion. - areas = None - if boxes is not None: - areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) - elif masks is not None: - areas = np.asarray([x.area() for x in masks]) - - if areas is not None: - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] if boxes is not None else None - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None - assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] - keypoints = keypoints[sorted_idxs] if keypoints is not None else None - - for i in range(num_instances): - color = assigned_colors[i] - if boxes is not None: - self.draw_box(boxes[i], edge_color=color) - - if masks is not None: - for segment in masks[i].polygons: - self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) - - if labels is not None: - # first get a box - if boxes is not None: - x0, y0, x1, y1 = boxes[i] - text_pos = (x0, y0) # if drawing boxes, put text on the box corner. 
- horiz_align = "left" - elif masks is not None: - # skip small mask without polygon - if len(masks[i].polygons) == 0: - continue - - x0, y0, x1, y1 = masks[i].bbox() - - # draw text in the center (defined by median) when box is not drawn - # median is less sensitive to outliers. - text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] - horiz_align = "center" - else: - continue # drawing the box confidence for keypoints isn't very useful. - # for small objects, draw text at the side to avoid occlusion - instance_area = (y1 - y0) * (x1 - x0) - if ( - instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale - or y1 - y0 < 40 * self.output.scale - ): - if y1 >= self.output.height - 5: - text_pos = (x1, y0) - else: - text_pos = (x0, y1) - - height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) - * 0.5 - * self._default_font_size - ) - self.draw_text( - labels[i], - text_pos, - color=lighter_color, - horizontal_alignment=horiz_align, - font_size=font_size, - ) - - # draw keypoints - if keypoints is not None: - for keypoints_per_instance in keypoints: - self.draw_and_connect_keypoints(keypoints_per_instance) - - return self.output - - def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): - """ - Args: - boxes (ndarray): an Nx5 numpy array of - (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image. - labels (list[str]): the text to be displayed for each instance. - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = len(boxes) - - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - - # Display in largest to smallest order to reduce occlusion. - if boxes is not None: - areas = boxes[:, 2] * boxes[:, 3] - - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - colors = [assigned_colors[idx] for idx in sorted_idxs] - - for i in range(num_instances): - self.draw_rotated_box_with_label( - boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None - ) - - return self.output - - def draw_and_connect_keypoints(self, keypoints): - """ - Draws keypoints of an instance and follows the rules for keypoint connections - to draw lines between appropriate keypoints. This follows color heuristics for - line color. - - Args: - keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints - and the last dimension corresponds to (x, y, probability). - - Returns: - output (VisImage): image object with visualizations. 
- """ - visible = {} - keypoint_names = self.metadata.get("keypoint_names") - for idx, keypoint in enumerate(keypoints): - - # draw keypoint - x, y, prob = keypoint - if prob > self.keypoint_threshold: - self.draw_circle((x, y), color=_RED) - if keypoint_names: - keypoint_name = keypoint_names[idx] - visible[keypoint_name] = (x, y) - - if self.metadata.get("keypoint_connection_rules"): - for kp0, kp1, color in self.metadata.keypoint_connection_rules: - if kp0 in visible and kp1 in visible: - x0, y0 = visible[kp0] - x1, y1 = visible[kp1] - color = tuple(x / 255.0 for x in color) - self.draw_line([x0, x1], [y0, y1], color=color) - - # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip - # Note that this strategy is specific to person keypoints. - # For other keypoints, it should just do nothing - try: - ls_x, ls_y = visible["left_shoulder"] - rs_x, rs_y = visible["right_shoulder"] - mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 - except KeyError: - pass - else: - # draw line from nose to mid-shoulder - nose_x, nose_y = visible.get("nose", (None, None)) - if nose_x is not None: - self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) - - try: - # draw line from mid-shoulder to mid-hip - lh_x, lh_y = visible["left_hip"] - rh_x, rh_y = visible["right_hip"] - except KeyError: - pass - else: - mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 - self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) - return self.output - - """ - Primitive drawing functions: - """ - - def draw_text( - self, - text, - position, - *, - font_size=None, - color="g", - horizontal_alignment="center", - rotation=0, - ): - """ - Args: - text (str): class label - position (tuple): a tuple of the x and y coordinates to place text on image. - font_size (int, optional): font of the text. If not provided, a font size - proportional to the image width is calculated and used. - color: color of the text. Refer to `matplotlib.colors` for full list - of formats that are accepted. - horizontal_alignment (str): see `matplotlib.text.Text` - rotation: rotation angle in degrees CCW - - Returns: - output (VisImage): image object with text drawn. - """ - if not font_size: - font_size = self._default_font_size - - # since the text background is dark, we don't want the text to be dark - color = np.maximum(list(mplc.to_rgb(color)), 0.2) - color[np.argmax(color)] = max(0.8, np.max(color)) - - x, y = position - self.output.ax.text( - x, - y, - text, - size=font_size * self.output.scale, - family="sans-serif", - bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, - verticalalignment="top", - horizontalalignment=horizontal_alignment, - color=color, - zorder=10, - rotation=rotation, - ) - return self.output - - def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): - """ - Args: - box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 - are the coordinates of the image's top left corner. x1 and y1 are the - coordinates of the image's bottom right corner. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - - Returns: - output (VisImage): image object with box drawn. 
- """ - x0, y0, x1, y1 = box_coord - width = x1 - x0 - height = y1 - y0 - - linewidth = max(self._default_font_size / 4, 1) - - self.output.ax.add_patch( - mpl.patches.Rectangle( - (x0, y0), - width, - height, - fill=False, - edgecolor=edge_color, - linewidth=linewidth * self.output.scale, - alpha=alpha, - linestyle=line_style, - ) - ) - return self.output - - def draw_rotated_box_with_label( - self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None - ): - """ - Draw a rotated box with label on its top-left corner. - - Args: - rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), - where cnt_x and cnt_y are the center coordinates of the box. - w and h are the width and height of the box. angle represents how - many degrees the box is rotated CCW with regard to the 0-degree box. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - label (string): label for rotated box. It will not be rendered when set to None. - - Returns: - output (VisImage): image object with box drawn. - """ - cnt_x, cnt_y, w, h, angle = rotated_box - area = w * h - # use thinner lines when the box is small - linewidth = self._default_font_size / ( - 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 - ) - - theta = angle * math.pi / 180.0 - c = math.cos(theta) - s = math.sin(theta) - rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] - # x: left->right ; y: top->down - rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] - for k in range(4): - j = (k + 1) % 4 - self.draw_line( - [rotated_rect[k][0], rotated_rect[j][0]], - [rotated_rect[k][1], rotated_rect[j][1]], - color=edge_color, - linestyle="--" if k == 1 else line_style, - linewidth=linewidth, - ) - - if label is not None: - text_pos = rotated_rect[1] # topleft corner - - height_ratio = h / np.sqrt(self.output.height * self.output.width) - label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size - ) - self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) - - return self.output - - def draw_circle(self, circle_coord, color, radius=3): - """ - Args: - circle_coord (list(int) or tuple(int)): contains the x and y coordinates - of the center of the circle. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - radius (int): radius of the circle. - - Returns: - output (VisImage): image object with box drawn. - """ - x, y = circle_coord - self.output.ax.add_patch( - mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) - ) - return self.output - - def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): - """ - Args: - x_data (list[int]): a list containing x values of all the points being drawn. - Length of list should match the length of y_data. - y_data (list[int]): a list containing y values of all the points being drawn. - Length of list should match the length of x_data. - color: color of the line. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - linestyle: style of the line. Refer to `matplotlib.lines.Line2D` - for a full list of formats that are accepted. 
- linewidth (float or None): width of the line. When it's None, - a default value will be computed and used. - - Returns: - output (VisImage): image object with line drawn. - """ - if linewidth is None: - linewidth = self._default_font_size / 3 - linewidth = max(linewidth, 1) - self.output.ax.add_line( - mpl.lines.Line2D( - x_data, - y_data, - linewidth=linewidth * self.output.scale, - color=color, - linestyle=linestyle, - ) - ) - return self.output - - def draw_binary_mask( - self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=10 - ): - """ - Args: - binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and - W is the image width. Each value in the array is either a 0 or 1 value of uint8 - type. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. - text (str): if None, will be drawn on the object - alpha (float): blending efficient. Smaller values lead to more transparent masks. - area_threshold (float): a connected component smaller than this area will not be shown. - - Returns: - output (VisImage): image object with mask drawn. - """ - if color is None: - color = random_color(rgb=True, maximum=1) - color = mplc.to_rgb(color) - - has_valid_segment = False - binary_mask = binary_mask.astype("uint8") # opencv needs uint8 - mask = GenericMask(binary_mask, self.output.height, self.output.width) - shape2d = (binary_mask.shape[0], binary_mask.shape[1]) - - if not mask.has_holes: - # draw polygons for regular masks - for segment in mask.polygons: - area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) - if area < (area_threshold or 0): - continue - has_valid_segment = True - segment = segment.reshape(-1, 2) - self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) - else: - # TODO: Use Path/PathPatch to draw vector graphics: - # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha - has_valid_segment = True - self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) - - if text is not None and has_valid_segment: - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - self._draw_text_in_mask(binary_mask, text, lighter_color) - return self.output - - def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5): - """ - Args: - soft_mask (ndarray): float array of shape (H, W), each value in [0, 1]. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. - text (str): if None, will be drawn on the object - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with mask drawn. 
- """ - if color is None: - color = random_color(rgb=True, maximum=1) - color = mplc.to_rgb(color) - - shape2d = (soft_mask.shape[0], soft_mask.shape[1]) - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = soft_mask * alpha - self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) - - if text is not None: - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - binary_mask = (soft_mask > 0.5).astype("uint8") - self._draw_text_in_mask(binary_mask, text, lighter_color) - return self.output - - def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): - """ - Args: - segment: numpy array of shape Nx2, containing all the points in the polygon. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. If not provided, a darker shade - of the polygon color will be used instead. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with polygon drawn. - """ - if edge_color is None: - # make edge color darker than the polygon color - if alpha > 0.8: - edge_color = self._change_color_brightness(color, brightness_factor=-0.7) - else: - edge_color = color - edge_color = mplc.to_rgb(edge_color) + (1,) - - polygon = mpl.patches.Polygon( - segment, - fill=True, - facecolor=mplc.to_rgb(color) + (alpha,), - edgecolor=edge_color, - linewidth=max(self._default_font_size // 15 * self.output.scale, 1), - ) - self.output.ax.add_patch(polygon) - return self.output - - """ - Internal methods: - """ - - def _jitter(self, color): - """ - Randomly modifies given color to produce a slightly different color than the color given. - - Args: - color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color - picked. The values in the list are in the [0.0, 1.0] range. - - Returns: - jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the - color after being jittered. The values in the list are in the [0.0, 1.0] range. - """ - color = mplc.to_rgb(color) - vec = np.random.rand(3) - # better to do it in another color space - vec = vec / np.linalg.norm(vec) * 0.5 - res = np.clip(vec + color, 0, 1) - return tuple(res) - - def _create_grayscale_image(self, mask=None): - """ - Create a grayscale version of the original image. - The colors in masked area, if given, will be kept. - """ - img_bw = self.img.astype("f4").mean(axis=2) - img_bw = np.stack([img_bw] * 3, axis=2) - if mask is not None: - img_bw[mask] = self.img[mask] - return img_bw - - def _change_color_brightness(self, color, brightness_factor): - """ - Depending on the brightness_factor, gives a lighter or darker color i.e. a color with - less or more saturation than the original color. - - Args: - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of - 0 will correspond to no change, a factor in [-1.0, 0) range will result in - a darker color and a factor in (0, 1.0] range will result in a lighter color. - - Returns: - modified_color (tuple[double]): a tuple containing the RGB values of the - modified color. Each value in the tuple is in the [0.0, 1.0] range. 
- """ - assert brightness_factor >= -1.0 and brightness_factor <= 1.0 - color = mplc.to_rgb(color) - polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) - modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) - modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness - modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness - modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) - return tuple(np.clip(modified_color, 0.0, 1.0)) - - def _convert_boxes(self, boxes): - """ - Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. - """ - if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): - return boxes.tensor.detach().numpy() - else: - return np.asarray(boxes) - - def _convert_masks(self, masks_or_polygons): - """ - Convert different format of masks or polygons to a tuple of masks and polygons. - - Returns: - list[GenericMask]: - """ - - m = masks_or_polygons - if isinstance(m, PolygonMasks): - m = m.polygons - if isinstance(m, BitMasks): - m = m.tensor.numpy() - if isinstance(m, torch.Tensor): - m = m.numpy() - ret = [] - for x in m: - if isinstance(x, GenericMask): - ret.append(x) - else: - ret.append(GenericMask(x, self.output.height, self.output.width)) - return ret - - def _draw_text_in_mask(self, binary_mask, text, color): - """ - Find proper places to draw text given a binary mask. - """ - # TODO sometimes drawn on wrong objects. the heuristics here can improve. - _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) - if stats[1:, -1].size == 0: - return - largest_component_id = np.argmax(stats[1:, -1]) + 1 - - # draw text on the largest component, as well as other very large components. - for cid in range(1, _num_cc): - if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: - # median is more stable than centroid - # center = centroids[largest_component_id] - center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] - self.draw_text(text, center, color=color) - - def _convert_keypoints(self, keypoints): - if isinstance(keypoints, Keypoints): - keypoints = keypoints.tensor - keypoints = np.asarray(keypoints) - return keypoints - - def get_output(self): - """ - Returns: - output (VisImage): the image output containing the visualizations added - to the image. - """ - return self.output diff --git a/detectron2/dev/README.md b/detectron2/dev/README.md deleted file mode 100644 index bec811ad002a016f2137d9d0ea61c27ee5e78992..0000000000000000000000000000000000000000 --- a/detectron2/dev/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -## Some scripts for developers to use, include: - -- `linter.sh`: lint the codebase before commit. -- `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. - Note that these tests require 2 GPUs. -- `parse_results.sh`: parse results from a log file. diff --git a/detectron2/dev/linter.sh b/detectron2/dev/linter.sh deleted file mode 100644 index fb1d514e71b868dab2511a2769fe2d7df3aaf953..0000000000000000000000000000000000000000 --- a/detectron2/dev/linter.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -# cd to detectron2 project root -cd "$(dirname "${BASH_SOURCE[0]}")/.." - -{ - black --version | grep -E "24\." > /dev/null -} || { - echo "Linter requires 'black==24.*' !" 
- exit 1 -} - -ISORT_VERSION=$(isort --version-number) -if [[ "$ISORT_VERSION" != 4.3* ]]; then - echo "Linter requires isort==4.3.21 !" - exit 1 -fi - -set -v - -echo "Running isort ..." -isort -y -sp . --atomic - -echo "Running black ..." -black -l 100 . - -echo "Running flake8 ..." -if [ -x "$(command -v flake8)" ]; then - flake8 . -else - python3 -m flake8 . -fi - -# echo "Running mypy ..." -# Pytorch does not have enough type annotations -# mypy detectron2/solver detectron2/structures detectron2/config - -echo "Running clang-format ..." -find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i - -command -v arc > /dev/null && arc lint diff --git a/detectron2/dev/packaging/README.md b/detectron2/dev/packaging/README.md deleted file mode 100644 index 0174b7dd528efcaa0fe27d46f40a3866f03e7c41..0000000000000000000000000000000000000000 --- a/detectron2/dev/packaging/README.md +++ /dev/null @@ -1,17 +0,0 @@ - -## To build a cu101 wheel for release: - -``` -$ nvidia-docker run -it --storage-opt "size=20GB" --name pt pytorch/manylinux-cuda101 -# inside the container: -# git clone https://github.com/facebookresearch/detectron2/ -# cd detectron2 -# export CU_VERSION=cu101 D2_VERSION_SUFFIX= PYTHON_VERSION=3.7 PYTORCH_VERSION=1.8 -# ./dev/packaging/build_wheel.sh -``` - -## To build all wheels for combinations of CUDA and Python -``` -./dev/packaging/build_all_wheels.sh -./dev/packaging/gen_wheel_index.sh /path/to/wheels -``` diff --git a/detectron2/dev/packaging/build_all_wheels.sh b/detectron2/dev/packaging/build_all_wheels.sh deleted file mode 100644 index 00f9de5e27867bf210438190c2951a571ac1f3fc..0000000000000000000000000000000000000000 --- a/detectron2/dev/packaging/build_all_wheels.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -[[ -d "dev/packaging" ]] || { - echo "Please run this script at detectron2 root!" - exit 1 -} - -build_one() { - cu=$1 - pytorch_ver=$2 - - case "$cu" in - cu*) - container_name=manylinux-cuda${cu/cu/} - ;; - cpu) - container_name=manylinux-cuda101 - ;; - *) - echo "Unrecognized cu=$cu" - exit 1 - ;; - esac - - echo "Launching container $container_name ..." - container_id="$container_name"_"$cu"_"$pytorch_ver" - - py_versions=(3.7 3.8 3.9) - - for py in "${py_versions[@]}"; do - docker run -itd \ - --name "$container_id" \ - --mount type=bind,source="$(pwd)",target=/detectron2 \ - pytorch/$container_name - - cat </dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -echo "Build Settings:" -echo "CU_VERSION: $CU_VERSION" # e.g. cu101 -echo "D2_VERSION_SUFFIX: $D2_VERSION_SUFFIX" # e.g. +cu101 or "" -echo "PYTHON_VERSION: $PYTHON_VERSION" # e.g. 3.7 -echo "PYTORCH_VERSION: $PYTORCH_VERSION" # e.g. 
1.4 - -setup_cuda -setup_wheel_python - -yum install ninja-build -y -ln -sv /usr/bin/ninja-build /usr/bin/ninja || true - -pip_install pip numpy -U -pip_install "torch==$PYTORCH_VERSION" \ - -f https://download.pytorch.org/whl/"$CU_VERSION"/torch_stable.html - -# use separate directories to allow parallel build -BASE_BUILD_DIR=build/$CU_VERSION-py$PYTHON_VERSION-pt$PYTORCH_VERSION -python setup.py \ - build -b "$BASE_BUILD_DIR" \ - bdist_wheel -b "$BASE_BUILD_DIR/build_dist" -d "wheels/$CU_VERSION/torch$PYTORCH_VERSION" -rm -rf "$BASE_BUILD_DIR" diff --git a/detectron2/dev/packaging/gen_install_table.py b/detectron2/dev/packaging/gen_install_table.py deleted file mode 100644 index b4c852dc53de613707b9668f748184c2b63b9dea..0000000000000000000000000000000000000000 --- a/detectron2/dev/packaging/gen_install_table.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -# -*- coding: utf-8 -*- - -import argparse - -template = """
<details><summary> install </summary><pre><code>\
-python -m pip install detectron2{d2_version} -f \\
-  https://dl.fbaipublicfiles.com/detectron2/wheels/{cuda}/torch{torch}/index.html
-</code></pre> </details>"""
-CUDA_SUFFIX = {
-    "11.3": "cu113",
-    "11.1": "cu111",
-    "11.0": "cu110",
-    "10.2": "cu102",
-    "10.1": "cu101",
-    "10.0": "cu100",
-    "9.2": "cu92",
-    "cpu": "cpu",
-}
-
-
-def gen_header(torch_versions):
-    return '<table class="docutils"><tbody><th width="80"> CUDA </th>' + "".join(
-        [
-            '<th valign="bottom" align="left" width="100">torch {}</th>'.format(t)
-            for t in torch_versions
-        ]
-    )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--d2-version", help="detectron2 version number, default to empty")
-    args = parser.parse_args()
-    d2_version = f"=={args.d2_version}" if args.d2_version else ""
-
-    all_versions = (
-        [("1.8", k) for k in ["11.1", "10.2", "10.1", "cpu"]]
-        + [("1.9", k) for k in ["11.1", "10.2", "cpu"]]
-        + [("1.10", k) for k in ["11.3", "11.1", "10.2", "cpu"]]
-    )
-
-    torch_versions = sorted(
-        {k[0] for k in all_versions}, key=lambda x: int(x.split(".")[1]), reverse=True
-    )
-    cuda_versions = sorted(
-        {k[1] for k in all_versions}, key=lambda x: float(x) if x != "cpu" else 0, reverse=True
-    )
-
-    table = gen_header(torch_versions)
-    for cu in cuda_versions:
-        table += f"""<tr><td align="left"> {cu} </td>"""
-        cu_suffix = CUDA_SUFFIX[cu]
-        for torch in torch_versions:
-            if (torch, cu) in all_versions:
-                cell = template.format(d2_version=d2_version, cuda=cu_suffix, torch=torch)
-            else:
-                cell = ""
-            table += f"""<td align="left"> {cell} </td> """
-        table += "</tr>"
-    table += "</tbody></table>
" - print(table) diff --git a/detectron2/dev/packaging/gen_wheel_index.sh b/detectron2/dev/packaging/gen_wheel_index.sh deleted file mode 100644 index ec96a27d809fe87ad963f3ffa7147ca4afbc1711..0000000000000000000000000000000000000000 --- a/detectron2/dev/packaging/gen_wheel_index.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - - -root=$(readlink -f $1) -if [[ -z "$root" ]]; then - echo "Usage: ./gen_wheel_index.sh /absolute/path/to/wheels" - exit -fi - -export LC_ALL=C # reproducible sort -# NOTE: all sort in this script might not work when xx.10 is released - -index=$root/index.html - -cd "$root" -for cu in cpu cu92 cu100 cu101 cu102 cu110 cu111 cu113; do - mkdir -p "$root/$cu" - cd "$root/$cu" - echo "Creating $PWD/index.html ..." - # First sort by torch version, then stable sort by d2 version with unique. - # As a result, the latest torch version for each d2 version is kept. - for whl in $(find -type f -name '*.whl' -printf '%P\n' \ - | sort -k 1 -r | sort -t '/' -k 2 --stable -r --unique); do - echo "$whl
" - done > index.html - - - for torch in torch*; do - cd "$root/$cu/$torch" - - # list all whl for each cuda,torch version - echo "Creating $PWD/index.html ..." - for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort -r); do - echo "$whl
" - done > index.html - done -done - -cd "$root" -# Just list everything: -echo "Creating $index ..." -for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort -r); do - echo "$whl
" -done > "$index" - diff --git a/detectron2/dev/packaging/pkg_helpers.bash b/detectron2/dev/packaging/pkg_helpers.bash deleted file mode 100644 index 550bb6e5756d43da3d30c8cd9b602b3bd30a7e4a..0000000000000000000000000000000000000000 --- a/detectron2/dev/packaging/pkg_helpers.bash +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - - -setup_cuda() { - # Now work out the CUDA settings - # Like other torch domain libraries, we choose common GPU architectures only. - # See https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py - # and https://github.com/pytorch/vision/blob/main/packaging/pkg_helpers.bash for reference. - export FORCE_CUDA=1 - case "$CU_VERSION" in - cu113) - export CUDA_HOME=/usr/local/cuda-11.3/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX" - ;; - cu112) - export CUDA_HOME=/usr/local/cuda-11.2/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX" - ;; - cu111) - export CUDA_HOME=/usr/local/cuda-11.1/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0;8.6+PTX" - ;; - cu110) - export CUDA_HOME=/usr/local/cuda-11.0/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX;8.0+PTX" - ;; - cu102) - export CUDA_HOME=/usr/local/cuda-10.2/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX" - ;; - cu101) - export CUDA_HOME=/usr/local/cuda-10.1/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX" - ;; - cu100) - export CUDA_HOME=/usr/local/cuda-10.0/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0;7.5+PTX" - ;; - cu92) - export CUDA_HOME=/usr/local/cuda-9.2/ - export TORCH_CUDA_ARCH_LIST="3.7;5.0;5.2;6.0;6.1+PTX;7.0+PTX" - ;; - cpu) - unset FORCE_CUDA - export CUDA_VISIBLE_DEVICES= - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -setup_wheel_python() { - case "$PYTHON_VERSION" in - 3.7) python_abi=cp37-cp37m ;; - 3.8) python_abi=cp38-cp38 ;; - 3.9) python_abi=cp39-cp39 ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - export PATH="/opt/python/$python_abi/bin:$PATH" -} diff --git a/detectron2/dev/parse_results.sh b/detectron2/dev/parse_results.sh deleted file mode 100644 index 80768a4005753447c49339790fe66c9b82a80aaf..0000000000000000000000000000000000000000 --- a/detectron2/dev/parse_results.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright (c) Facebook, Inc. and its affiliates. - -# A shell script that parses metrics from the log file. -# Make it easier for developers to track performance of models. - -LOG="$1" - -if [[ -z "$LOG" ]]; then - echo "Usage: $0 /path/to/log/file" - exit 1 -fi - -# [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) -# [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) -# [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
- -# training time -trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') -echo "Training speed: $trainspeed s/it" - -# inference time: there could be multiple inference during training -inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) -echo "Inference speed: $inferencespeed s/it" - -# [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 -memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') -echo "Training memory: $memory MB" - -echo "Easy to copypaste:" -echo "$trainspeed","$inferencespeed","$memory" - -echo "------------------------------" - -# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox -# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl -# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 -# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm -# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl -# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 - -echo "COCO Results:" -num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) -# each task has 3 lines -grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) diff --git a/detectron2/dev/run_inference_tests.sh b/detectron2/dev/run_inference_tests.sh deleted file mode 100644 index bc9dcc56f06f79fc5efa42c04ffdc07c2787e3ac..0000000000000000000000000000000000000000 --- a/detectron2/dev/run_inference_tests.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -BIN="python tools/train_net.py" -OUTPUT="inference_test_output" -NUM_GPUS=2 - -CFG_LIST=( "${@:1}" ) - -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN \ - --eval-only \ - --num-gpus $NUM_GPUS \ - --config-file "$cfg" \ - OUTPUT_DIR $OUTPUT - rm -rf $OUTPUT -done - - -echo "========================================================================" -echo "Running demo.py ..." -echo "========================================================================" -DEMO_BIN="python demo/demo.py" -COCO_DIR=datasets/coco/val2014 -mkdir -pv $OUTPUT - -set -v - -$DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ - --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT -rm -rf $OUTPUT diff --git a/detectron2/dev/run_instant_tests.sh b/detectron2/dev/run_instant_tests.sh deleted file mode 100644 index 9fd9ba0c239d3e982c17711c9db872de3730decf..0000000000000000000000000000000000000000 --- a/detectron2/dev/run_instant_tests.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. 
- -BIN="python tools/train_net.py" -OUTPUT="instant_test_output" -NUM_GPUS=2 - -CFG_LIST=( "${@:1}" ) -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ - SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ - OUTPUT_DIR "$OUTPUT" - rm -rf "$OUTPUT" -done - diff --git a/detectron2/docker/Dockerfile b/detectron2/docker/Dockerfile deleted file mode 100644 index fae0060b2b78b26e4cef9631a04e84db4eb2c567..0000000000000000000000000000000000000000 --- a/detectron2/docker/Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04 -# use an older system (18.04) to avoid opencv incompatibility (issue#3524) - -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - python3-opencv ca-certificates python3-dev git wget sudo ninja-build -RUN ln -sv /usr/bin/python3 /usr/bin/python - -# create a non-root user -ARG USER_ID=1000 -RUN useradd -m --no-log-init --system --uid ${USER_ID} appuser -g sudo -RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -USER appuser -WORKDIR /home/appuser - -ENV PATH="/home/appuser/.local/bin:${PATH}" -RUN wget https://bootstrap.pypa.io/pip/3.6/get-pip.py && \ - python3 get-pip.py --user && \ - rm get-pip.py - -# install dependencies -# See https://pytorch.org/ for other options if you use a different version of CUDA -RUN pip install --user tensorboard cmake onnx # cmake from apt-get is too old -RUN pip install --user torch==1.10 torchvision==0.11.1 -f https://download.pytorch.org/whl/cu111/torch_stable.html - -RUN pip install --user 'git+https://github.com/facebookresearch/fvcore' -# install detectron2 -RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo -# set FORCE_CUDA because during `docker build` cuda is not accessible -ENV FORCE_CUDA="1" -# This will by default build detectron2 for all common cuda architectures and take a lot more time, -# because inside `docker build`, there is no way to tell which architecture will be used. -ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" -ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" - -RUN pip install --user -e detectron2_repo - -# Set a fixed model cache directory. -ENV FVCORE_CACHE="/tmp" -WORKDIR /home/appuser/detectron2_repo - -# run detectron2 under user "appuser": -# wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg -# python3 demo/demo.py \ - #--config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - #--input input.jpg --output outputs/ \ - #--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl diff --git a/detectron2/docker/README.md b/detectron2/docker/README.md deleted file mode 100644 index ea709f33b007abd2de044a0338659ec003330725..0000000000000000000000000000000000000000 --- a/detectron2/docker/README.md +++ /dev/null @@ -1,45 +0,0 @@ - -## Use the container (with docker β‰₯ 19.03) - -``` -cd docker/ -# Build: -docker build --build-arg USER_ID=$UID -t detectron2:v0 . 
-# Launch (require GPUs): -docker run --gpus all -it \ - --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \ - --name=detectron2 detectron2:v0 - -# Grant docker access to host X server to show images -xhost +local:`docker inspect --format='{{ .Config.Hostname }}' detectron2` -``` - -## Use the container (with docker-compose β‰₯ 1.28.0) - -Install docker-compose and nvidia-docker-toolkit, then run: -``` -cd docker && USER_ID=$UID docker-compose run detectron2 -``` - -## Use the deployment container (to test C++ examples) -After building the base detectron2 container as above, do: -``` -# Build: -docker build -t detectron2-deploy:v0 -f deploy.Dockerfile . -# Launch: -docker run --gpus all -it detectron2-deploy:v0 -``` - -#### Using a persistent cache directory - -You can prevent models from being re-downloaded on every run, -by storing them in a cache directory. - -To do this, add `--volume=$HOME/.torch/fvcore_cache:/tmp:rw` in the run command. - -## Install new dependencies -Add the following to `Dockerfile` to make persistent changes. -``` -RUN sudo apt-get update && sudo apt-get install -y vim -``` -Or run them in the container to make temporary changes. diff --git a/detectron2/docker/deploy.Dockerfile b/detectron2/docker/deploy.Dockerfile deleted file mode 100644 index 30b4ed774368af89d654c9f01850d769e6cf9f52..0000000000000000000000000000000000000000 --- a/detectron2/docker/deploy.Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# This file defines a container that compiles the C++ examples of detectron2. -# See docker/README.md for usage. - -# Depends on the image produced by "./Dockerfile" -FROM detectron2:v0 - -USER appuser -ENV HOME=/home/appuser -WORKDIR $HOME - -# Let torchvision find libtorch -ENV CMAKE_PREFIX_PATH=$HOME/.local/lib/python3.6/site-packages/torch/ - -RUN sudo apt-get update && sudo apt-get install libopencv-dev --yes - -# install libtorchvision -RUN git clone --branch v0.11.1 https://github.com/pytorch/vision/ -RUN mkdir vision/build && cd vision/build && \ - cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release -DWITH_CUDA=on -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST && \ - make -j && make install - -# make our installation take effect -ENV CPATH=$HOME/.local/include \ - LIBRARY_PATH=$HOME/.local/lib \ - LD_LIBRARY_PATH=$HOME/.local/lib - - -# build C++ examples of detectron2 -RUN cd detectron2_repo/tools/deploy && mkdir build && cd build && \ - cmake -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST .. && make -# binaries will be available under tools/deploy/build diff --git a/detectron2/docker/docker-compose.yml b/detectron2/docker/docker-compose.yml deleted file mode 100644 index 6665ab4c4bd40cae9973417b5b8d4c0c1edd7fc7..0000000000000000000000000000000000000000 --- a/detectron2/docker/docker-compose.yml +++ /dev/null @@ -1,26 +0,0 @@ -version: "2.3" -services: - detectron2: - build: - context: . 
- dockerfile: Dockerfile - args: - USER_ID: ${USER_ID:-1000} - deploy: - resources: - reservations: - devices: - - capabilities: - - gpu - shm_size: "8gb" - ulimits: - memlock: -1 - stack: 67108864 - volumes: - - /tmp/.X11-unix:/tmp/.X11-unix:ro - environment: - - DISPLAY=$DISPLAY - - NVIDIA_VISIBLE_DEVICES=all - # Uncomment with proper source to access webcam from docker - # devices: - # - /dev/video0:/dev/video0 diff --git a/detectron2/docs/.gitignore b/detectron2/docs/.gitignore deleted file mode 100644 index e35d8850c9688b1ce82711694692cc574a799396..0000000000000000000000000000000000000000 --- a/detectron2/docs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -_build diff --git a/detectron2/docs/Makefile b/detectron2/docs/Makefile deleted file mode 100644 index 718eddce170fe13b67216baf9d4d25b20e860506..0000000000000000000000000000000000000000 --- a/detectron2/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# Copyright (c) Facebook, Inc. and its affiliates. - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/detectron2/docs/README.md b/detectron2/docs/README.md deleted file mode 100644 index 8531cafd4d1aae0267f4fc5e7212f7db5ed90686..0000000000000000000000000000000000000000 --- a/detectron2/docs/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Read the docs: - -The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). -Documents in this directory are not meant to be read on github. - -# Build the docs: - -1. Install detectron2 according to [INSTALL.md](../INSTALL.md). -2. Install additional libraries required to build docs: - - docutils==0.16 - - Sphinx==3.2.0 - - recommonmark==0.6.0 - - sphinx_rtd_theme - -3. Run `make html` from this directory. diff --git a/detectron2/docs/_static/css/custom.css b/detectron2/docs/_static/css/custom.css deleted file mode 100644 index 6c511764cf4c1d55a227619a98e5ba6578619ad7..0000000000000000000000000000000000000000 --- a/detectron2/docs/_static/css/custom.css +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * some extra css to make markdown look similar between github/sphinx - */ - -/* - * Below is for install.md: - */ -.rst-content code { - white-space: pre; - border: 0px; -} - -.rst-content th { - border: 1px solid #e1e4e5; -} - -.rst-content th p { - /* otherwise will be default 24px for regular paragraph */ - margin-bottom: 0px; -} - -.rst-content .line-block { - /* otherwise will be 24px */ - margin-bottom: 0px; -} - -div.section > details { - padding-bottom: 1em; -} diff --git a/detectron2/docs/conf.py b/detectron2/docs/conf.py deleted file mode 100644 index 1fb3e30f97dcc02b497e7c6de6bcc9e47ea94885..0000000000000000000000000000000000000000 --- a/detectron2/docs/conf.py +++ /dev/null @@ -1,395 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -# flake8: noqa - -# Configuration file for the Sphinx documentation builder. 
-# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -from unittest import mock -from sphinx.domains import Domain -from typing import Dict, List, Tuple - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -import sphinx_rtd_theme - - -class GithubURLDomain(Domain): - """ - Resolve certain links in markdown files to github source. - """ - - name = "githuburl" - ROOT = "https://github.com/facebookresearch/detectron2/blob/main/" - LINKED_DOC = ["tutorials/install", "tutorials/getting_started"] - - def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode): - github_url = None - if not target.endswith("html") and target.startswith("../../"): - url = target.replace("../", "") - github_url = url - if fromdocname in self.LINKED_DOC: - # unresolved links in these docs are all github links - github_url = target - - if github_url is not None: - if github_url.endswith("MODEL_ZOO") or github_url.endswith("README"): - # bug of recommonmark. - # https://github.com/readthedocs/recommonmark/blob/ddd56e7717e9745f11300059e4268e204138a6b1/recommonmark/parser.py#L152-L155 - github_url += ".md" - print("Ref {} resolved to github:{}".format(target, github_url)) - contnode["refuri"] = self.ROOT + github_url - return [("githuburl:any", contnode)] - else: - return [] - - -# to support markdown -from recommonmark.parser import CommonMarkParser - -sys.path.insert(0, os.path.abspath("../")) -os.environ["_DOC_BUILDING"] = "True" -DEPLOY = os.environ.get("READTHEDOCS") == "True" - - -# -- Project information ----------------------------------------------------- - -# fmt: off -try: - import torch # noqa -except ImportError: - for m in [ - "torch", "torchvision", "torch.nn", "torch.nn.parallel", "torch.distributed", "torch.multiprocessing", "torch.autograd", - "torch.autograd.function", "torch.nn.modules", "torch.nn.modules.utils", "torch.utils", "torch.utils.data", "torch.onnx", - "torchvision", "torchvision.ops", - ]: - sys.modules[m] = mock.Mock(name=m) - sys.modules['torch'].__version__ = "1.7" # fake version - HAS_TORCH = False -else: - try: - torch.ops.detectron2 = mock.Mock(name="torch.ops.detectron2") - except: - pass - HAS_TORCH = True - -for m in [ - "cv2", "scipy", "portalocker", "detectron2._C", - "pycocotools", "pycocotools.mask", "pycocotools.coco", "pycocotools.cocoeval", - "google", "google.protobuf", "google.protobuf.internal", "onnx", - "caffe2", "caffe2.proto", "caffe2.python", "caffe2.python.utils", "caffe2.python.onnx", "caffe2.python.onnx.backend", -]: - sys.modules[m] = mock.Mock(name=m) -# fmt: on -sys.modules["cv2"].__version__ = "3.4" - -import detectron2 # isort: skip - -if HAS_TORCH: - from detectron2.utils.env import fixup_module_metadata - - fixup_module_metadata("torch.nn", torch.nn.__dict__) - fixup_module_metadata("torch.utils.data", torch.utils.data.__dict__) - - -project = "detectron2" -copyright = "2019-2020, detectron2 contributors" -author = "detectron2 contributors" - -# The short X.Y version -version = detectron2.__version__ -# The full version, including 
alpha/beta/rc tags -release = version - - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -needs_sphinx = "3.0" - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "recommonmark", - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", - "sphinx.ext.intersphinx", - "sphinx.ext.todo", - "sphinx.ext.coverage", - "sphinx.ext.mathjax", - "sphinx.ext.viewcode", - "sphinx.ext.githubpages", -] - -# -- Configurations for plugins ------------ -napoleon_google_docstring = True -napoleon_include_init_with_doc = True -napoleon_include_special_with_doc = True -napoleon_numpy_docstring = False -napoleon_use_rtype = False -autodoc_inherit_docstrings = False -autodoc_member_order = "bysource" - -if DEPLOY: - intersphinx_timeout = 10 -else: - # skip this when building locally - intersphinx_timeout = 0.5 -intersphinx_mapping = { - "python": ("https://docs.python.org/3.7", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "torch": ("https://pytorch.org/docs/master/", None), -} -# ------------------------- - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -source_suffix = [".rst", ".md"] - -# The master toctree document. -master_doc = "index" - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "build", "README.md", "tutorials/README.md"] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - - -# -- Options for HTML output ------------------------------------------------- - -html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] -html_css_files = ["css/custom.css"] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = "detectron2doc" - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). 
- # - # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, "detectron2.tex", "detectron2 Documentation", "detectron2 contributors", "manual") -] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "detectron2", "detectron2 Documentation", [author], 1)] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - master_doc, - "detectron2", - "detectron2 Documentation", - author, - "detectron2", - "One line description of project.", - "Miscellaneous", - ) -] - - -# -- Options for todo extension ---------------------------------------------- - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True - - -def autodoc_skip_member(app, what, name, obj, skip, options): - # we hide something deliberately - if getattr(obj, "__HIDE_SPHINX_DOC__", False): - return True - - # Hide some that are deprecated or not intended to be used - HIDDEN = { - "ResNetBlockBase", - "GroupedBatchSampler", - "build_transform_gen", - "apply_transform_gens", - "TransformGen", - "apply_augmentations", - "StandardAugInput", - "build_batch_data_loader", - "draw_panoptic_seg_predictions", - "WarmupCosineLR", - "WarmupMultiStepLR", - "downgrade_config", - "upgrade_config", - "add_export_config", - } - try: - if name in HIDDEN or ( - hasattr(obj, "__doc__") and obj.__doc__.lower().strip().startswith("deprecated") - ): - print("Skipping deprecated object: {}".format(name)) - return True - except: - pass - return skip - - -_PAPER_DATA = { - "resnet": ("1512.03385", "Deep Residual Learning for Image Recognition"), - "fpn": ("1612.03144", "Feature Pyramid Networks for Object Detection"), - "mask r-cnn": ("1703.06870", "Mask R-CNN"), - "faster r-cnn": ( - "1506.01497", - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", - ), - "deformconv": ("1703.06211", "Deformable Convolutional Networks"), - "deformconv2": ("1811.11168", "Deformable ConvNets v2: More Deformable, Better Results"), - "panopticfpn": ("1901.02446", "Panoptic Feature Pyramid Networks"), - "retinanet": ("1708.02002", "Focal Loss for Dense Object Detection"), - "cascade r-cnn": ("1712.00726", "Cascade R-CNN: Delving into High Quality Object Detection"), - "lvis": ("1908.03195", "LVIS: A Dataset for Large Vocabulary Instance Segmentation"), - "rrpn": ("1703.01086", "Arbitrary-Oriented Scene Text Detection via Rotation Proposals"), - "imagenet in 1h": ("1706.02677", "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"), - "xception": ("1610.02357", "Xception: Deep Learning with Depthwise Separable Convolutions"), - "mobilenet": ( - "1704.04861", - "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications", - ), - "deeplabv3+": ( - "1802.02611", - "Encoder-Decoder with Atrous Separable 
Convolution for Semantic Image Segmentation", - ), - "dds": ("2003.13678", "Designing Network Design Spaces"), - "scaling": ("2103.06877", "Fast and Accurate Model Scaling"), - "fcos": ("2006.09214", "FCOS: A Simple and Strong Anchor-free Object Detector"), - "rethinking-batchnorm": ("2105.07576", 'Rethinking "Batch" in BatchNorm'), - "vitdet": ("2203.16527", "Exploring Plain Vision Transformer Backbones for Object Detection"), - "mvitv2": ( - "2112.01526", - "MViTv2: Improved Multiscale Vision Transformers for Classification and Detection", - ), - "swin": ( - "2103.14030", - "Swin Transformer: Hierarchical Vision Transformer using Shifted Windows", - ), - "omni3d": ( - "2207.10660", - "Omni3D: A Large Benchmark and Model for 3D Object Detection in the Wild", - ), -} - - -def paper_ref_role( - typ: str, - rawtext: str, - text: str, - lineno: int, - inliner, - options: Dict = {}, - content: List[str] = [], -): - """ - Parse :paper:`xxx`. Similar to the "extlinks" sphinx extension. - """ - from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title - - text = utils.unescape(text) - has_explicit_title, title, link = split_explicit_title(text) - link = link.lower() - if link not in _PAPER_DATA: - inliner.reporter.warning("Cannot find paper " + link) - paper_url, paper_title = "#", link - else: - paper_url, paper_title = _PAPER_DATA[link] - if "/" not in paper_url: - paper_url = "https://arxiv.org/abs/" + paper_url - if not has_explicit_title: - title = paper_title - pnode = nodes.reference(title, title, internal=False, refuri=paper_url) - return [pnode], [] - - -def setup(app): - from recommonmark.transform import AutoStructify - - app.add_domain(GithubURLDomain) - app.connect("autodoc-skip-member", autodoc_skip_member) - app.add_role("paper", paper_ref_role) - app.add_config_value( - "recommonmark_config", - {"enable_math": True, "enable_inline_math": True, "enable_eval_rst": True}, - True, - ) - app.add_transform(AutoStructify) diff --git a/detectron2/docs/index.rst b/detectron2/docs/index.rst deleted file mode 100644 index 8634b7b12ab906c10a78d6053428029799282ffd..0000000000000000000000000000000000000000 --- a/detectron2/docs/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. detectron2 documentation master file, created by - sphinx-quickstart on Sat Sep 21 13:46:45 2019. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to detectron2's documentation! -====================================== - -.. toctree:: - :maxdepth: 2 - - tutorials/index - notes/index - modules/index diff --git a/detectron2/docs/modules/checkpoint.rst b/detectron2/docs/modules/checkpoint.rst deleted file mode 100644 index 449caaffd8a9d5e13040cb64aca073703c579a5d..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/checkpoint.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.checkpoint -============================= - -.. automodule:: detectron2.checkpoint - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/config.rst b/detectron2/docs/modules/config.rst deleted file mode 100644 index c76913d83e696dfb02a8c25e8cd38bb25ad121f9..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/config.rst +++ /dev/null @@ -1,18 +0,0 @@ -detectron2.config -========================= - -Related tutorials: :doc:`../tutorials/configs`, :doc:`../tutorials/extend`. - -.. 
automodule:: detectron2.config - :members: - :undoc-members: - :show-inheritance: - - -Yaml Config References ------------------ - -.. literalinclude:: ../../detectron2/config/defaults.py - :language: python - :linenos: - :lines: 7- diff --git a/detectron2/docs/modules/data.rst b/detectron2/docs/modules/data.rst deleted file mode 100644 index 0d5bd89166fe6ad1d414c85055081f3fa9145764..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/data.rst +++ /dev/null @@ -1,37 +0,0 @@ -detectron2.data -======================= - -.. autodata:: detectron2.data.DatasetCatalog(dict) - :annotation: - -.. autodata:: detectron2.data.MetadataCatalog(dict) - :annotation: - -.. automodule:: detectron2.data - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.detection\_utils module ---------------------------------------- - -.. automodule:: detectron2.data.detection_utils - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.datasets module ---------------------------------------- - -.. automodule:: detectron2.data.datasets - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.samplers module ---------------------------------------- - -.. automodule:: detectron2.data.samplers - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/data_transforms.rst b/detectron2/docs/modules/data_transforms.rst deleted file mode 100644 index 1533a434bc1374a9825aa4fed0fab8abb2e8c02f..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/data_transforms.rst +++ /dev/null @@ -1,10 +0,0 @@ -detectron2.data.transforms -==================================== - -Related tutorial: :doc:`../tutorials/augmentation`. - -.. automodule:: detectron2.data.transforms - :members: - :undoc-members: - :show-inheritance: - :imported-members: diff --git a/detectron2/docs/modules/engine.rst b/detectron2/docs/modules/engine.rst deleted file mode 100644 index 7e0d2b0762a601566772b97aaedb3c55b447fab5..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/engine.rst +++ /dev/null @@ -1,26 +0,0 @@ -detectron2.engine -========================= - -Related tutorial: :doc:`../tutorials/training`. - -.. automodule:: detectron2.engine - :members: - :undoc-members: - :show-inheritance: - - -detectron2.engine.defaults module ---------------------------------- - -.. automodule:: detectron2.engine.defaults - :members: - :undoc-members: - :show-inheritance: - -detectron2.engine.hooks module ---------------------------------- - -.. automodule:: detectron2.engine.hooks - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/evaluation.rst b/detectron2/docs/modules/evaluation.rst deleted file mode 100644 index 69bfc4b9ef52ed26c61ec3d3feb5aa9bfa28da26..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/evaluation.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.evaluation -============================= - -.. automodule:: detectron2.evaluation - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/export.rst b/detectron2/docs/modules/export.rst deleted file mode 100644 index dcee14f869a7c0e60a1e861e07ecf1c49d272dac..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/export.rst +++ /dev/null @@ -1,9 +0,0 @@ -detectron2.export -========================= - -Related tutorial: :doc:`../tutorials/deployment`. - -.. 
automodule:: detectron2.export - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/fvcore.rst b/detectron2/docs/modules/fvcore.rst deleted file mode 100644 index c8bf9f58aea97cfad6430dd3c30924603cecf7ce..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/fvcore.rst +++ /dev/null @@ -1,49 +0,0 @@ -fvcore documentation -==================== - -Detectron2 depends on utilities in -`fvcore `_. -We include part of fvcore documentation here for easier reference. - -fvcore.nn ------------------ - -.. automodule:: fvcore.nn - :members: - :inherited-members: - :undoc-members: - :show-inheritance: - -fvcore.common ---------------------- - -.. automodule:: fvcore.common.checkpoint - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: fvcore.common.config - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: fvcore.common.history_buffer - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: fvcore.common.param_scheduler - :members: - :inherited-members: - :undoc-members: - :show-inheritance: - -.. automodule:: fvcore.common.registry - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: fvcore.common.timer - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/index.rst b/detectron2/docs/modules/index.rst deleted file mode 100644 index 14b754395bfbc581a181c7062acc47311103969d..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -API Documentation -================== - -.. toctree:: - - checkpoint - config - data - data_transforms - engine - evaluation - layers - model_zoo - modeling - solver - structures - utils - export - fvcore diff --git a/detectron2/docs/modules/layers.rst b/detectron2/docs/modules/layers.rst deleted file mode 100644 index b43b42a7d9d01ec9fa8ef8a56019efa2bc494677..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/layers.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.layers -========================= - -.. automodule:: detectron2.layers - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/model_zoo.rst b/detectron2/docs/modules/model_zoo.rst deleted file mode 100644 index 5abbad1ffe191480177e2173308cdc946159cf46..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/model_zoo.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.model_zoo -============================ - -.. automodule:: detectron2.model_zoo - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/modeling.rst b/detectron2/docs/modules/modeling.rst deleted file mode 100644 index a22c7ed35f4b694264c49c854109eb2fa85c20ea..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/modeling.rst +++ /dev/null @@ -1,58 +0,0 @@ -detectron2.modeling -=========================== - -.. automodule:: detectron2.modeling - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.poolers module ---------------------------------------- - -.. automodule:: detectron2.modeling.poolers - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.sampling module ------------------------------------- - -.. automodule:: detectron2.modeling.sampling - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.box_regression module ------------------------------------------- - -.. 
automodule:: detectron2.modeling.box_regression - :members: - :undoc-members: - :show-inheritance: - - -Model Registries ------------------ - -These are different registries provided in modeling. -Each registry provide you the ability to replace it with your customized component, -without having to modify detectron2's code. - -Note that it is impossible to allow users to customize any line of code directly. -Even just to add one line at some place, -you'll likely need to find out the smallest registry which contains that line, -and register your component to that registry. - - -.. autodata:: detectron2.modeling.META_ARCH_REGISTRY -.. autodata:: detectron2.modeling.BACKBONE_REGISTRY -.. autodata:: detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY -.. autodata:: detectron2.modeling.RPN_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ANCHOR_GENERATOR_REGISTRY -.. autodata:: detectron2.modeling.ROI_HEADS_REGISTRY -.. autodata:: detectron2.modeling.ROI_BOX_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ROI_MASK_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY diff --git a/detectron2/docs/modules/solver.rst b/detectron2/docs/modules/solver.rst deleted file mode 100644 index 59d98c72cceca33831681b5392d8bbec53fe70ad..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/solver.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.solver -========================= - -.. automodule:: detectron2.solver - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/structures.rst b/detectron2/docs/modules/structures.rst deleted file mode 100644 index 1369dc0882d387930cd4f571f80c3c3157af6de6..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/structures.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.structures -============================= - -.. automodule:: detectron2.structures - :members: - :undoc-members: - :show-inheritance: diff --git a/detectron2/docs/modules/utils.rst b/detectron2/docs/modules/utils.rst deleted file mode 100644 index ab58f2caf26b3beb08f72dd93d06485af5ace5c0..0000000000000000000000000000000000000000 --- a/detectron2/docs/modules/utils.rst +++ /dev/null @@ -1,80 +0,0 @@ -detectron2.utils -======================== - -detectron2.utils.colormap module --------------------------------- - -.. automodule:: detectron2.utils.colormap - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.comm module ----------------------------- - -.. automodule:: detectron2.utils.comm - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.events module ------------------------------- - -.. automodule:: detectron2.utils.events - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.logger module ------------------------------- - -.. automodule:: detectron2.utils.logger - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.registry module --------------------------------- - -.. automodule:: detectron2.utils.registry - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.memory module ----------------------------------- - -.. automodule:: detectron2.utils.memory - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.analysis module ----------------------------------- - -.. automodule:: detectron2.utils.analysis - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.visualizer module ----------------------------------- - -.. 
automodule:: detectron2.utils.visualizer - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.video\_visualizer module ------------------------------------------ - -.. automodule:: detectron2.utils.video_visualizer - :members: - :undoc-members: - :show-inheritance: - diff --git a/detectron2/docs/notes/benchmarks.md b/detectron2/docs/notes/benchmarks.md deleted file mode 100644 index b41588daf3a039b9034e80366c2710e90ba3e056..0000000000000000000000000000000000000000 --- a/detectron2/docs/notes/benchmarks.md +++ /dev/null @@ -1,196 +0,0 @@ - -# Benchmarks - -Here we benchmark the training speed of a Mask R-CNN in detectron2, -with some other popular open source Mask R-CNN implementations. - - -### Settings - -* Hardware: 8 NVIDIA V100s with NVLink. -* Software: Python 3.7, CUDA 10.1, cuDNN 7.6.5, PyTorch 1.5, - TensorFlow 1.15.0rc2, Keras 2.2.5, MxNet 1.6.0b20190820. -* Model: an end-to-end R-50-FPN Mask-RCNN model, using the same hyperparameter as the - [Detectron baseline config](https://github.com/facebookresearch/Detectron/blob/master/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml) - (it does not have scale augmentation). -* Metrics: We use the average throughput in iterations 100-500 to skip GPU warmup time. - Note that for R-CNN-style models, the throughput of a model typically changes during training, because - it depends on the predictions of the model. Therefore this metric is not directly comparable with - "train speed" in model zoo, which is the average speed of the entire training run. - - -### Main Results - -```eval_rst -+-------------------------------+--------------------+ -| Implementation | Throughput (img/s) | -+===============================+====================+ -| |D2| |PT| | 62 | -+-------------------------------+--------------------+ -| mmdetection_ |PT| | 53 | -+-------------------------------+--------------------+ -| maskrcnn-benchmark_ |PT| | 53 | -+-------------------------------+--------------------+ -| tensorpack_ |TF| | 50 | -+-------------------------------+--------------------+ -| simpledet_ |mxnet| | 39 | -+-------------------------------+--------------------+ -| Detectron_ |C2| | 19 | -+-------------------------------+--------------------+ -| `matterport/Mask_RCNN`__ |TF| | 14 | -+-------------------------------+--------------------+ - -.. _maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark/ -.. _tensorpack: https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN -.. _mmdetection: https://github.com/open-mmlab/mmdetection/ -.. _simpledet: https://github.com/TuSimple/simpledet/ -.. _Detectron: https://github.com/facebookresearch/Detectron -__ https://github.com/matterport/Mask_RCNN/ - -.. |D2| image:: https://github.com/facebookresearch/detectron2/raw/main/.github/Detectron2-Logo-Horz.svg?sanitize=true - :height: 15pt - :target: https://github.com/facebookresearch/detectron2/ -.. |PT| image:: https://pytorch.org/assets/images/logo-icon.svg - :width: 15pt - :height: 15pt - :target: https://pytorch.org -.. |TF| image:: https://static.nvidiagrid.net/ngc/containers/tensorflow.png - :width: 15pt - :height: 15pt - :target: https://tensorflow.org -.. |mxnet| image:: https://github.com/dmlc/web-data/raw/master/mxnet/image/mxnet_favicon.png - :width: 15pt - :height: 15pt - :target: https://mxnet.apache.org/ -.. 
|C2| image:: https://caffe2.ai/static/logo.svg - :width: 15pt - :height: 15pt - :target: https://caffe2.ai -``` - - -Details for each implementation: - -* __Detectron2__: with release v0.1.2, run: - ``` - python tools/train_net.py --config-file configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml --num-gpus 8 - ``` - -* __mmdetection__: at commit `b0d845f`, run - ``` - ./tools/dist_train.sh configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py 8 - ``` - -* __maskrcnn-benchmark__: use commit `0ce8f6f` with `sed -i 's/torch.uint8/torch.bool/g' **/*.py; sed -i 's/AT_CHECK/TORCH_CHECK/g' **/*.cu` - to make it compatible with PyTorch 1.5. Then, run training with - ``` - python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml - ``` - The speed we observed is faster than its model zoo, likely due to different software versions. - -* __tensorpack__: at commit `caafda`, `export TF_CUDNN_USE_AUTOTUNE=0`, then run - ``` - mpirun -np 8 ./train.py --config DATA.BASEDIR=/data/coco TRAINER=horovod BACKBONE.STRIDE_1X1=True TRAIN.STEPS_PER_EPOCH=50 --load ImageNet-R50-AlignPadding.npz - ``` - -* __SimpleDet__: at commit `9187a1`, run - ``` - python detection_train.py --config config/mask_r50v1_fpn_1x.py - ``` - -* __Detectron__: run - ``` - python tools/train_net.py --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml - ``` - Note that many of its ops run on CPUs, therefore the performance is limited. - -* __matterport/Mask_RCNN__: at commit `3deaec`, apply the following diff, `export TF_CUDNN_USE_AUTOTUNE=0`, then run - ``` - python coco.py train --dataset=/data/coco/ --model=imagenet - ``` - Note that many small details in this implementation might be different - from Detectron's standards. - -
- - (diff to make it use the same hyperparameters - click to expand) - - - ```diff - diff --git i/mrcnn/model.py w/mrcnn/model.py - index 62cb2b0..61d7779 100644 - --- i/mrcnn/model.py - +++ w/mrcnn/model.py - @@ -2367,8 +2367,8 @@ class MaskRCNN(): - epochs=epochs, - steps_per_epoch=self.config.STEPS_PER_EPOCH, - callbacks=callbacks, - - validation_data=val_generator, - - validation_steps=self.config.VALIDATION_STEPS, - + #validation_data=val_generator, - + #validation_steps=self.config.VALIDATION_STEPS, - max_queue_size=100, - workers=workers, - use_multiprocessing=True, - diff --git i/mrcnn/parallel_model.py w/mrcnn/parallel_model.py - index d2bf53b..060172a 100644 - --- i/mrcnn/parallel_model.py - +++ w/mrcnn/parallel_model.py - @@ -32,6 +32,7 @@ class ParallelModel(KM.Model): - keras_model: The Keras model to parallelize - gpu_count: Number of GPUs. Must be > 1 - """ - + super().__init__() - self.inner_model = keras_model - self.gpu_count = gpu_count - merged_outputs = self.make_parallel() - diff --git i/samples/coco/coco.py w/samples/coco/coco.py - index 5d172b5..239ed75 100644 - --- i/samples/coco/coco.py - +++ w/samples/coco/coco.py - @@ -81,7 +81,10 @@ class CocoConfig(Config): - IMAGES_PER_GPU = 2 - - # Uncomment to train on 8 GPUs (default is 1) - - # GPU_COUNT = 8 - + GPU_COUNT = 8 - + BACKBONE = "resnet50" - + STEPS_PER_EPOCH = 50 - + TRAIN_ROIS_PER_IMAGE = 512 - - # Number of classes (including background) - NUM_CLASSES = 1 + 80 # COCO has 80 classes - @@ -496,29 +499,10 @@ if __name__ == '__main__': - # *** This training schedule is an example. Update to your needs *** - - # Training - Stage 1 - - print("Training network heads") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=40, - - layers='heads', - - augmentation=augmentation) - - - - # Training - Stage 2 - - # Finetune layers from ResNet stage 4 and up - - print("Fine tune Resnet stage 4 and up") - - model.train(dataset_train, dataset_val, - - learning_rate=config.LEARNING_RATE, - - epochs=120, - - layers='4+', - - augmentation=augmentation) - - - - # Training - Stage 3 - - # Fine tune all layers - - print("Fine tune all layers") - - model.train(dataset_train, dataset_val, - - learning_rate=config.LEARNING_RATE / 10, - - epochs=160, - - layers='all', - + layers='3+', - augmentation=augmentation) - - elif args.command == "evaluate": - ``` - -
diff --git a/detectron2/docs/notes/changelog.md b/detectron2/docs/notes/changelog.md deleted file mode 100644 index 000e9f8898dba53f54121a5325ba5165e45ddea2..0000000000000000000000000000000000000000 --- a/detectron2/docs/notes/changelog.md +++ /dev/null @@ -1,48 +0,0 @@ -# Change Log and Backward Compatibility - -### Releases -See release logs at -[https://github.com/facebookresearch/detectron2/releases](https://github.com/facebookresearch/detectron2/releases) -for new updates. - -### Backward Compatibility - -Due to the research nature of what the library does, there might be backward incompatible changes. -But we try to reduce users' disruption by the following ways: -* APIs listed in [API documentation](https://detectron2.readthedocs.io/modules/index.html), including - function/class names, their arguments, and documented class attributes, are considered *stable* unless - otherwise noted in the documentation. - They are less likely to be broken, but if needed, will trigger a deprecation warning for a reasonable period - before getting broken, and will be documented in release logs. -* Others functions/classses/attributes are considered internal, and are more likely to change. - However, we're aware that some of them may be already used by other projects, and in particular we may - use them for convenience among projects under `detectron2/projects`. - For such APIs, we may treat them as stable APIs and also apply the above strategies. - They may be promoted to stable when we're ready. -* Projects under "detectron2/projects" or imported with "detectron2.projects" are research projects - and are all considered experimental. -* Classes/functions that contain the word "default" or are explicitly documented to produce - "default behavior" may change their behaviors when new features are added. - -Despite of the possible breakage, if a third-party project would like to keep up with the latest updates -in detectron2, using it as a library will still be less disruptive than forking, because -the frequency and scope of API changes will be much smaller than code changes. - -To see such changes, search for "incompatible changes" in [release logs](https://github.com/facebookresearch/detectron2/releases). - -### Config Version Change Log - -Detectron2's config version has not been changed since open source. -There is no need for an open source user to worry about this. - -* v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`. -* v2: A batch of rename of many configurations before release. - -### Silent Regressions in Historical Versions: - -We list a few silent regressions, since they may silently produce incorrect results and will be hard to debug. - -* 04/01/2020 - 05/11/2020: Bad accuracy if `TRAIN_ON_PRED_BOXES` is set to True. -* 03/30/2020 - 04/01/2020: ResNets are not correctly built. -* 12/19/2019 - 12/26/2019: Using aspect ratio grouping causes a drop in accuracy. -* - 11/9/2019: Test time augmentation does not predict the last category. diff --git a/detectron2/docs/notes/compatibility.md b/detectron2/docs/notes/compatibility.md deleted file mode 100644 index 83d93f51c056c598c1209f9a21a4e04407b827f0..0000000000000000000000000000000000000000 --- a/detectron2/docs/notes/compatibility.md +++ /dev/null @@ -1,84 +0,0 @@ -# Compatibility with Other Libraries - -## Compatibility with Detectron (and maskrcnn-benchmark) - -Detectron2 addresses some legacy issues left in Detectron. 
As a result, their models -are not compatible: -running inference with the same model weights will produce different results in the two code bases. - -The major differences regarding inference are: - -- The height and width of a box with corners (x1, y1) and (x2, y2) is now computed more naturally as - width = x2 - x1 and height = y2 - y1; - In Detectron, a "+ 1" was added both height and width. - - Note that the relevant ops in Caffe2 have [adopted this change of convention](https://github.com/pytorch/pytorch/pull/20550) - with an extra option. - So it is still possible to run inference with a Detectron2-trained model in Caffe2. - - The change in height/width calculations most notably changes: - - encoding/decoding in bounding box regression. - - non-maximum suppression. The effect here is very negligible, though. - -- RPN now uses simpler anchors with fewer quantization artifacts. - - In Detectron, the anchors were quantized and - [do not have accurate areas](https://github.com/facebookresearch/Detectron/issues/227). - In Detectron2, the anchors are center-aligned to feature grid points and not quantized. - -- Classification layers have a different ordering of class labels. - - This involves any trainable parameter with shape (..., num_categories + 1, ...). - In Detectron2, integer labels [0, K-1] correspond to the K = num_categories object categories - and the label "K" corresponds to the special "background" category. - In Detectron, label "0" means background, and labels [1, K] correspond to the K categories. - -- ROIAlign is implemented differently. The new implementation is [available in Caffe2](https://github.com/pytorch/pytorch/pull/23706). - - 1. All the ROIs are shifted by half a pixel compared to Detectron in order to create better image-feature-map alignment. - See `layers/roi_align.py` for details. - To enable the old behavior, use `ROIAlign(aligned=False)`, or `POOLER_TYPE=ROIAlign` instead of - `ROIAlignV2` (the default). - - 1. The ROIs are not required to have a minimum size of 1. - This will lead to tiny differences in the output, but should be negligible. - -- Mask inference function is different. - - In Detectron2, the "paste_mask" function is different and should be more accurate than in Detectron. This change - can improve mask AP on COCO by ~0.5% absolute. - -There are some other differences in training as well, but they won't affect -model-level compatibility. The major ones are: - -- We fixed a [bug](https://github.com/facebookresearch/Detectron/issues/459) in - Detectron, by making `RPN.POST_NMS_TOPK_TRAIN` per-image, rather than per-batch. - The fix may lead to a small accuracy drop for a few models (e.g. keypoint - detection) and will require some parameter tuning to match the Detectron results. -- For simplicity, we change the default loss in bounding box regression to L1 loss, instead of smooth L1 loss. - We have observed that this tends to slightly decrease box AP50 while improving box AP for higher - overlap thresholds (and leading to a slight overall improvement in box AP). -- We interpret the coordinates in COCO bounding box and segmentation annotations - as coordinates in range `[0, width]` or `[0, height]`. The coordinates in - COCO keypoint annotations are interpreted as pixel indices in range `[0, width - 1]` or `[0, height - 1]`. - Note that this affects how flip augmentation is implemented. - - -[This article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) -explains more details on the above mentioned issues -about pixels, coordinates, and "+1"s. 
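The following is a minimal sketch written for this note (not code taken from either library) that makes the box-size and label-ordering conventions above concrete; the helper name is hypothetical:

```python
# A box given as (x1, y1, x2, y2):
x1, y1, x2, y2 = 10.0, 20.0, 50.0, 80.0

# Detectron2 convention: width/height are plain coordinate differences.
w_d2, h_d2 = x2 - x1, y2 - y1          # 40.0, 60.0

# Legacy Detectron convention: an extra "+ 1" on both width and height.
w_d1, h_d1 = x2 - x1 + 1, y2 - y1 + 1  # 41.0, 61.0

# Class-label layout for K object categories:
# Detectron2: labels 0..K-1 are objects, label K is "background".
# Detectron:  label 0 is "background", labels 1..K are objects.
K = 80

def detectron_label_to_detectron2(label: int, num_classes: int = K) -> int:
    """Hypothetical helper remapping a legacy Detectron class index to detectron2's layout."""
    return num_classes if label == 0 else label - 1
```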
- - -## Compatibility with Caffe2 - -As mentioned above, despite the incompatibilities with Detectron, the relevant -ops have been implemented in Caffe2. -Therefore, models trained with detectron2 can be converted in Caffe2. -See [Deployment](../tutorials/deployment.md) for the tutorial. - -## Compatibility with TensorFlow - -Most ops are available in TensorFlow, although some tiny differences in -the implementation of resize / ROIAlign / padding need to be addressed. -A working conversion script is provided by [tensorpack Faster R-CNN](https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN/convert_d2) -to run a standard detectron2 model in TensorFlow. diff --git a/detectron2/docs/notes/contributing.md b/detectron2/docs/notes/contributing.md deleted file mode 100644 index 95181235eaff1cb5cbb2dc554e8d4991b603d0e5..0000000000000000000000000000000000000000 --- a/detectron2/docs/notes/contributing.md +++ /dev/null @@ -1 +0,0 @@ -../../.github/CONTRIBUTING.md \ No newline at end of file diff --git a/detectron2/docs/notes/index.rst b/detectron2/docs/notes/index.rst deleted file mode 100644 index 63cf907be7bb15f5316af6d44a46df601755a86b..0000000000000000000000000000000000000000 --- a/detectron2/docs/notes/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Notes -====================================== - -.. toctree:: - :maxdepth: 2 - - benchmarks - compatibility - contributing - changelog diff --git a/detectron2/docs/requirements.txt b/detectron2/docs/requirements.txt deleted file mode 100644 index 720a1b1193de23c3354c55cf7ec05cdc5974416a..0000000000000000000000000000000000000000 --- a/detectron2/docs/requirements.txt +++ /dev/null @@ -1,23 +0,0 @@ -docutils==0.16 -# https://github.com/sphinx-doc/sphinx/commit/7acd3ada3f38076af7b2b5c9f3b60bb9c2587a3d -sphinx==3.2.0 -recommonmark==0.6.0 -sphinx_rtd_theme -# Dependencies here are only those required by import -termcolor -numpy -tqdm -matplotlib -termcolor -yacs -tabulate -cloudpickle -Pillow -future -git+https://github.com/facebookresearch/fvcore.git -https://download.pytorch.org/whl/cpu/torch-1.8.1%2Bcpu-cp37-cp37m-linux_x86_64.whl -https://download.pytorch.org/whl/cpu/torchvision-0.9.1%2Bcpu-cp37-cp37m-linux_x86_64.whl -omegaconf>=2.1.0.dev24 -hydra-core>=1.1.0.dev5 -scipy -timm diff --git a/detectron2/docs/tutorials/README.md b/detectron2/docs/tutorials/README.md deleted file mode 100644 index 1ca9c94d042ef838143a45490fe6b4556c19f3c9..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Read the docs: - -The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). -Documents in this directory are not meant to be read on github. diff --git a/detectron2/docs/tutorials/augmentation.md b/detectron2/docs/tutorials/augmentation.md deleted file mode 100644 index 7601a082ceadf645e32468c2045dfe50c1216efc..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/augmentation.md +++ /dev/null @@ -1,186 +0,0 @@ - -# Data Augmentation - -Augmentation is an important part of training. -Detectron2's data augmentation system aims at addressing the following goals: - -1. Allow augmenting multiple data types together - (e.g., images together with their bounding boxes and masks) -2. Allow applying a sequence of statically-declared augmentation -3. Allow adding custom new data types to augment (rotated bounding boxes, video clips, etc.) -4. 
Process and manipulate the __operations__ that are applied by augmentations - -The first two features cover most of the common use cases, and is also -available in other libraries such as [albumentations](https://medium.com/pytorch/multi-target-in-albumentations-16a777e9006e). -Supporting other features adds some overhead to detectron2's augmentation API, -which we'll explain in this tutorial. - -This tutorial focuses on how to use augmentations when writing new data loaders, -and how to write new augmentations. -If you use the default data loader in detectron2, it already supports taking a user-provided list of custom augmentations, -as explained in the [Dataloader tutorial](data_loading). - -## Basic Usage - -The basic usage of feature (1) and (2) is like the following: -```python -from detectron2.data import transforms as T -# Define a sequence of augmentations: -augs = T.AugmentationList([ - T.RandomBrightness(0.9, 1.1), - T.RandomFlip(prob=0.5), - T.RandomCrop("absolute", (640, 640)) -]) # type: T.Augmentation - -# Define the augmentation input ("image" required, others optional): -input = T.AugInput(image, boxes=boxes, sem_seg=sem_seg) -# Apply the augmentation: -transform = augs(input) # type: T.Transform -image_transformed = input.image # new image -sem_seg_transformed = input.sem_seg # new semantic segmentation - -# For any extra data that needs to be augmented together, use transform, e.g.: -image2_transformed = transform.apply_image(image2) -polygons_transformed = transform.apply_polygons(polygons) -``` - -Three basic concepts are involved here. They are: -* [T.Augmentation](../modules/data_transforms.html#detectron2.data.transforms.Augmentation) defines the __"policy"__ to modify inputs. - * its `__call__(AugInput) -> Transform` method augments the inputs in-place, and returns the operation that is applied -* [T.Transform](../modules/data_transforms.html#detectron2.data.transforms.Transform) - implements the actual __operations__ to transform data - * it has methods such as `apply_image`, `apply_coords` that define how to transform each data type -* [T.AugInput](../modules/data_transforms.html#detectron2.data.transforms.AugInput) - stores inputs needed by `T.Augmentation` and how they should be transformed. - This concept is needed for some advanced usage. - Using this class directly should be sufficient for all common use cases, - since extra data not in `T.AugInput` can be augmented using the returned - `transform`, as shown in the above example. - -## Write New Augmentations - -Most 2D augmentations only need to know about the input image. Such augmentation can be implemented easily like this: - -```python -class MyColorAugmentation(T.Augmentation): - def get_transform(self, image): - r = np.random.rand(2) - return T.ColorTransform(lambda x: x * r[0] + r[1] * 10) - -class MyCustomResize(T.Augmentation): - def get_transform(self, image): - old_h, old_w = image.shape[:2] - new_h, new_w = int(old_h * np.random.rand()), int(old_w * 1.5) - return T.ResizeTransform(old_h, old_w, new_h, new_w) - -augs = MyCustomResize() -transform = augs(input) -``` - -In addition to image, any attributes of the given `AugInput` can be used as long -as they are part of the function signature, e.g.: - -```python -class MyCustomCrop(T.Augmentation): - def get_transform(self, image, sem_seg): - # decide where to crop using both image and sem_seg - return T.CropTransform(...) 
- -augs = MyCustomCrop() -assert hasattr(input, "image") and hasattr(input, "sem_seg") -transform = augs(input) -``` - -New transform operation can also be added by subclassing -[T.Transform](../modules/data_transforms.html#detectron2.data.transforms.Transform). - -## Advanced Usage - -We give a few examples of advanced usages that -are enabled by our system. -These options can be interesting to new research, -although changing them is often not needed -for standard use cases. - -### Custom transform strategy - -Instead of only returning the augmented data, detectron2's `Augmentation` returns the __operations__ as `T.Transform`. -This allows users to apply custom transform strategy on their data. -We use keypoints data as an example. - -Keypoints are (x, y) coordinates, but they are not so trivial to augment due to the semantic meaning they carry. -Such meaning is only known to the users, therefore users may want to augment them manually -by looking at the returned `transform`. -For example, when an image is horizontally flipped, we'd like to swap the keypoint annotations for "left eye" and "right eye". -This can be done like this (included by default in detectron2's default data loader): -```python -# augs, input are defined as in previous examples -transform = augs(input) # type: T.Transform -keypoints_xy = transform.apply_coords(keypoints_xy) # transform the coordinates - -# get a list of all transforms that were applied -transforms = T.TransformList([transform]).transforms -# check if it is flipped for odd number of times -do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms) % 2 == 1 -if do_hflip: - keypoints_xy = keypoints_xy[flip_indices_mapping] -``` - -As another example, keypoints annotations often have a "visibility" field. -A sequence of augmentations might augment a visible keypoint out of the image boundary (e.g. with cropping), -but then bring it back within the boundary afterwards (e.g. with image padding). -If users decide to label such keypoints "invisible", -then the visibility check has to happen after every transform step. -This can be achieved by: - -```python -transform = augs(input) # type: T.TransformList -assert isinstance(transform, T.TransformList) -for t in transform.transforms: - keypoints_xy = t.apply_coords(keypoints_xy) - visibility &= (keypoints_xy >= [0, 0] & keypoints_xy <= [W, H]).all(axis=1) - -# btw, detectron2's `transform_keypoint_annotations` function chooses to label such keypoints "visible": -# keypoints_xy = transform.apply_coords(keypoints_xy) -# visibility &= (keypoints_xy >= [0, 0] & keypoints_xy <= [W, H]).all(axis=1) -``` - - -### Geometrically invert the transform -If images are pre-processed by augmentations before inference, the predicted results -such as segmentation masks are localized on the augmented image. -We'd like to invert the applied augmentation with the [inverse()](../modules/data_transforms.html#detectron2.data.transforms.Transform.inverse) -API, to obtain results on the original image: -```python -transform = augs(input) -pred_mask = make_prediction(input.image) -inv_transform = transform.inverse() -pred_mask_orig = inv_transform.apply_segmentation(pred_mask) -``` - -### Add new data types - -[T.Transform](../modules/data_transforms.html#detectron2.data.transforms.Transform) -supports a few common data types to transform, including images, coordinates, masks, boxes, polygons. 
-It allows registering new data types, e.g.: -```python -@T.HFlipTransform.register_type("rotated_boxes") -def func(flip_transform: T.HFlipTransform, rotated_boxes: Any): - # do the work - return flipped_rotated_boxes - -t = HFlipTransform(width=800) -transformed_rotated_boxes = t.apply_rotated_boxes(rotated_boxes) # func will be called -``` - -### Extend T.AugInput - -An augmentation can only access attributes available in the given input. -[T.AugInput](../modules/data_transforms.html#detectron2.data.transforms.StandardAugInput) defines "image", "boxes", "sem_seg", -which are sufficient for common augmentation strategies to decide how to augment. -If not, a custom implementation is needed. - -By re-implement the "transform()" method in AugInput, it is also possible to -augment different fields in ways that are dependent on each other. -Such use case is uncommon (e.g. post-process bounding box based on augmented masks), but allowed by the system. - diff --git a/detectron2/docs/tutorials/builtin_datasets.md b/detectron2/docs/tutorials/builtin_datasets.md deleted file mode 100644 index 0ba82423ad498bdd86274ada56a201134a590d94..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/builtin_datasets.md +++ /dev/null @@ -1 +0,0 @@ -../../datasets/README.md \ No newline at end of file diff --git a/detectron2/docs/tutorials/configs.md b/detectron2/docs/tutorials/configs.md deleted file mode 100644 index 49538d0532994664584460560f4f809ff3a6e6df..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/configs.md +++ /dev/null @@ -1,62 +0,0 @@ -# Yacs Configs - -Detectron2 provides a key-value based config system that can be -used to obtain standard, common behaviors. - -This system uses YAML and [yacs](https://github.com/rbgirshick/yacs). -Yaml is a very limited language, -so we do not expect all features in detectron2 to be available through configs. -If you need something that's not available in the config space, -please write code using detectron2's API. - -With the introduction of a more powerful [LazyConfig system](lazyconfigs.md), -we no longer add functionality / new keys to the Yacs/Yaml-based config system. - -### Basic Usage - -Some basic usage of the `CfgNode` object is shown here. See more in [documentation](../modules/config.html#detectron2.config.CfgNode). -```python -from detectron2.config import get_cfg -cfg = get_cfg() # obtain detectron2's default config -cfg.xxx = yyy # add new configs for your own custom components -cfg.merge_from_file("my_cfg.yaml") # load values from a file - -cfg.merge_from_list(["MODEL.WEIGHTS", "weights.pth"]) # can also load values from a list of str -print(cfg.dump()) # print formatted configs -with open("output.yaml", "w") as f: - f.write(cfg.dump()) # save config to file -``` - -In addition to the basic Yaml syntax, the config file can -define a `_BASE_: base.yaml` field, which will load a base config file first. -Values in the base config will be overwritten in sub-configs, if there are any conflicts. -We provided several base configs for standard model architectures. - -Many builtin tools in detectron2 accept command line config overwrite: -Key-value pairs provided in the command line will overwrite the existing values in the config file. 
-For example, [demo.py](../../demo/demo.py) can be used with -```sh -./demo.py --config-file config.yaml [--other-options] \ - --opts MODEL.WEIGHTS /path/to/weights INPUT.MIN_SIZE_TEST 1000 -``` - -To see a list of available configs in detectron2 and what they mean, -check [Config References](../modules/config.html#config-references) - -### Configs in Projects - -A project that lives outside the detectron2 library may define its own configs, which will need to be added -for the project to be functional, e.g.: -```python -from detectron2.projects.point_rend import add_pointrend_config -cfg = get_cfg() # obtain detectron2's default config -add_pointrend_config(cfg) # add pointrend's default config -# ... ... -``` - -### Best Practice with Configs - -1. Treat the configs you write as "code": avoid copying them or duplicating them; use `_BASE_` - to share common parts between configs. - -2. Keep the configs you write simple: don't include keys that do not affect the experimental setting. diff --git a/detectron2/docs/tutorials/data_loading.md b/detectron2/docs/tutorials/data_loading.md deleted file mode 100644 index 1d2769fc513abb0981a140f3a6b6432538704261..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/data_loading.md +++ /dev/null @@ -1,95 +0,0 @@ - -# Dataloader - -Dataloader is the component that provides data to models. -A dataloader usually (but not necessarily) takes raw information from [datasets](./datasets.md), -and process them into a format needed by the model. - -## How the Existing Dataloader Works - -Detectron2 contains a builtin data loading pipeline. -It's good to understand how it works, in case you need to write a custom one. - -Detectron2 provides two functions -[build_detection_{train,test}_loader](../modules/data.html#detectron2.data.build_detection_train_loader) -that create a default data loader from a given config. -Here is how `build_detection_{train,test}_loader` work: - -1. It takes the name of a registered dataset (e.g., "coco_2017_train") and loads a `list[dict]` representing the dataset items - in a lightweight format. These dataset items are not yet ready to be used by the model (e.g., images are - not loaded into memory, random augmentations have not been applied, etc.). - Details about the dataset format and dataset registration can be found in - [datasets](./datasets.md). -2. Each dict in this list is mapped by a function ("mapper"): - * Users can customize this mapping function by specifying the "mapper" argument in - `build_detection_{train,test}_loader`. The default mapper is [DatasetMapper](../modules/data.html#detectron2.data.DatasetMapper). - * The output format of the mapper can be arbitrary, as long as it is accepted by the consumer of this data loader (usually the model). - The outputs of the default mapper, after batching, follow the default model input format documented in - [Use Models](./models.html#model-input-format). - * The role of the mapper is to transform the lightweight representation of a dataset item into a format - that is ready for the model to consume (including, e.g., read images, perform random data augmentation and convert to torch Tensors). - If you would like to perform custom transformations to data, you often want a custom mapper. -3. The outputs of the mapper are batched (simply into a list). -4. This batched data is the output of the data loader. Typically, it's also the input of - `model.forward()`. 
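To make the pipeline above concrete, here is a minimal sketch of how the default train loader is typically consumed. It assumes `cfg` is a detectron2 config and `model` is a model already built from it (e.g. with `build_model(cfg)`); each batch yielded by the loader is the `list[dict]` produced by the mapper and batching steps described above:

```python
from detectron2.data import build_detection_train_loader
from detectron2.utils.events import EventStorage

# Minimal sketch, assuming `cfg` and `model` already exist.
data_loader = build_detection_train_loader(cfg)  # yields batches in list[dict] format
with EventStorage():  # builtin models log training statistics to an EventStorage
    for batch in data_loader:
        loss_dict = model(batch)  # in training mode, builtin models return a dict of losses
        break  # one batch is enough for this illustration
```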
- - -## Write a Custom Dataloader - -Using a different "mapper" with `build_detection_{train,test}_loader(mapper=)` works for most use cases -of custom data loading. -For example, if you want to resize all images to a fixed size for training, use: - -```python -import detectron2.data.transforms as T -from detectron2.data import DatasetMapper # the default mapper -dataloader = build_detection_train_loader(cfg, - mapper=DatasetMapper(cfg, is_train=True, augmentations=[ - T.Resize((800, 800)) - ])) -# use this dataloader instead of the default -``` -If the arguments of the default [DatasetMapper](../modules/data.html#detectron2.data.DatasetMapper) -does not provide what you need, you may write a custom mapper function and use it instead, e.g.: - -```python -from detectron2.data import detection_utils as utils - # Show how to implement a minimal mapper, similar to the default DatasetMapper -def mapper(dataset_dict): - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - # can use other ways to read image - image = utils.read_image(dataset_dict["file_name"], format="BGR") - # See "Data Augmentation" tutorial for details usage - auginput = T.AugInput(image) - transform = T.Resize((800, 800))(auginput) - image = torch.from_numpy(auginput.image.transpose(2, 0, 1)) - annos = [ - utils.transform_instance_annotations(annotation, [transform], image.shape[1:]) - for annotation in dataset_dict.pop("annotations") - ] - return { - # create the format that the model expects - "image": image, - "instances": utils.annotations_to_instances(annos, image.shape[1:]) - } -dataloader = build_detection_train_loader(cfg, mapper=mapper) -``` - -If you want to change not only the mapper (e.g., in order to implement different sampling or batching logic), -`build_detection_train_loader` won't work and you will need to write a different data loader. -The data loader is simply a -python iterator that produces [the format](./models.md) that the model accepts. -You can implement it using any tools you like. - -No matter what to implement, it's recommended to -check out [API documentation of detectron2.data](../modules/data) to learn more about the APIs of -these functions. - -## Use a Custom Dataloader - -If you use [DefaultTrainer](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer), -you can overwrite its `build_{train,test}_loader` method to use your own dataloader. -See the [deeplab dataloader](../../projects/DeepLab/train_net.py) -for an example. - -If you write your own training loop, you can plug in your data loader easily. diff --git a/detectron2/docs/tutorials/datasets.md b/detectron2/docs/tutorials/datasets.md deleted file mode 100644 index 91103f64264aa6f3059611c5fe06ecd65bcb986f..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/datasets.md +++ /dev/null @@ -1,290 +0,0 @@ -# Use Custom Datasets - -This document explains how the dataset APIs -([DatasetCatalog](../modules/data.html#detectron2.data.DatasetCatalog), [MetadataCatalog](../modules/data.html#detectron2.data.MetadataCatalog)) -work, and how to use them to add custom datasets. - -Datasets that have builtin support in detectron2 are listed in [builtin datasets](builtin_datasets.md). -If you want to use a custom dataset while also reusing detectron2's data loaders, -you will need to: - -1. __Register__ your dataset (i.e., tell detectron2 how to obtain your dataset). -2. Optionally, __register metadata__ for your dataset. - -Next, we explain the above two concepts in detail. 
- -The [Colab tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has a live example of how to register and train on a dataset of custom formats. - -### Register a Dataset - -To let detectron2 know how to obtain a dataset named "my_dataset", users need to implement -a function that returns the items in your dataset and then tell detectron2 about this -function: -```python -def my_dataset_function(): - ... - return list[dict] in the following format - -from detectron2.data import DatasetCatalog -DatasetCatalog.register("my_dataset", my_dataset_function) -# later, to access the data: -data: List[Dict] = DatasetCatalog.get("my_dataset") -``` - -Here, the snippet associates a dataset named "my_dataset" with a function that returns the data. -The function must return the same data (with same order) if called multiple times. -The registration stays effective until the process exits. - -The function can do arbitrary things and should return the data in `list[dict]`, each dict in either -of the following formats: -1. Detectron2's standard dataset dict, described below. This will make it work with many other builtin - features in detectron2, so it's recommended to use it when it's sufficient. -2. Any custom format. You can also return arbitrary dicts in your own format, - such as adding extra keys for new tasks. - Then you will need to handle them properly downstream as well. - See below for more details. - -#### Standard Dataset Dicts - -For standard tasks -(instance detection, instance/semantic/panoptic segmentation, keypoint detection), -we load the original dataset into `list[dict]` with a specification similar to COCO's annotations. -This is our standard representation for a dataset. - -Each dict contains information about one image. -The dict may have the following fields, -and the required fields vary based on what the dataloader or the task needs (see more below). - -```eval_rst -.. list-table:: - :header-rows: 1 - - * - Task - - Fields - * - Common - - file_name, height, width, image_id - - * - Instance detection/segmentation - - annotations - - * - Semantic segmentation - - sem_seg_file_name - - * - Panoptic segmentation - - pan_seg_file_name, segments_info -``` - -+ `file_name`: the full path to the image file. -+ `height`, `width`: integer. The shape of the image. -+ `image_id` (str or int): a unique id that identifies this image. Required by many - evaluators to identify the images, but a dataset may use it for different purposes. -+ `annotations` (list[dict]): Required by __instance detection/segmentation or keypoint detection__ tasks. - Each dict corresponds to annotations of one instance in this image, and - may contain the following keys: - + `bbox` (list[float], required): list of 4 numbers representing the bounding box of the instance. - + `bbox_mode` (int, required): the format of bbox. It must be a member of - [structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode). - Currently supports: `BoxMode.XYXY_ABS`, `BoxMode.XYWH_ABS`. - + `category_id` (int, required): an integer in the range [0, num_categories-1] representing the category label. - The value num_categories is reserved to represent the "background" category, if applicable. - + `segmentation` (list[list[float]] or dict): the segmentation mask of the instance. - + If `list[list[float]]`, it represents a list of polygons, one for each connected component - of the object. Each `list[float]` is one simple polygon in the format of `[x1, y1, ..., xn, yn]` (nβ‰₯3). 
- The Xs and Ys are absolute coordinates in unit of pixels. - + If `dict`, it represents the per-pixel segmentation mask in COCO's compressed RLE format. - The dict should have keys "size" and "counts". You can convert a uint8 segmentation mask of 0s and - 1s into such dict by `pycocotools.mask.encode(np.asarray(mask, order="F"))`. - `cfg.INPUT.MASK_FORMAT` must be set to `bitmask` if using the default data loader with such format. - + `keypoints` (list[float]): in the format of [x1, y1, v1,..., xn, yn, vn]. - v[i] means the [visibility](http://cocodataset.org/#format-data) of this keypoint. - `n` must be equal to the number of keypoint categories. - The Xs and Ys are absolute real-value coordinates in range [0, W or H]. - - (Note that the keypoint coordinates in COCO format are integers in range [0, W-1 or H-1], which is different - from our standard format. Detectron2 adds 0.5 to COCO keypoint coordinates to convert them from discrete - pixel indices to floating point coordinates.) - + `iscrowd`: 0 (default) or 1. Whether this instance is labeled as COCO's "crowd - region". Don't include this field if you don't know what it means. - - If `annotations` is an empty list, it means the image is labeled to have no objects. - Such images will by default be removed from training, - but can be included using `DATALOADER.FILTER_EMPTY_ANNOTATIONS`. - -+ `sem_seg_file_name` (str): - The full path to the semantic segmentation ground truth file. - It should be a grayscale image whose pixel values are integer labels. -+ `pan_seg_file_name` (str): - The full path to panoptic segmentation ground truth file. - It should be an RGB image whose pixel values are integer ids encoded using the - [panopticapi.utils.id2rgb](https://github.com/cocodataset/panopticapi/) function. - The ids are defined by `segments_info`. - If an id does not appear in `segments_info`, the pixel is considered unlabeled - and is usually ignored in training & evaluation. -+ `segments_info` (list[dict]): defines the meaning of each id in panoptic segmentation ground truth. - Each dict has the following keys: - + `id` (int): integer that appears in the ground truth image. - + `category_id` (int): an integer in the range [0, num_categories-1] representing the category label. - + `iscrowd`: 0 (default) or 1. Whether this instance is labeled as COCO's "crowd region". - - -```eval_rst - -.. note:: - - The PanopticFPN model does not use the panoptic segmentation - format defined here, but a combination of both instance segmentation and semantic segmentation data - format. See :doc:`builtin_datasets` for instructions on COCO. - -``` - -Fast R-CNN (with pre-computed proposals) models are rarely used today. -To train a Fast R-CNN, the following extra keys are needed: - -+ `proposal_boxes` (array): 2D numpy array with shape (K, 4) representing K precomputed proposal boxes for this image. -+ `proposal_objectness_logits` (array): numpy array with shape (K, ), which corresponds to the objectness - logits of proposals in 'proposal_boxes'. -+ `proposal_bbox_mode` (int): the format of the precomputed proposal bbox. - It must be a member of - [structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode). - Default is `BoxMode.XYXY_ABS`. - - - -#### Custom Dataset Dicts for New Tasks - -In the `list[dict]` that your dataset function returns, the dictionary can also have __arbitrary custom data__. -This will be useful for a new task that needs extra information not covered -by the standard dataset dicts. 
In this case, you need to make sure the downstream code can handle your data -correctly. Usually this requires writing a new `mapper` for the dataloader (see [Use Custom Dataloaders](./data_loading.md)). - -When designing a custom format, note that all dicts are stored in memory -(sometimes serialized and with multiple copies). -To save memory, each dict is meant to contain __small__ but sufficient information -about each sample, such as file names and annotations. -Loading full samples typically happens in the data loader. - -For attributes shared among the entire dataset, use `Metadata` (see below). -To avoid extra memory, do not save such information inside each sample. - -### "Metadata" for Datasets - -Each dataset is associated with some metadata, accessible through -`MetadataCatalog.get(dataset_name).some_metadata`. -Metadata is a key-value mapping that contains information that's shared among -the entire dataset, and usually is used to interpret what's in the dataset, e.g., -names of classes, colors of classes, root of files, etc. -This information will be useful for augmentation, evaluation, visualization, logging, etc. -The structure of metadata depends on what is needed from the corresponding downstream code. - -If you register a new dataset through `DatasetCatalog.register`, -you may also want to add its corresponding metadata through -`MetadataCatalog.get(dataset_name).some_key = some_value`, to enable any features that need the metadata. -You can do it like this (using the metadata key "thing_classes" as an example): - -```python -from detectron2.data import MetadataCatalog -MetadataCatalog.get("my_dataset").thing_classes = ["person", "dog"] -``` - -Here is a list of metadata keys that are used by builtin features in detectron2. -If you add your own dataset without these metadata, some features may be -unavailable to you: - -* `thing_classes` (list[str]): Used by all instance detection/segmentation tasks. - A list of names for each instance/thing category. - If you load a COCO format dataset, it will be automatically set by the function `load_coco_json`. - -* `thing_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each thing category. - Used for visualization. If not given, random colors will be used. - -* `stuff_classes` (list[str]): Used by semantic and panoptic segmentation tasks. - A list of names for each stuff category. - -* `stuff_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each stuff category. - Used for visualization. If not given, random colors are used. - -* `ignore_label` (int): Used by semantic and panoptic segmentation tasks. Pixels in ground-truth - annotations with this category label should be ignored in evaluation. Typically these are "unlabeled" - pixels. - -* `keypoint_names` (list[str]): Used by keypoint detection. A list of names for each keypoint. - -* `keypoint_flip_map` (list[tuple[str]]): Used by keypoint detection. A list of pairs of names, - where each pair are the two keypoints that should be flipped if the image is - flipped horizontally during augmentation. -* `keypoint_connection_rules`: list[tuple(str, str, (r, g, b))]. Each tuple specifies a pair of keypoints - that are connected and the color (in [0, 255]) to use for the line between them when visualized. - -Some additional metadata that are specific to the evaluation of certain datasets (e.g. COCO): - -* `thing_dataset_id_to_contiguous_id` (dict[int->int]): Used by all instance detection/segmentation tasks in the COCO format. 
- A mapping from instance class ids in the dataset to contiguous ids in range [0, #class). - Will be automatically set by the function `load_coco_json`. - -* `stuff_dataset_id_to_contiguous_id` (dict[int->int]): Used when generating prediction json files for - semantic/panoptic segmentation. - A mapping from semantic segmentation class ids in the dataset - to contiguous ids in [0, num_categories). It is useful for evaluation only. - -* `json_file`: The COCO annotation json file. Used by COCO evaluation for COCO-format datasets. -* `panoptic_root`, `panoptic_json`: Used by COCO-format panoptic evaluation. -* `evaluator_type`: Used by the builtin main training script to select - evaluator. Don't use it in a new training script. - You can just provide the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) - for your dataset directly in your main script. - -```eval_rst -.. note:: - - In recognition, sometimes we use the term "thing" for instance-level tasks, - and "stuff" for semantic segmentation tasks. - Both are used in panoptic segmentation tasks. - For background on the concept of "thing" and "stuff", see - `On Seeing Stuff: The Perception of Materials by Humans and Machines - `_. -``` - -### Register a COCO Format Dataset - -If your instance-level (detection, segmentation, keypoint) dataset is already a json file in the COCO format, -the dataset and its associated metadata can be registered easily with: -```python -from detectron2.data.datasets import register_coco_instances -register_coco_instances("my_dataset", {}, "json_annotation.json", "path/to/image/dir") -``` - -If your dataset is in COCO format but need to be further processed, or has extra custom per-instance annotations, -the [load_coco_json](../modules/data.html#detectron2.data.datasets.load_coco_json) -function might be useful. - -### Update the Config for New Datasets - -Once you've registered the dataset, you can use the name of the dataset (e.g., "my_dataset" in -example above) in `cfg.DATASETS.{TRAIN,TEST}`. -There are other configs you might want to change to train or evaluate on new datasets: - -* `MODEL.ROI_HEADS.NUM_CLASSES` and `MODEL.RETINANET.NUM_CLASSES` are the number of thing classes - for R-CNN and RetinaNet models, respectively. -* `MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS` sets the number of keypoints for Keypoint R-CNN. - You'll also need to set [Keypoint OKS](http://cocodataset.org/#keypoints-eval) - with `TEST.KEYPOINT_OKS_SIGMAS` for evaluation. -* `MODEL.SEM_SEG_HEAD.NUM_CLASSES` sets the number of stuff classes for Semantic FPN & Panoptic FPN. -* `TEST.DETECTIONS_PER_IMAGE` controls the maximum number of objects to be detected. - Set it to a larger number if test images may contain >100 objects. -* If you're training Fast R-CNN (with precomputed proposals), `DATASETS.PROPOSAL_FILES_{TRAIN,TEST}` - need to match the datasets. The format of proposal files are documented - [here](../modules/data.html#detectron2.data.load_proposals_into_dataset). - -New models -(e.g. [TensorMask](../../projects/TensorMask), -[PointRend](../../projects/PointRend)) -often have similar configs of their own that need to be changed as well. - -```eval_rst -.. tip:: - - After changing the number of classes, certain layers in a pre-trained model will become incompatible - and therefore cannot be loaded to the new model. - This is expected, and loading such pre-trained models will produce warnings about such layers. 
-``` diff --git a/detectron2/docs/tutorials/deployment.md b/detectron2/docs/tutorials/deployment.md deleted file mode 100644 index f7598880a9946402848301123d2889cfec2359e5..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/deployment.md +++ /dev/null @@ -1,137 +0,0 @@ -# Deployment - -Models written in Python need to go through an export process to become a deployable artifact. -A few basic concepts about this process: - -__"Export method"__ is how a Python model is fully serialized to a deployable format. -We support the following export methods: - -* `tracing`: see [pytorch documentation](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) to learn about it -* `scripting`: see [pytorch documentation](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) to learn about it -* `caffe2_tracing`: replace parts of the model by caffe2 operators, then use tracing. - -__"Format"__ is how a serialized model is described in a file, e.g. -TorchScript, Caffe2 protobuf, ONNX format. -__"Runtime"__ is an engine that loads a serialized model and executes it, -e.g., PyTorch, Caffe2, TensorFlow, onnxruntime, TensorRT, etc. -A runtime is often tied to a specific format -(e.g. PyTorch needs TorchScript format, Caffe2 needs protobuf format). -We currently support the following combination and each has some limitations: - -```eval_rst -+----------------------------+-------------+-------------+-----------------------------+ -| Export Method | tracing | scripting | caffe2_tracing | -+============================+=============+=============+=============================+ -| **Formats** | TorchScript | TorchScript | Caffe2, TorchScript, ONNX | -+----------------------------+-------------+-------------+-----------------------------+ -| **Runtime** | PyTorch | PyTorch | Caffe2, PyTorch | -+----------------------------+-------------+-------------+-----------------------------+ -| C++/Python inference | βœ… | βœ… | βœ… | -+----------------------------+-------------+-------------+-----------------------------+ -| Dynamic resolution | βœ… | βœ… | βœ… | -+----------------------------+-------------+-------------+-----------------------------+ -| Batch size requirement | Constant | Dynamic | Batch inference unsupported | -+----------------------------+-------------+-------------+-----------------------------+ -| Extra runtime deps | torchvision | torchvision | Caffe2 ops (usually already | -| | | | | -| | | | included in PyTorch) | -+----------------------------+-------------+-------------+-----------------------------+ -| Faster/Mask/Keypoint R-CNN | βœ… | βœ… | βœ… | -+----------------------------+-------------+-------------+-----------------------------+ -| RetinaNet | βœ… | βœ… | βœ… | -+----------------------------+-------------+-------------+-----------------------------+ -| PointRend R-CNN | βœ… | ❌ | ❌ | -+----------------------------+-------------+-------------+-----------------------------+ -| Cascade R-CNN | βœ… | ❌ | ❌ | -+----------------------------+-------------+-------------+-----------------------------+ - -``` - -`caffe2_tracing` is going to be deprecated. -We don't plan to work on additional support for other formats/runtime, but contributions are welcome. - - -## Deployment with Tracing or Scripting - -Models can be exported to TorchScript format, by either -[tracing or scripting](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html). 
-The output model file can be loaded without detectron2 dependency in either Python or C++. -The exported model often requires torchvision (or its C++ library) dependency for some custom ops. - -This feature requires PyTorch β‰₯ 1.8. - -### Coverage -Most official models under the meta architectures `GeneralizedRCNN` and `RetinaNet` -are supported in both tracing and scripting mode. -Cascade R-CNN and PointRend are currently supported in tracing. -Users' custom extensions are supported if they are also scriptable or traceable. - -For models exported with tracing, dynamic input resolution is allowed, but batch size -(number of input images) must be fixed. -Scripting can support dynamic batch size. - -### Usage - -The main export APIs for tracing and scripting are [TracingAdapter](../modules/export.html#detectron2.export.TracingAdapter) -and [scripting_with_instances](../modules/export.html#detectron2.export.scripting_with_instances). -Their usage is currently demonstrated in [test_export_torchscript.py](../../tests/test_export_torchscript.py) -(see `TestScripting` and `TestTracing`) -as well as the [deployment example](../../tools/deploy). -Please check that these examples can run, and then modify for your use cases. -The usage now requires some user effort and necessary knowledge for each model to workaround the limitation of scripting and tracing. -In the future we plan to wrap these under simpler APIs to lower the bar to use them. - -## Deployment with Caffe2-tracing -We provide [Caffe2Tracer](../modules/export.html#detectron2.export.Caffe2Tracer) -that performs the export logic. -It replaces parts of the model with Caffe2 operators, -and then export the model into Caffe2, TorchScript or ONNX format. - -The converted model is able to run in either Python or C++ without detectron2/torchvision dependency, on CPU or GPUs. -It has a runtime optimized for CPU & mobile inference, but not optimized for GPU inference. - -This feature requires ONNX β‰₯ 1.6. - -### Coverage - -Most official models under these 3 common meta architectures: `GeneralizedRCNN`, `RetinaNet`, `PanopticFPN` -are supported. Cascade R-CNN is not supported. Batch inference is not supported. - -Users' custom extensions under these architectures (added through registration) are supported -as long as they do not contain control flow or operators not available in Caffe2 (e.g. deformable convolution). -For example, custom backbones and heads are often supported out of the box. - -### Usage - -The APIs are listed at [the API documentation](../modules/export). -We provide [export_model.py](../../tools/deploy/) as an example that uses -these APIs to convert a standard model. For custom models/datasets, you can add them to this script. - -### Use the model in C++/Python - -The model can be loaded in C++ and deployed with -either Caffe2 or Pytorch runtime.. [C++ examples](../../tools/deploy/) for Mask R-CNN -are given as a reference. Note that: - -* Models exported with `caffe2_tracing` method take a special input format - described in [documentation](../modules/export.html#detectron2.export.Caffe2Tracer). - This was taken care of in the C++ example. - -* The converted models do not contain post-processing operations that - transform raw layer outputs into formatted predictions. - For example, the C++ examples only produce raw outputs (28x28 masks) from the final - layers that are not post-processed, because in actual deployment, an application often needs - its custom lightweight post-processing, so this step is left for users. 
- -To help use the Caffe2-format model in python, -we provide a python wrapper around the converted model, in the -[Caffe2Model.\_\_call\_\_](../modules/export.html#detectron2.export.Caffe2Model.__call__) method. -This method has an interface that's identical to the [pytorch versions of models](./models.md), -and it internally applies pre/post-processing code to match the formats. -This wrapper can serve as a reference for how to use Caffe2's python API, -or for how to implement pre/post-processing in actual deployment. - -## Conversion to TensorFlow -[tensorpack Faster R-CNN](https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN/convert_d2) -provides scripts to convert a few standard detectron2 R-CNN models to TensorFlow's pb format. -It works by translating configs and weights, therefore only support a few models. diff --git a/detectron2/docs/tutorials/evaluation.md b/detectron2/docs/tutorials/evaluation.md deleted file mode 100644 index 2ef94faa38cae1c5f4e49eed4887ebbcd147513c..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/evaluation.md +++ /dev/null @@ -1,68 +0,0 @@ - -# Evaluation - -Evaluation is a process that takes a number of inputs/outputs pairs and aggregate them. -You can always [use the model](./models.md) directly and just parse its inputs/outputs manually to perform -evaluation. -Alternatively, evaluation is implemented in detectron2 using the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) -interface. - -Detectron2 includes a few `DatasetEvaluator` that computes metrics using standard dataset-specific -APIs (e.g., COCO, LVIS). -You can also implement your own `DatasetEvaluator` that performs some other jobs -using the inputs/outputs pairs. -For example, to count how many instances are detected on the validation set: - -```python -class Counter(DatasetEvaluator): - def reset(self): - self.count = 0 - def process(self, inputs, outputs): - for output in outputs: - self.count += len(output["instances"]) - def evaluate(self): - # save self.count somewhere, or print it, or return it. - return {"count": self.count} -``` - -## Use evaluators - -To evaluate using the methods of evaluators manually: -```python -def get_all_inputs_outputs(): - for data in data_loader: - yield data, model(data) - -evaluator.reset() -for inputs, outputs in get_all_inputs_outputs(): - evaluator.process(inputs, outputs) -eval_results = evaluator.evaluate() -``` - -Evaluators can also be used with [inference_on_dataset](../modules/evaluation.html#detectron2.evaluation.inference_on_dataset). -For example, - -```python -eval_results = inference_on_dataset( - model, - data_loader, - DatasetEvaluators([COCOEvaluator(...), Counter()])) -``` -This will execute `model` on all inputs from `data_loader`, and call evaluator to process them. - -Compared to running the evaluation manually using the model, the benefit of this function is that -evaluators can be merged together using [DatasetEvaluators](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluators), -and all the evaluation can finish in one forward pass over the dataset. -This function also provides accurate speed benchmarks for the given model and dataset. - -## Evaluators for custom dataset - -Many evaluators in detectron2 are made for specific datasets, -in order to obtain scores using each dataset's official API. 
-In addition to that, two evaluators are able to evaluate any generic dataset -that follows detectron2's [standard dataset format](./datasets.md), so they -can be used to evaluate custom datasets: - -* [COCOEvaluator](../modules/evaluation.html#detectron2.evaluation.COCOEvaluator) is able to evaluate AP (Average Precision) for box detection, - instance segmentation, keypoint detection on any custom dataset. -* [SemSegEvaluator](../modules/evaluation.html#detectron2.evaluation.SemSegEvaluator) is able to evaluate semantic segmentation metrics on any custom dataset. diff --git a/detectron2/docs/tutorials/extend.md b/detectron2/docs/tutorials/extend.md deleted file mode 100644 index a6af550fdb2aa79c818cef54b009f2fe816d46a9..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/extend.md +++ /dev/null @@ -1,141 +0,0 @@ -# Extend Detectron2's Defaults - -__Research is about doing things in new ways__. -This brings a tension in how to create abstractions in code, -which is a challenge for any research engineering project of a significant size: - -1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing - everything in new ways. It should be reasonably easy to break existing - abstractions and replace them with new ones. - -2. On the other hand, such a project also needs reasonably high-level - abstractions, so that users can easily do things in standard ways, - without worrying too much about the details that only certain researchers care about. - -In detectron2, there are two types of interfaces that address this tension together: - -1. Functions and classes that take a config (`cfg`) argument - created from a yaml file - (sometimes with few extra arguments). - - Such functions and classes implement - the "standard default" behavior: it will read what it needs from a given - config and do the "standard" thing. - Users only need to load an expert-made config and pass it around, without having to worry about - which arguments are used and what they all mean. - - See [Yacs Configs](configs.md) for a detailed tutorial. - -2. Functions and classes that have well-defined explicit arguments. - - Each of these is a small building block of the entire system. - They require users' expertise to understand what each argument should be, - and require more effort to stitch together to a larger system. - But they can be stitched together in more flexible ways. - - When you need to implement something not supported by the "standard defaults" - included in detectron2, these well-defined components can be reused. - - The [LazyConfig system](lazyconfigs.md) relies on such functions and classes. - -3. A few functions and classes are implemented with the - [@configurable](../modules/config.html#detectron2.config.configurable) - decorator - they can be called with either a config, or with explicit arguments, or a mixture of both. - Their explicit argument interfaces are currently experimental. - - As an example, a Mask R-CNN model can be built in the following ways: - - 1. Config-only: - ```python - # load proper yaml config file, then - model = build_model(cfg) - ``` - - 2. Mixture of config and additional argument overrides: - ```python - model = GeneralizedRCNN( - cfg, - roi_heads=StandardROIHeads(cfg, batch_size_per_image=666), - pixel_std=[57.0, 57.0, 57.0]) - ``` - - 3. Full explicit arguments: -
- - (click to expand) - - - ```python - model = GeneralizedRCNN( - backbone=FPN( - ResNet( - BasicStem(3, 64, norm="FrozenBN"), - ResNet.make_default_stages(50, stride_in_1x1=True, norm="FrozenBN"), - out_features=["res2", "res3", "res4", "res5"], - ).freeze(2), - ["res2", "res3", "res4", "res5"], - 256, - top_block=LastLevelMaxPool(), - ), - proposal_generator=RPN( - in_features=["p2", "p3", "p4", "p5", "p6"], - head=StandardRPNHead(in_channels=256, num_anchors=3), - anchor_generator=DefaultAnchorGenerator( - sizes=[[32], [64], [128], [256], [512]], - aspect_ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64], - offset=0.0, - ), - anchor_matcher=Matcher([0.3, 0.7], [0, -1, 1], allow_low_quality_matches=True), - box2box_transform=Box2BoxTransform([1.0, 1.0, 1.0, 1.0]), - batch_size_per_image=256, - positive_fraction=0.5, - pre_nms_topk=(2000, 1000), - post_nms_topk=(1000, 1000), - nms_thresh=0.7, - ), - roi_heads=StandardROIHeads( - num_classes=80, - batch_size_per_image=512, - positive_fraction=0.25, - proposal_matcher=Matcher([0.5], [0, 1], allow_low_quality_matches=False), - box_in_features=["p2", "p3", "p4", "p5"], - box_pooler=ROIPooler(7, (1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), 0, "ROIAlignV2"), - box_head=FastRCNNConvFCHead( - ShapeSpec(channels=256, height=7, width=7), conv_dims=[], fc_dims=[1024, 1024] - ), - box_predictor=FastRCNNOutputLayers( - ShapeSpec(channels=1024), - test_score_thresh=0.05, - box2box_transform=Box2BoxTransform((10, 10, 5, 5)), - num_classes=80, - ), - mask_in_features=["p2", "p3", "p4", "p5"], - mask_pooler=ROIPooler(14, (1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32), 0, "ROIAlignV2"), - mask_head=MaskRCNNConvUpsampleHead( - ShapeSpec(channels=256, width=14, height=14), - num_classes=80, - conv_dims=[256, 256, 256, 256, 256], - ), - ), - pixel_mean=[103.530, 116.280, 123.675], - pixel_std=[1.0, 1.0, 1.0], - input_format="BGR", - ) - ``` - -
- - -If you only need the standard behavior, the [Beginner's Tutorial](./getting_started.md) -should suffice. If you need to extend detectron2 to your own needs, -see the following tutorials for more details: - -* Detectron2 includes a few standard datasets. To use custom ones, see - [Use Custom Datasets](./datasets.md). -* Detectron2 contains the standard logic that creates a data loader for training/testing from a - dataset, but you can write your own as well. See [Use Custom Data Loaders](./data_loading.md). -* Detectron2 implements many standard detection models, and provide ways for you - to overwrite their behaviors. See [Use Models](./models.md) and [Write Models](./write-models.md). -* Detectron2 provides a default training loop that is good for common training tasks. - You can customize it with hooks, or write your own loop instead. See [training](./training.md). diff --git a/detectron2/docs/tutorials/getting_started.md b/detectron2/docs/tutorials/getting_started.md deleted file mode 100644 index e90bde77a3197b77f4cfdce86ca8f96491650acd..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/getting_started.md +++ /dev/null @@ -1 +0,0 @@ -../../GETTING_STARTED.md \ No newline at end of file diff --git a/detectron2/docs/tutorials/index.rst b/detectron2/docs/tutorials/index.rst deleted file mode 100644 index 850b95cfa873ffa0ba2d6f6e4263ad0895c08be8..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/index.rst +++ /dev/null @@ -1,20 +0,0 @@ -Tutorials -====================================== - -.. toctree:: - :maxdepth: 2 - - install - getting_started - builtin_datasets - extend - datasets - data_loading - augmentation - models - write-models - training - evaluation - configs - lazyconfigs - deployment diff --git a/detectron2/docs/tutorials/install.md b/detectron2/docs/tutorials/install.md deleted file mode 100644 index 5f52b2be3c9650cfc3e16ffb8fa374d3fcbad371..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/install.md +++ /dev/null @@ -1 +0,0 @@ -../../INSTALL.md \ No newline at end of file diff --git a/detectron2/docs/tutorials/lazyconfigs.md b/detectron2/docs/tutorials/lazyconfigs.md deleted file mode 100644 index a01101ae40ec12d25d5a3d96892b60ef32dca21e..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/lazyconfigs.md +++ /dev/null @@ -1,170 +0,0 @@ -# Lazy Configs - -The traditional yacs-based config system provides basic, standard functionalities. -However, it does not offer enough flexibility for many new projects. -We develop an alternative, non-intrusive config system that can be used with -detectron2 or potentially any other complex projects. - -## Python Syntax - -Our config objects are still dictionaries. Instead of using Yaml to define dictionaries, -we create dictionaries in Python directly. This gives users the following power that -doesn't exist in Yaml: - -* Easily manipulate the dictionary (addition & deletion) using Python. -* Write simple arithmetics or call simple functions. -* Use more data types / objects. -* Import / compose other config files, using the familiar Python import syntax. 
- -A Python config file can be loaded like this: -```python -# config.py: -a = dict(x=1, y=2, z=dict(xx=1)) -b = dict(x=3, y=4) - -# my_code.py: -from detectron2.config import LazyConfig -cfg = LazyConfig.load("path/to/config.py") # an omegaconf dictionary -assert cfg.a.z.xx == 1 -``` - -After [LazyConfig.load](../modules/config.html#detectron2.config.LazyConfig.load), `cfg` will be a dictionary that contains all dictionaries -defined in the global scope of the config file. Note that: -* All dictionaries are turned to an [omegaconf](https://omegaconf.readthedocs.io/) - config object during loading. This enables access to omegaconf features, - such as its [access syntax](https://omegaconf.readthedocs.io/en/2.1_branch/usage.html#access-and-manipulation) - and [interpolation](https://omegaconf.readthedocs.io/en/2.1_branch/usage.html#variable-interpolation). -* Absolute imports in `config.py` works the same as in regular Python. -* Relative imports can only import dictionaries from config files. - They are simply a syntax sugar for [LazyConfig.load_rel](../modules/config.html#detectron2.config.LazyConfig.load_rel). - They can load Python files at relative path without requiring `__init__.py`. - -[LazyConfig.save](../modules/config.html#detectron2.config.LazyConfig.save) can save a config object to yaml. -Note that this is not always successful if non-serializable objects appear in the config file (e.g. lambdas). -It is up to users whether to sacrifice the ability to save in exchange for flexibility. - -## Recursive Instantiation - -The LazyConfig system heavily uses recursive instantiation, which is a pattern that -uses a dictionary to describe a -call to a function/class. The dictionary consists of: - -1. A "\_target\_" key which contains path to the callable, such as "module.submodule.class_name". -2. Other keys that represent arguments to pass to the callable. Arguments themselves can be defined - using recursive instantiation. - -We provide a helper function [LazyCall](../modules/config.html#detectron2.config.LazyCall) that helps create such dictionaries. -The following code using `LazyCall` -```python -from detectron2.config import LazyCall as L -from my_app import Trainer, Optimizer -cfg = L(Trainer)( - optimizer=L(Optimizer)( - lr=0.01, - algo="SGD" - ) -) -``` -creates a dictionary like this: -```python -cfg = { - "_target_": "my_app.Trainer", - "optimizer": { - "_target_": "my_app.Optimizer", - "lr": 0.01, "algo": "SGD" - } -} -``` - -By representing objects using such dictionaries, a general -[instantiate](../modules/config.html#detectron2.config.instantiate) -function can turn them into actual objects, i.e.: -```python -from detectron2.config import instantiate -trainer = instantiate(cfg) -# equivalent to: -# from my_app import Trainer, Optimizer -# trainer = Trainer(optimizer=Optimizer(lr=0.01, algo="SGD")) -``` - -This pattern is powerful enough to describe very complex objects, e.g.: - -
- -A Full Mask R-CNN described in recursive instantiation (click to expand) - - -```eval_rst -.. literalinclude:: ../../configs/common/models/mask_rcnn_fpn.py - :language: python - :linenos: -``` - -
- -There are also objects or logic that cannot be described simply by a dictionary, -such as reused objects or method calls. They may require some refactoring -to work with recursive instantiation. - -## Using Model Zoo LazyConfigs - -We provide some configs in the model zoo using the LazyConfig system, for example: - -* [common baselines](../../configs/common/). -* [new Mask R-CNN baselines](../../configs/new_baselines/) - -After installing detectron2, they can be loaded by the model zoo API -[model_zoo.get_config](../modules/model_zoo.html#detectron2.model_zoo.get_config). - -Using these as references, you're free to define custom config structure / fields for your own -project, as long as your training script can understand them. -Despite of this, our model zoo configs still follow some simple conventions for consistency, e.g. -`cfg.model` defines a model object, `cfg.dataloader.{train,test}` defines dataloader objects, -and `cfg.train` contains training options in key-value form. -In addition to `print()`, a better way to view the structure of a config is like this: -```python -from detectron2.model_zoo import get_config -from detectron2.config import LazyConfig -print(LazyConfig.to_py(get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py"))) -``` -From the output it's easier to find relevant options to change, e.g. -`dataloader.train.total_batch_size` for the batch size, or `optimizer.lr` for base learning rate. - -We provide a reference training script -[tools/lazyconfig_train_net.py](../../tools/lazyconfig_train_net.py), -that can train/eval our model zoo configs. -It also shows how to support command line value overrides. - -To demonstrate the power and flexibility of the new system, we show that -[a simple config file](../../configs/Misc/torchvision_imagenet_R_50.py) -can let detectron2 train an ImageNet classification model from torchvision, even though -detectron2 contains no features about ImageNet classification. -This can serve as a reference for using detectron2 in other deep learning tasks. - -## Summary - -By using recursive instantiation to create objects, -we avoid passing a giant config to many places, because `cfg` is only passed to `instantiate`. -This has the following benefits: - -* It's __non-intrusive__: objects to be constructed are config-agnostic, regular Python - functions/classes. - They can even live in other libraries. For example, - `{"_target_": "torch.nn.Conv2d", "in_channels": 10, "out_channels": 10, "kernel_size": 1}` - defines a conv layer. -* __Clarity__ of what function/classes will be called, and what arguments they use. -* `cfg` doesn't need pre-defined keys and structures. It's valid as long as it translates to valid - code. This gives a lot more __flexibility__. -* You can still pass huge dictionaries as arguments, just like the old way. - -Recursive instantiation and Python syntax are orthogonal: you can use one without the other. -But by putting them together, the config file looks a lot like the code that will be executed: - -![img](./lazyconfig.jpg) - -However, the config file just defines dictionaries, which can be easily manipulated further -by composition or overrides. -The corresponding code will only be executed -later when `instantiate` is called. In some way, -in config files we're writing "editable code" that will be "lazily executed" later when needed. -That's why we call this system "LazyConfig". 
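As a small illustration of the model-zoo workflow described above (the config name is just the example used earlier; adapt it to your own needs), one can load a model zoo LazyConfig, override a few fields, and instantiate only the parts that are needed:

```python
from detectron2.config import instantiate
from detectron2.model_zoo import get_config

# Sketch only: load a model zoo LazyConfig, tweak a few fields, then instantiate.
cfg = get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py")
cfg.optimizer.lr = 0.01                    # override the base learning rate
cfg.dataloader.train.total_batch_size = 8  # override the training batch size
model = instantiate(cfg.model)             # build only the model object
```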
diff --git a/detectron2/docs/tutorials/models.md b/detectron2/docs/tutorials/models.md deleted file mode 100644 index a2def5c715ac793e6269cbb84ef4792f91a774c1..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/models.md +++ /dev/null @@ -1,180 +0,0 @@ -# Use Models - -## Build Models from Yacs Config -From a yacs config object, -models (and their sub-models) can be built by -functions such as `build_model`, `build_backbone`, `build_roi_heads`: -```python -from detectron2.modeling import build_model -model = build_model(cfg) # returns a torch.nn.Module -``` - -`build_model` only builds the model structure and fills it with random parameters. -See below for how to load an existing checkpoint to the model and how to use the `model` object. - -### Load/Save a Checkpoint -```python -from detectron2.checkpoint import DetectionCheckpointer -DetectionCheckpointer(model).load(file_path_or_url) # load a file, usually from cfg.MODEL.WEIGHTS - -checkpointer = DetectionCheckpointer(model, save_dir="output") -checkpointer.save("model_999") # save to output/model_999.pth -``` - -Detectron2's checkpointer recognizes models in pytorch's `.pth` format, as well as the `.pkl` files -in our model zoo. -See [API doc](../modules/checkpoint.html#detectron2.checkpoint.DetectionCheckpointer) -for more details about its usage. - -The model files can be arbitrarily manipulated using `torch.{load,save}` for `.pth` files or -`pickle.{dump,load}` for `.pkl` files. - -### Use a Model - -A model can be called by `outputs = model(inputs)`, where `inputs` is a `list[dict]`. -Each dict corresponds to one image and the required keys -depend on the type of model, and whether the model is in training or evaluation mode. -For example, in order to do inference, -all existing models expect the "image" key, and optionally "height" and "width". -The detailed format of inputs and outputs of existing models are explained below. - -__Training__: When in training mode, all models are required to be used under an `EventStorage`. -The training statistics will be put into the storage: -```python -from detectron2.utils.events import EventStorage -with EventStorage() as storage: - losses = model(inputs) -``` - -__Inference__: If you only want to do simple inference using an existing model, -[DefaultPredictor](../modules/engine.html#detectron2.engine.defaults.DefaultPredictor) -is a wrapper around model that provides such basic functionality. -It includes default behavior including model loading, preprocessing, -and operates on single image rather than batches. See its documentation for usage. - -You can also run inference directly like this: -```python -model.eval() -with torch.no_grad(): - outputs = model(inputs) -``` - -### Model Input Format - -Users can implement custom models that support any arbitrary input format. -Here we describe the standard input format that all builtin models support in detectron2. -They all take a `list[dict]` as the inputs. Each dict -corresponds to information about one image. - -The dict may contain the following keys: - -* "image": `Tensor` in (C, H, W) format. The meaning of channels are defined by `cfg.INPUT.FORMAT`. - Image normalization, if any, will be performed inside the model using - `cfg.MODEL.PIXEL_{MEAN,STD}`. -* "height", "width": the **desired** output height and width **in inference**, which is not necessarily the same - as the height or width of the `image` field. - For example, the `image` field contains the resized image, if resize is used as a preprocessing step. 
- But you may want the outputs to be in **original** resolution. - If provided, the model will produce output in this resolution, - rather than in the resolution of the `image` as input into the model. This is more efficient and accurate. -* "instances": an [Instances](../modules/structures.html#detectron2.structures.Instances) - object for training, with the following fields: - + "gt_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each instance. - + "gt_classes": `Tensor` of long type, a vector of N labels, in range [0, num_categories). - + "gt_masks": a [PolygonMasks](../modules/structures.html#detectron2.structures.PolygonMasks) - or [BitMasks](../modules/structures.html#detectron2.structures.BitMasks) object storing N masks, one for each instance. - + "gt_keypoints": a [Keypoints](../modules/structures.html#detectron2.structures.Keypoints) - object storing N keypoint sets, one for each instance. -* "sem_seg": `Tensor[int]` in (H, W) format. The semantic segmentation ground truth for training. - Values represent category labels starting from 0. -* "proposals": an [Instances](../modules/structures.html#detectron2.structures.Instances) - object used only in Fast R-CNN style models, with the following fields: - + "proposal_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing P proposal boxes. - + "objectness_logits": `Tensor`, a vector of P scores, one for each proposal. - -For inference of builtin models, only "image" key is required, and "width/height" are optional. - -We currently don't define standard input format for panoptic segmentation training, -because models now use custom formats produced by custom data loaders. - -#### How it connects to data loader: - -The output of the default [DatasetMapper]( ../modules/data.html#detectron2.data.DatasetMapper) is a dict -that follows the above format. -After the data loader performs batching, it becomes `list[dict]` which the builtin models support. - - -### Model Output Format - -When in training mode, the builtin models output a `dict[str->ScalarTensor]` with all the losses. - -When in inference mode, the builtin models output a `list[dict]`, one dict for each image. -Based on the tasks the model is doing, each dict may contain the following fields: - -* "instances": [Instances](../modules/structures.html#detectron2.structures.Instances) - object with the following fields: - * "pred_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each detected instance. - * "scores": `Tensor`, a vector of N confidence scores. - * "pred_classes": `Tensor`, a vector of N labels in range [0, num_categories). - + "pred_masks": a `Tensor` of shape (N, H, W), masks for each detected instance. - + "pred_keypoints": a `Tensor` of shape (N, num_keypoint, 3). - Each row in the last dimension is (x, y, score). Confidence scores are larger than 0. -* "sem_seg": `Tensor` of (num_categories, H, W), the semantic segmentation prediction. -* "proposals": [Instances](../modules/structures.html#detectron2.structures.Instances) - object with the following fields: - * "proposal_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) - object storing N boxes. - * "objectness_logits": a torch vector of N confidence scores. -* "panoptic_seg": A tuple of `(pred: Tensor, segments_info: Optional[list[dict]])`. - The `pred` tensor has shape (H, W), containing the segment id of each pixel. 
- - * If `segments_info` exists, each dict describes one segment id in `pred` and has the following fields: - - * "id": the segment id - * "isthing": whether the segment is a thing or stuff - * "category_id": the category id of this segment. - - If a pixel's id does not exist in `segments_info`, it is considered to be void label - defined in [Panoptic Segmentation](https://arxiv.org/abs/1801.00868). - - * If `segments_info` is None, all pixel values in `pred` must be β‰₯ -1. - Pixels with value -1 are assigned void labels. - Otherwise, the category id of each pixel is obtained by - `category_id = pixel // metadata.label_divisor`. - - -### Partially execute a model: - -Sometimes you may want to obtain an intermediate tensor inside a model, -such as the input of certain layer, the output before post-processing. -Since there are typically hundreds of intermediate tensors, there isn't an API that provides you -the intermediate result you need. -You have the following options: - -1. Write a (sub)model. Following the [tutorial](./write-models.md), you can - rewrite a model component (e.g. a head of a model), such that it - does the same thing as the existing component, but returns the output - you need. -2. Partially execute a model. You can create the model as usual, - but use custom code to execute it instead of its `forward()`. For example, - the following code obtains mask features before mask head. - - ```python - images = ImageList.from_tensors(...) # preprocessed input tensor - model = build_model(cfg) - model.eval() - features = model.backbone(images.tensor) - proposals, _ = model.proposal_generator(images, features) - instances, _ = model.roi_heads(images, features, proposals) - mask_features = [features[f] for f in model.roi_heads.in_features] - mask_features = model.roi_heads.mask_pooler(mask_features, [x.pred_boxes for x in instances]) - ``` - -3. Use [forward hooks](https://pytorch.org/tutorials/beginner/former_torchies/nnft_tutorial.html#forward-and-backward-function-hooks). - Forward hooks can help you obtain inputs or outputs of a certain module. - If they are not exactly what you want, they can at least be used together with partial execution - to obtain other tensors. - -All options require you to read documentation and sometimes code -of the existing models to understand the internal logic, -in order to write code to obtain the internal tensors. diff --git a/detectron2/docs/tutorials/training.md b/detectron2/docs/tutorials/training.md deleted file mode 100644 index 83a6cb0a8e38ca06bbf96201ac2595d2116523c3..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/training.md +++ /dev/null @@ -1,67 +0,0 @@ -# Training - -From the previous tutorials, you may now have a custom model and a data loader. -To run training, users typically have a preference in one of the following two styles: - -### Custom Training Loop - -With a model and a data loader ready, everything else needed to write a training loop can -be found in PyTorch, and you are free to write the training loop yourself. -This style allows researchers to manage the entire training logic more clearly and have full control. -One such example is provided in [tools/plain_train_net.py](../../tools/plain_train_net.py). - -Any customization on the training logic is then easily controlled by the user. - -### Trainer Abstraction - -We also provide a standardized "trainer" abstraction with a -hook system that helps simplify the standard training behavior. 
-It includes the following two instantiations: - -* [SimpleTrainer](../modules/engine.html#detectron2.engine.SimpleTrainer) - provides a minimal training loop for single-cost single-optimizer single-data-source training, with nothing else. - Other tasks (checkpointing, logging, etc) can be implemented using - [the hook system](../modules/engine.html#detectron2.engine.HookBase). -* [DefaultTrainer](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer) is a `SimpleTrainer` initialized from a - yacs config, used by - [tools/train_net.py](../../tools/train_net.py) and many scripts. - It includes more standard default behaviors that one might want to opt in, - including default configurations for optimizer, learning rate schedule, - logging, evaluation, checkpointing etc. - -To customize a `DefaultTrainer`: - -1. For simple customizations (e.g. change optimizer, evaluator, LR scheduler, data loader, etc.), overwrite [its methods](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer) in a subclass, just like [tools/train_net.py](../../tools/train_net.py). -2. For extra tasks during training, check the - [hook system](../modules/engine.html#detectron2.engine.HookBase) to see if it's supported. - - As an example, to print hello during training: - ```python - class HelloHook(HookBase): - def after_step(self): - if self.trainer.iter % 100 == 0: - print(f"Hello at iteration {self.trainer.iter}!") - ``` -3. Using a trainer+hook system means there will always be some non-standard behaviors that cannot be supported, especially in research. - For this reason, we intentionally keep the trainer & hook system minimal, rather than powerful. - If anything cannot be achieved by such a system, it's easier to start from [tools/plain_train_net.py](../../tools/plain_train_net.py) to implement custom training logic manually. - -### Logging of Metrics - -During training, detectron2 models and trainer put metrics to a centralized [EventStorage](../modules/utils.html#detectron2.utils.events.EventStorage). -You can use the following code to access it and log metrics to it: -```python -from detectron2.utils.events import get_event_storage - -# inside the model: -if self.training: - value = # compute the value from inputs - storage = get_event_storage() - storage.put_scalar("some_accuracy", value) -``` - -Refer to its documentation for more details. - -Metrics are then written to various destinations with [EventWriter](../modules/utils.html#module-detectron2.utils.events). -DefaultTrainer enables a few `EventWriter` with default configurations. -See above for how to customize them. diff --git a/detectron2/docs/tutorials/write-models.md b/detectron2/docs/tutorials/write-models.md deleted file mode 100644 index 967d126503c71b419bca94615cb1090e1a79cb49..0000000000000000000000000000000000000000 --- a/detectron2/docs/tutorials/write-models.md +++ /dev/null @@ -1,90 +0,0 @@ -# Write Models - -If you are trying to do something completely new, you may wish to implement -a model entirely from scratch. However, in many situations you may -be interested in modifying or extending some components of an existing model. -Therefore, we also provide mechanisms that let users override the -behavior of certain internal components of standard models. 
- - -## Register New Components - -For common concepts that users often want to customize, such as "backbone feature extractor", "box head", -we provide a registration mechanism for users to inject custom implementation that -will be immediately available to use in config files. - -For example, to add a new backbone, import this code in your code: -```python -from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec - -@BACKBONE_REGISTRY.register() -class ToyBackbone(Backbone): - def __init__(self, cfg, input_shape): - super().__init__() - # create your own backbone - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=16, padding=3) - - def forward(self, image): - return {"conv1": self.conv1(image)} - - def output_shape(self): - return {"conv1": ShapeSpec(channels=64, stride=16)} -``` - -In this code, we implement a new backbone following the interface of the -[Backbone](../modules/modeling.html#detectron2.modeling.Backbone) class, -and register it into the [BACKBONE_REGISTRY](../modules/modeling.html#detectron2.modeling.BACKBONE_REGISTRY) -which requires subclasses of `Backbone`. -After importing this code, detectron2 can link the name of the class to its implementation. Therefore you can write the following code: - -```python -cfg = ... # read a config -cfg.MODEL.BACKBONE.NAME = 'ToyBackbone' # or set it in the config file -model = build_model(cfg) # it will find `ToyBackbone` defined above -``` - -As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture, -you can implement a new -[ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and put it in the `ROI_HEADS_REGISTRY`. -[DensePose](../../projects/DensePose) -and [MeshRCNN](https://github.com/facebookresearch/meshrcnn) -are two examples that implement new ROIHeads to perform new tasks. -And [projects/](../../projects/) -contains more examples that implement different architectures. - -A complete list of registries can be found in [API documentation](../modules/modeling.html#model-registries). -You can register components in these registries to customize different parts of a model, or the -entire model. - -## Construct Models with Explicit Arguments - -Registry is a bridge to connect names in config files to the actual code. -They are meant to cover a few main components that users frequently need to replace. -However, the capability of a text-based config file is sometimes limited and -some deeper customization may be available only through writing code. - -Most model components in detectron2 have a clear `__init__` interface that documents -what input arguments it needs. Calling them with custom arguments will give you a custom variant -of the model. - -As an example, to use __custom loss function__ in the box head of a Faster R-CNN, we can do the following: - -1. Losses are currently computed in [FastRCNNOutputLayers](../modules/modeling.html#detectron2.modeling.FastRCNNOutputLayers). - We need to implement a variant or a subclass of it, with custom loss functions, named `MyRCNNOutput`. -2. Call `StandardROIHeads` with `box_predictor=MyRCNNOutput()` argument instead of the builtin `FastRCNNOutputLayers`. - If all other arguments should stay unchanged, this can be easily achieved by using the [configurable `__init__`](../modules/config.html#detectron2.config.configurable) mechanism: - - ```python - roi_heads = StandardROIHeads( - cfg, backbone.output_shape(), - box_predictor=MyRCNNOutput(...) - ) - ``` -3. 
(optional) If we want to enable this new model from a config file, registration is needed: - ```python - @ROI_HEADS_REGISTRY.register() - class MyStandardROIHeads(StandardROIHeads): - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape, - box_predictor=MyRCNNOutput(...)) - ``` diff --git a/detectron2/projects/DeepLab/README.md b/detectron2/projects/DeepLab/README.md deleted file mode 100644 index bd03cf1c41f7b0358fb6988d6a387effbb328a50..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/README.md +++ /dev/null @@ -1,100 +0,0 @@ -# DeepLab in Detectron2 - -In this repository, we implement DeepLabV3 and DeepLabV3+ in Detectron2. - -## Installation -Install Detectron2 following [the instructions](https://detectron2.readthedocs.io/tutorials/install.html). - -## Training - -To train a model with 8 GPUs run: -```bash -cd /path/to/detectron2/projects/DeepLab -python train_net.py --config-file configs/Cityscapes-SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -cd /path/to/detectron2/projects/DeepLab -python train_net.py --config-file configs/Cityscapes-SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -## Cityscapes Semantic Segmentation -Cityscapes models are trained with ImageNet pretraining. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Method | Backbone | Output resolution | mIoU | model id | download |
-| :--- | :--- | :--- | :--- | :--- | :--- |
-| DeepLabV3 | R101-DC5 | 1024×2048 | 76.7 | - | - |
-| DeepLabV3 | R103-DC5 | 1024×2048 | 78.5 | 28041665 | model \| metrics |
-| DeepLabV3+ | R101-DC5 | 1024×2048 | 78.1 | - | - |
-| DeepLabV3+ | R103-DC5 | 1024×2048 | 80.0 | 28054032 | model \| metrics |
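As a quick usage sketch (editorial addition, not part of the project's scripts): a checkpoint from the table above can also be run on a single image with detectron2's `DefaultPredictor`. The config path, weights path, and image file below are placeholders to adapt to the row you download.

```python
# Minimal single-image inference sketch, assuming detectron2 and this project are installed.
# All file paths are placeholders, not files shipped with this README.
import cv2
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.projects.deeplab import add_deeplab_config

cfg = get_cfg()
add_deeplab_config(cfg)
cfg.merge_from_file(
    "configs/Cityscapes-SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml"
)
cfg.MODEL.WEIGHTS = "/path/to/model_checkpoint"  # e.g. the R103-DC5 download from the table
predictor = DefaultPredictor(cfg)

image = cv2.imread("example.jpg")        # BGR image, as DefaultPredictor expects
sem_seg = predictor(image)["sem_seg"]    # SemanticSegmentor output: (num_classes, H, W) scores
labels = sem_seg.argmax(dim=0)           # per-pixel Cityscapes category ids
```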
- -Note: -- [R103](https://dl.fbaipublicfiles.com/detectron2/DeepLab/R-103.pkl): a ResNet-101 with its first 7x7 convolution replaced by 3 3x3 convolutions. -This modification has been used in most semantic segmentation papers. We pre-train this backbone on ImageNet using the default recipe of [pytorch examples](https://github.com/pytorch/examples/tree/master/imagenet). -- DC5 means using dilated convolution in `res5`. - -## Citing DeepLab - -If you use DeepLab, please use the following BibTeX entry. - -* DeepLabv3+: - -``` -@inproceedings{deeplabv3plus2018, - title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, - author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, - booktitle={ECCV}, - year={2018} -} -``` - -* DeepLabv3: - -``` -@article{deeplabv32018, - title={Rethinking atrous convolution for semantic image segmentation}, - author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, - journal={arXiv:1706.05587}, - year={2017} -} -``` diff --git a/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/Base-DeepLabV3-OS16-Semantic.yaml b/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/Base-DeepLabV3-OS16-Semantic.yaml deleted file mode 100644 index fa6edb5dcd0e1d866058474e6627abb2674e6a34..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/Base-DeepLabV3-OS16-Semantic.yaml +++ /dev/null @@ -1,36 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-DilatedC5.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - BACKBONE: - FREEZE_AT: 0 - SEM_SEG_HEAD: - NAME: "DeepLabV3Head" - IN_FEATURES: ["res5"] - ASPP_CHANNELS: 256 - ASPP_DILATIONS: [6, 12, 18] - ASPP_DROPOUT: 0.1 - CONVS_DIM: 256 - COMMON_STRIDE: 16 - NUM_CLASSES: 19 - LOSS_TYPE: "hard_pixel_mining" -DATASETS: - TRAIN: ("cityscapes_fine_sem_seg_train",) - TEST: ("cityscapes_fine_sem_seg_val",) -SOLVER: - BASE_LR: 0.01 - MAX_ITER: 90000 - LR_SCHEDULER_NAME: "WarmupPolyLR" - IMS_PER_BATCH: 16 -INPUT: - MIN_SIZE_TRAIN: (512, 768, 1024, 1280, 1536, 1792, 2048) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 4096 - MAX_SIZE_TEST: 2048 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (512, 1024) - SINGLE_CATEGORY_MAX_AREA: 1.0 -DATALOADER: - NUM_WORKERS: 10 diff --git a/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/deeplab_v3_R_103_os16_mg124_poly_90k_bs16.yaml b/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/deeplab_v3_R_103_os16_mg124_poly_90k_bs16.yaml deleted file mode 100644 index a2f5a54140189c099c39b4b737e92decb5fbe569..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/deeplab_v3_R_103_os16_mg124_poly_90k_bs16.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: Base-DeepLabV3-OS16-Semantic.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-103.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 101 - NORM: "SyncBN" - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" - STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False - SEM_SEG_HEAD: - NAME: "DeepLabV3Head" - NORM: "SyncBN" -INPUT: - FORMAT: "RGB" diff --git a/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml 
b/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml deleted file mode 100644 index c03a72d83dd813a94ab1d1d59f875c2428eca890..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/configs/Cityscapes-SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: Base-DeepLabV3-OS16-Semantic.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-103.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 101 - NORM: "SyncBN" - OUT_FEATURES: ["res2", "res5"] - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" - STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False - SEM_SEG_HEAD: - NAME: "DeepLabV3PlusHead" - IN_FEATURES: ["res2", "res5"] - PROJECT_FEATURES: ["res2"] - PROJECT_CHANNELS: [48] - NORM: "SyncBN" - COMMON_STRIDE: 4 -INPUT: - FORMAT: "RGB" diff --git a/detectron2/projects/DeepLab/deeplab/__init__.py b/detectron2/projects/DeepLab/deeplab/__init__.py deleted file mode 100644 index dcd88ff0c09d630577e3ac9f8afb5324a80a7be4..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .build_solver import build_lr_scheduler -from .config import add_deeplab_config -from .resnet import build_resnet_deeplab_backbone -from .semantic_seg import DeepLabV3Head, DeepLabV3PlusHead diff --git a/detectron2/projects/DeepLab/deeplab/build_solver.py b/detectron2/projects/DeepLab/deeplab/build_solver.py deleted file mode 100644 index a1d359c2c35baf75a835879bb4b4f902be235179..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/build_solver.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch - -from detectron2.config import CfgNode -from detectron2.solver import LRScheduler -from detectron2.solver import build_lr_scheduler as build_d2_lr_scheduler - -from .lr_scheduler import WarmupPolyLR - - -def build_lr_scheduler(cfg: CfgNode, optimizer: torch.optim.Optimizer) -> LRScheduler: - """ - Build a LR scheduler from config. - """ - name = cfg.SOLVER.LR_SCHEDULER_NAME - if name == "WarmupPolyLR": - return WarmupPolyLR( - optimizer, - cfg.SOLVER.MAX_ITER, - warmup_factor=cfg.SOLVER.WARMUP_FACTOR, - warmup_iters=cfg.SOLVER.WARMUP_ITERS, - warmup_method=cfg.SOLVER.WARMUP_METHOD, - power=cfg.SOLVER.POLY_LR_POWER, - constant_ending=cfg.SOLVER.POLY_LR_CONSTANT_ENDING, - ) - else: - return build_d2_lr_scheduler(cfg, optimizer) diff --git a/detectron2/projects/DeepLab/deeplab/config.py b/detectron2/projects/DeepLab/deeplab/config.py deleted file mode 100644 index 5f5e45a9124e61c12d90cfc5032b268496891a4a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/config.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - - -def add_deeplab_config(cfg): - """ - Add config for DeepLab. - """ - # We retry random cropping until no single category in semantic segmentation GT occupies more - # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. - cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 - # Used for `poly` learning rate schedule. - cfg.SOLVER.POLY_LR_POWER = 0.9 - cfg.SOLVER.POLY_LR_CONSTANT_ENDING = 0.0 - # Loss type, choose from `cross_entropy`, `hard_pixel_mining`. 
- cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE = "hard_pixel_mining" - # DeepLab settings - cfg.MODEL.SEM_SEG_HEAD.PROJECT_FEATURES = ["res2"] - cfg.MODEL.SEM_SEG_HEAD.PROJECT_CHANNELS = [48] - cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS = 256 - cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS = [6, 12, 18] - cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT = 0.1 - cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV = False - # Backbone new configs - cfg.MODEL.RESNETS.RES4_DILATION = 1 - cfg.MODEL.RESNETS.RES5_MULTI_GRID = [1, 2, 4] - # ResNet stem type from: `basic`, `deeplab` - cfg.MODEL.RESNETS.STEM_TYPE = "deeplab" diff --git a/detectron2/projects/DeepLab/deeplab/loss.py b/detectron2/projects/DeepLab/deeplab/loss.py deleted file mode 100644 index 3a43087b7c1a2b4d2b249fad117724dbd0f14fdd..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/loss.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch -import torch.nn as nn - - -class DeepLabCE(nn.Module): - """ - Hard pixel mining with cross entropy loss, for semantic segmentation. - This is used in TensorFlow DeepLab frameworks. - Paper: DeeperLab: Single-Shot Image Parser - Reference: https://github.com/tensorflow/models/blob/bd488858d610e44df69da6f89277e9de8a03722c/research/deeplab/utils/train_utils.py#L33 # noqa - Arguments: - ignore_label: Integer, label to ignore. - top_k_percent_pixels: Float, the value lies in [0.0, 1.0]. When its - value < 1.0, only compute the loss for the top k percent pixels - (e.g., the top 20% pixels). This is useful for hard pixel mining. - weight: Tensor, a manual rescaling weight given to each class. - """ - - def __init__(self, ignore_label=-1, top_k_percent_pixels=1.0, weight=None): - super(DeepLabCE, self).__init__() - self.top_k_percent_pixels = top_k_percent_pixels - self.ignore_label = ignore_label - self.criterion = nn.CrossEntropyLoss( - weight=weight, ignore_index=ignore_label, reduction="none" - ) - - def forward(self, logits, labels, weights=None): - if weights is None: - pixel_losses = self.criterion(logits, labels).contiguous().view(-1) - else: - # Apply per-pixel loss weights. - pixel_losses = self.criterion(logits, labels) * weights - pixel_losses = pixel_losses.contiguous().view(-1) - if self.top_k_percent_pixels == 1.0: - return pixel_losses.mean() - - top_k_pixels = int(self.top_k_percent_pixels * pixel_losses.numel()) - pixel_losses, _ = torch.topk(pixel_losses, top_k_pixels) - return pixel_losses.mean() diff --git a/detectron2/projects/DeepLab/deeplab/lr_scheduler.py b/detectron2/projects/DeepLab/deeplab/lr_scheduler.py deleted file mode 100644 index b754b59750ed7fea1e2d24d40f019d26bd562bf5..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/lr_scheduler.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import math -from typing import List -import torch - -from detectron2.solver.lr_scheduler import LRScheduler, _get_warmup_factor_at_iter - -# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes -# only on epoch boundaries. We typically use iteration based schedules instead. -# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean -# "iteration" instead. - -# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating -# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. - - -class WarmupPolyLR(LRScheduler): - """ - Poly learning rate schedule used to train DeepLab. 
- Paper: DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, - Atrous Convolution, and Fully Connected CRFs. - Reference: https://github.com/tensorflow/models/blob/21b73d22f3ed05b650e85ac50849408dd36de32e/research/deeplab/utils/train_utils.py#L337 # noqa - """ - - def __init__( - self, - optimizer: torch.optim.Optimizer, - max_iters: int, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - power: float = 0.9, - constant_ending: float = 0.0, - ): - self.max_iters = max_iters - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - self.power = power - self.constant_ending = constant_ending - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - if self.constant_ending > 0 and warmup_factor == 1.0: - # Constant ending lr. - if ( - math.pow((1.0 - self.last_epoch / self.max_iters), self.power) - < self.constant_ending - ): - return [base_lr * self.constant_ending for base_lr in self.base_lrs] - return [ - base_lr * warmup_factor * math.pow((1.0 - self.last_epoch / self.max_iters), self.power) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() diff --git a/detectron2/projects/DeepLab/deeplab/resnet.py b/detectron2/projects/DeepLab/deeplab/resnet.py deleted file mode 100644 index 2cc277b24630a9425f4c37e1abc3352b49e1a031..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/resnet.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import fvcore.nn.weight_init as weight_init -import torch.nn.functional as F - -from detectron2.layers import CNNBlockBase, Conv2d, get_norm -from detectron2.modeling import BACKBONE_REGISTRY -from detectron2.modeling.backbone.resnet import ( - BasicStem, - BottleneckBlock, - DeformBottleneckBlock, - ResNet, -) - - -class DeepLabStem(CNNBlockBase): - """ - The DeepLab ResNet stem (layers before the first residual block). - """ - - def __init__(self, in_channels=3, out_channels=128, norm="BN"): - """ - Args: - norm (str or callable): norm after the first conv layer. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, 4) - self.in_channels = in_channels - self.conv1 = Conv2d( - in_channels, - out_channels // 2, - kernel_size=3, - stride=2, - padding=1, - bias=False, - norm=get_norm(norm, out_channels // 2), - ) - self.conv2 = Conv2d( - out_channels // 2, - out_channels // 2, - kernel_size=3, - stride=1, - padding=1, - bias=False, - norm=get_norm(norm, out_channels // 2), - ) - self.conv3 = Conv2d( - out_channels // 2, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - weight_init.c2_msra_fill(self.conv1) - weight_init.c2_msra_fill(self.conv2) - weight_init.c2_msra_fill(self.conv3) - - def forward(self, x): - x = self.conv1(x) - x = F.relu_(x) - x = self.conv2(x) - x = F.relu_(x) - x = self.conv3(x) - x = F.relu_(x) - x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) - return x - - -@BACKBONE_REGISTRY.register() -def build_resnet_deeplab_backbone(cfg, input_shape): - """ - Create a ResNet instance from config. - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? 
- norm = cfg.MODEL.RESNETS.NORM - if cfg.MODEL.RESNETS.STEM_TYPE == "basic": - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - elif cfg.MODEL.RESNETS.STEM_TYPE == "deeplab": - stem = DeepLabStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - else: - raise ValueError("Unknown stem type: {}".format(cfg.MODEL.RESNETS.STEM_TYPE)) - - # fmt: off - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res4_dilation = cfg.MODEL.RESNETS.RES4_DILATION - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - res5_multi_grid = cfg.MODEL.RESNETS.RES5_MULTI_GRID - # fmt: on - assert res4_dilation in {1, 2}, "res4_dilation cannot be {}.".format(res4_dilation) - assert res5_dilation in {1, 2, 4}, "res5_dilation cannot be {}.".format(res5_dilation) - if res4_dilation == 2: - # Always dilate res5 if res4 is dilated. - assert res5_dilation == 4 - - num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] - - stages = [] - - # Avoid creating variables without gradients - # It consumes extra memory and may cause allreduce to fail - out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] - max_stage_idx = max(out_stage_idx) - for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): - if stage_idx == 4: - dilation = res4_dilation - elif stage_idx == 5: - dilation = res5_dilation - else: - dilation = 1 - first_stride = 1 if idx == 0 or dilation > 1 else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), - "in_channels": in_channels, - "out_channels": out_channels, - "norm": norm, - } - stage_kargs["bottleneck_channels"] = bottleneck_channels - stage_kargs["stride_in_1x1"] = stride_in_1x1 - stage_kargs["dilation"] = dilation - stage_kargs["num_groups"] = num_groups - if deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - if stage_idx == 5: - stage_kargs.pop("dilation") - stage_kargs["dilation_per_block"] = [dilation * mg for mg in res5_multi_grid] - blocks = ResNet.make_stage(**stage_kargs) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features).freeze(freeze_at) diff --git a/detectron2/projects/DeepLab/deeplab/semantic_seg.py b/detectron2/projects/DeepLab/deeplab/semantic_seg.py deleted file mode 100644 index d4625c52d96b2a700d828112c2a2ea80f5028330..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/deeplab/semantic_seg.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from typing import Callable, Dict, List, Optional, Tuple, Union -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import ASPP, Conv2d, DepthwiseSeparableConv2d, ShapeSpec, get_norm -from detectron2.modeling import SEM_SEG_HEADS_REGISTRY - -from .loss import DeepLabCE - - -@SEM_SEG_HEADS_REGISTRY.register() -class DeepLabV3PlusHead(nn.Module): - """ - A semantic segmentation head described in :paper:`DeepLabV3+`. - """ - - @configurable - def __init__( - self, - input_shape: Dict[str, ShapeSpec], - *, - project_channels: List[int], - aspp_dilations: List[int], - aspp_dropout: float, - decoder_channels: List[int], - common_stride: int, - norm: Union[str, Callable], - train_size: Optional[Tuple], - loss_weight: float = 1.0, - loss_type: str = "cross_entropy", - ignore_value: int = -1, - num_classes: Optional[int] = None, - use_depthwise_separable_conv: bool = False, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape: shape of the input features. They will be ordered by stride - and the last one (with largest stride) is used as the input to the - decoder (i.e. the ASPP module); the rest are low-level feature for - the intermediate levels of decoder. - project_channels (list[int]): a list of low-level feature channels. - The length should be len(in_features) - 1. - aspp_dilations (list(int)): a list of 3 dilations in ASPP. - aspp_dropout (float): apply dropout on the output of ASPP. - decoder_channels (list[int]): a list of output channels of each - decoder stage. It should have the same length as "in_features" - (each element in "in_features" corresponds to one decoder stage). - common_stride (int): output stride of decoder. - norm (str or callable): normalization for all conv layers. - train_size (tuple): (height, width) of training images. - loss_weight (float): loss weight. - loss_type (str): type of loss function, 2 opptions: - (1) "cross_entropy" is the standard cross entropy loss. - (2) "hard_pixel_mining" is the loss in DeepLab that samples - top k% hardest pixels. - ignore_value (int): category to be ignored during training. - num_classes (int): number of classes, if set to None, the decoder - will not construct a predictor. - use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d - in ASPP and decoder. 
- """ - super().__init__() - input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride) - - # fmt: off - self.in_features = [k for k, v in input_shape] # starting from "res2" to "res5" - in_channels = [x[1].channels for x in input_shape] - in_strides = [x[1].stride for x in input_shape] - aspp_channels = decoder_channels[-1] - self.ignore_value = ignore_value - self.common_stride = common_stride # output stride - self.loss_weight = loss_weight - self.loss_type = loss_type - self.decoder_only = num_classes is None - self.use_depthwise_separable_conv = use_depthwise_separable_conv - # fmt: on - - assert ( - len(project_channels) == len(self.in_features) - 1 - ), "Expected {} project_channels, got {}".format( - len(self.in_features) - 1, len(project_channels) - ) - assert len(decoder_channels) == len( - self.in_features - ), "Expected {} decoder_channels, got {}".format( - len(self.in_features), len(decoder_channels) - ) - self.decoder = nn.ModuleDict() - - use_bias = norm == "" - for idx, in_channel in enumerate(in_channels): - decoder_stage = nn.ModuleDict() - - if idx == len(self.in_features) - 1: - # ASPP module - if train_size is not None: - train_h, train_w = train_size - encoder_stride = in_strides[-1] - if train_h % encoder_stride or train_w % encoder_stride: - raise ValueError("Crop size need to be divisible by encoder stride.") - pool_h = train_h // encoder_stride - pool_w = train_w // encoder_stride - pool_kernel_size = (pool_h, pool_w) - else: - pool_kernel_size = None - project_conv = ASPP( - in_channel, - aspp_channels, - aspp_dilations, - norm=norm, - activation=F.relu, - pool_kernel_size=pool_kernel_size, - dropout=aspp_dropout, - use_depthwise_separable_conv=use_depthwise_separable_conv, - ) - fuse_conv = None - else: - project_conv = Conv2d( - in_channel, - project_channels[idx], - kernel_size=1, - bias=use_bias, - norm=get_norm(norm, project_channels[idx]), - activation=F.relu, - ) - weight_init.c2_xavier_fill(project_conv) - if use_depthwise_separable_conv: - # We use a single 5x5 DepthwiseSeparableConv2d to replace - # 2 3x3 Conv2d since they have the same receptive field, - # proposed in :paper:`Panoptic-DeepLab`. 
- fuse_conv = DepthwiseSeparableConv2d( - project_channels[idx] + decoder_channels[idx + 1], - decoder_channels[idx], - kernel_size=5, - padding=2, - norm1=norm, - activation1=F.relu, - norm2=norm, - activation2=F.relu, - ) - else: - fuse_conv = nn.Sequential( - Conv2d( - project_channels[idx] + decoder_channels[idx + 1], - decoder_channels[idx], - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, decoder_channels[idx]), - activation=F.relu, - ), - Conv2d( - decoder_channels[idx], - decoder_channels[idx], - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, decoder_channels[idx]), - activation=F.relu, - ), - ) - weight_init.c2_xavier_fill(fuse_conv[0]) - weight_init.c2_xavier_fill(fuse_conv[1]) - - decoder_stage["project_conv"] = project_conv - decoder_stage["fuse_conv"] = fuse_conv - - self.decoder[self.in_features[idx]] = decoder_stage - - if not self.decoder_only: - self.predictor = Conv2d( - decoder_channels[0], num_classes, kernel_size=1, stride=1, padding=0 - ) - nn.init.normal_(self.predictor.weight, 0, 0.001) - nn.init.constant_(self.predictor.bias, 0) - - if self.loss_type == "cross_entropy": - self.loss = nn.CrossEntropyLoss(reduction="mean", ignore_index=self.ignore_value) - elif self.loss_type == "hard_pixel_mining": - self.loss = DeepLabCE(ignore_label=self.ignore_value, top_k_percent_pixels=0.2) - else: - raise ValueError("Unexpected loss type: %s" % self.loss_type) - - @classmethod - def from_config(cls, cfg, input_shape): - if cfg.INPUT.CROP.ENABLED: - assert cfg.INPUT.CROP.TYPE == "absolute" - train_size = cfg.INPUT.CROP.SIZE - else: - train_size = None - decoder_channels = [cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM] * ( - len(cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES) - 1 - ) + [cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS] - ret = dict( - input_shape={ - k: v for k, v in input_shape.items() if k in cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES - }, - project_channels=cfg.MODEL.SEM_SEG_HEAD.PROJECT_CHANNELS, - aspp_dilations=cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS, - aspp_dropout=cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT, - decoder_channels=decoder_channels, - common_stride=cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE, - norm=cfg.MODEL.SEM_SEG_HEAD.NORM, - train_size=train_size, - loss_weight=cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT, - loss_type=cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE, - ignore_value=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - use_depthwise_separable_conv=cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV, - ) - return ret - - def forward(self, features, targets=None): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - y = self.layers(features) - if self.decoder_only: - # Output from self.layers() only contains decoder feature. 
- return y - if self.training: - return None, self.losses(y, targets) - else: - y = F.interpolate( - y, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - return y, {} - - def layers(self, features): - # Reverse feature maps into top-down order (from low to high resolution) - for f in self.in_features[::-1]: - x = features[f] - proj_x = self.decoder[f]["project_conv"](x) - if self.decoder[f]["fuse_conv"] is None: - # This is aspp module - y = proj_x - else: - # Upsample y - y = F.interpolate(y, size=proj_x.size()[2:], mode="bilinear", align_corners=False) - y = torch.cat([proj_x, y], dim=1) - y = self.decoder[f]["fuse_conv"](y) - if not self.decoder_only: - y = self.predictor(y) - return y - - def losses(self, predictions, targets): - predictions = F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - loss = self.loss(predictions, targets) - losses = {"loss_sem_seg": loss * self.loss_weight} - return losses - - -@SEM_SEG_HEADS_REGISTRY.register() -class DeepLabV3Head(nn.Module): - """ - A semantic segmentation head described in :paper:`DeepLabV3`. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - # fmt: off - self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES - in_channels = [input_shape[f].channels for f in self.in_features] - aspp_channels = cfg.MODEL.SEM_SEG_HEAD.ASPP_CHANNELS - aspp_dilations = cfg.MODEL.SEM_SEG_HEAD.ASPP_DILATIONS - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES - conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM - self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE # output stride - norm = cfg.MODEL.SEM_SEG_HEAD.NORM - self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT - self.loss_type = cfg.MODEL.SEM_SEG_HEAD.LOSS_TYPE - train_crop_size = cfg.INPUT.CROP.SIZE - aspp_dropout = cfg.MODEL.SEM_SEG_HEAD.ASPP_DROPOUT - use_depthwise_separable_conv = cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV - # fmt: on - - assert len(self.in_features) == 1 - assert len(in_channels) == 1 - - # ASPP module - if cfg.INPUT.CROP.ENABLED: - assert cfg.INPUT.CROP.TYPE == "absolute" - train_crop_h, train_crop_w = train_crop_size - if train_crop_h % self.common_stride or train_crop_w % self.common_stride: - raise ValueError("Crop size need to be divisible by output stride.") - pool_h = train_crop_h // self.common_stride - pool_w = train_crop_w // self.common_stride - pool_kernel_size = (pool_h, pool_w) - else: - pool_kernel_size = None - self.aspp = ASPP( - in_channels[0], - aspp_channels, - aspp_dilations, - norm=norm, - activation=F.relu, - pool_kernel_size=pool_kernel_size, - dropout=aspp_dropout, - use_depthwise_separable_conv=use_depthwise_separable_conv, - ) - - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - nn.init.normal_(self.predictor.weight, 0, 0.001) - nn.init.constant_(self.predictor.bias, 0) - - if self.loss_type == "cross_entropy": - self.loss = nn.CrossEntropyLoss(reduction="mean", ignore_index=self.ignore_value) - elif self.loss_type == "hard_pixel_mining": - self.loss = DeepLabCE(ignore_label=self.ignore_value, top_k_percent_pixels=0.2) - else: - raise ValueError("Unexpected loss type: %s" % self.loss_type) - - def forward(self, features, targets=None): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - x = features[self.in_features[0]] - x = self.aspp(x) - x = self.predictor(x) - if 
self.training: - return None, self.losses(x, targets) - else: - x = F.interpolate( - x, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - return x, {} - - def losses(self, predictions, targets): - predictions = F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - loss = self.loss(predictions, targets) - losses = {"loss_sem_seg": loss * self.loss_weight} - return losses diff --git a/detectron2/projects/DeepLab/train_net.py b/detectron2/projects/DeepLab/train_net.py deleted file mode 100644 index 3de57fa02861a68e633d90dd9e6c1828b4204bea..0000000000000000000000000000000000000000 --- a/detectron2/projects/DeepLab/train_net.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -DeepLab Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -import detectron2.data.transforms as T -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import DatasetMapper, MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import CityscapesSemSegEvaluator, DatasetEvaluators, SemSegEvaluator -from detectron2.projects.deeplab import add_deeplab_config, build_lr_scheduler - - -def build_sem_seg_train_aug(cfg): - augs = [ - T.ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TRAIN, - cfg.INPUT.MAX_SIZE_TRAIN, - cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, - ) - ] - if cfg.INPUT.CROP.ENABLED: - augs.append( - T.RandomCrop_CategoryAreaConstraint( - cfg.INPUT.CROP.TYPE, - cfg.INPUT.CROP.SIZE, - cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA, - cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - ) - ) - augs.append(T.RandomFlip()) - return augs - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. 
- """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "sem_seg": - return SemSegEvaluator( - dataset_name, - distributed=True, - output_dir=output_folder, - ) - if evaluator_type == "cityscapes_sem_seg": - return CityscapesSemSegEvaluator(dataset_name) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE: - mapper = DatasetMapper(cfg, is_train=True, augmentations=build_sem_seg_train_aug(cfg)) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - @classmethod - def build_lr_scheduler(cls, cfg, optimizer): - """ - It now calls :func:`detectron2.solver.build_lr_scheduler`. - Overwrite it if you'd like a different scheduler. - """ - return build_lr_scheduler(cfg, optimizer) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_deeplab_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/DensePose/README.md b/detectron2/projects/DensePose/README.md deleted file mode 100644 index 38f4f834adfcd5490a790a715b24c9ad26ab4dde..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/README.md +++ /dev/null @@ -1,64 +0,0 @@ -# DensePose in Detectron2 - -DensePose aims at learning and establishing dense correspondences between image pixels -and 3D object geometry for deformable objects, such as humans or animals. -In this repository, we provide the code to train and evaluate DensePose R-CNN and -various tools to visualize DensePose annotations and results. - -There are two main paradigms that are used within DensePose project. - -## [Chart-based Dense Pose Estimation for Humans and Animals](doc/DENSEPOSE_IUV.md) - -
- -
- -For chart-based estimation, 3D object mesh is split into charts and -for each pixel the model estimates chart index `I` and local chart coordinates `(U, V)`. -Please follow the link above to find a [detailed overview](doc/DENSEPOSE_IUV.md#Overview) -of the method, links to trained models along with their performance evaluation in the -[Model Zoo](doc/DENSEPOSE_IUV.md#ModelZoo) and -[references](doc/DENSEPOSE_IUV.md#References) to the corresponding papers. - -## [Continuous Surface Embeddings for Dense Pose Estimation for Humans and Animals](doc/DENSEPOSE_CSE.md) - -
- -
- -To establish continuous surface embeddings, the model simultaneously learns -descriptors for mesh vertices and for image pixels. -The embeddings are put into correspondence, thus the location -of each pixel on the 3D model is derived. -Please follow the link above to find a [detailed overview](doc/DENSEPOSE_CSE.md#Overview) -of the method, links to trained models along with their performance evaluation in the -[Model Zoo](doc/DENSEPOSE_CSE.md#ModelZoo) and -[references](doc/DENSEPOSE_CSE.md#References) to the corresponding papers. - -# Quick Start - -See [ Getting Started ](doc/GETTING_STARTED.md) - -# Model Zoo - -Please check the dedicated pages -for [chart-based model zoo](doc/DENSEPOSE_IUV.md#ModelZoo) -and for [continuous surface embeddings model zoo](doc/DENSEPOSE_CSE.md#ModelZoo). - -# What's New - -* June 2021: [DensePose CSE with Cycle Losses](doc/RELEASE_2021_06.md) -* March 2021: [DensePose CSE (a framework to extend DensePose to various categories using 3D models) - and DensePose Evolution (a framework to bootstrap DensePose on unlabeled data) released](doc/RELEASE_2021_03.md) -* April 2020: [DensePose Confidence Estimation and Model Zoo Improvements](doc/RELEASE_2020_04.md) - -# License - -Detectron2 is released under the [Apache 2.0 license](../../LICENSE) - -## Citing DensePose - -If you use DensePose, please refer to the BibTeX entries -for [chart-based models](doc/DENSEPOSE_IUV.md#References) -and for [continuous surface embeddings](doc/DENSEPOSE_CSE.md#References). - diff --git a/detectron2/projects/DensePose/apply_net.py b/detectron2/projects/DensePose/apply_net.py deleted file mode 100644 index a20d740ae075311a9a7235fa3c32fbdd373c9e68..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/apply_net.py +++ /dev/null @@ -1,353 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import argparse -import glob -import logging -import os -import sys -from typing import Any, ClassVar, Dict, List -import torch - -from detectron2.config import CfgNode, get_cfg -from detectron2.data.detection_utils import read_image -from detectron2.engine.defaults import DefaultPredictor -from detectron2.structures.instances import Instances -from detectron2.utils.logger import setup_logger - -from densepose import add_densepose_config -from densepose.structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput -from densepose.utils.logger import verbosity_to_level -from densepose.vis.base import CompoundVisualizer -from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer -from densepose.vis.densepose_outputs_vertex import ( - DensePoseOutputsTextureVisualizer, - DensePoseOutputsVertexVisualizer, - get_texture_atlases, -) -from densepose.vis.densepose_results import ( - DensePoseResultsContourVisualizer, - DensePoseResultsFineSegmentationVisualizer, - DensePoseResultsUVisualizer, - DensePoseResultsVVisualizer, -) -from densepose.vis.densepose_results_textures import ( - DensePoseResultsVisualizerWithTexture, - get_texture_atlas, -) -from densepose.vis.extractor import ( - CompoundExtractor, - DensePoseOutputsExtractor, - DensePoseResultExtractor, - create_extractor, -) - -DOC = """Apply Net - a tool to print / visualize DensePose results -""" - -LOGGER_NAME = "apply_net" -logger = logging.getLogger(LOGGER_NAME) - -_ACTION_REGISTRY: Dict[str, "Action"] = {} - - -class Action: - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - parser.add_argument( - "-v", - "--verbosity", - action="count", - help="Verbose mode. Multiple -v options increase the verbosity.", - ) - - -def register_action(cls: type): - """ - Decorator for action classes to automate action registration - """ - global _ACTION_REGISTRY - _ACTION_REGISTRY[cls.COMMAND] = cls - return cls - - -class InferenceAction(Action): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(InferenceAction, cls).add_arguments(parser) - parser.add_argument("cfg", metavar="", help="Config file") - parser.add_argument("model", metavar="", help="Model file") - parser.add_argument("input", metavar="", help="Input data") - parser.add_argument( - "--opts", - help="Modify config options using the command-line 'KEY VALUE' pairs", - default=[], - nargs=argparse.REMAINDER, - ) - - @classmethod - def execute(cls: type, args: argparse.Namespace): - logger.info(f"Loading config from {args.cfg}") - opts = [] - cfg = cls.setup_config(args.cfg, args.model, args, opts) - logger.info(f"Loading model from {args.model}") - predictor = DefaultPredictor(cfg) - logger.info(f"Loading data from {args.input}") - file_list = cls._get_input_file_list(args.input) - if len(file_list) == 0: - logger.warning(f"No input images for {args.input}") - return - context = cls.create_context(args, cfg) - for file_name in file_list: - img = read_image(file_name, format="BGR") # predictor expects BGR image. 
- with torch.no_grad(): - outputs = predictor(img)["instances"] - cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs) - cls.postexecute(context) - - @classmethod - def setup_config( - cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] - ): - cfg = get_cfg() - add_densepose_config(cfg) - cfg.merge_from_file(config_fpath) - cfg.merge_from_list(args.opts) - if opts: - cfg.merge_from_list(opts) - cfg.MODEL.WEIGHTS = model_fpath - cfg.freeze() - return cfg - - @classmethod - def _get_input_file_list(cls: type, input_spec: str): - if os.path.isdir(input_spec): - file_list = [ - os.path.join(input_spec, fname) - for fname in os.listdir(input_spec) - if os.path.isfile(os.path.join(input_spec, fname)) - ] - elif os.path.isfile(input_spec): - file_list = [input_spec] - else: - file_list = glob.glob(input_spec) - return file_list - - -@register_action -class DumpAction(InferenceAction): - """ - Dump action that outputs results to a pickle file - """ - - COMMAND: ClassVar[str] = "dump" - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(DumpAction, cls).add_arguments(parser) - parser.add_argument( - "--output", - metavar="", - default="results.pkl", - help="File name to save dump to", - ) - - @classmethod - def execute_on_outputs( - cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances - ): - image_fpath = entry["file_name"] - logger.info(f"Processing {image_fpath}") - result = {"file_name": image_fpath} - if outputs.has("scores"): - result["scores"] = outputs.get("scores").cpu() - if outputs.has("pred_boxes"): - result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu() - if outputs.has("pred_densepose"): - if isinstance(outputs.pred_densepose, DensePoseChartPredictorOutput): - extractor = DensePoseResultExtractor() - elif isinstance(outputs.pred_densepose, DensePoseEmbeddingPredictorOutput): - extractor = DensePoseOutputsExtractor() - result["pred_densepose"] = extractor(outputs)[0] - context["results"].append(result) - - @classmethod - def create_context(cls: type, args: argparse.Namespace, cfg: CfgNode): - context = {"results": [], "out_fname": args.output} - return context - - @classmethod - def postexecute(cls: type, context: Dict[str, Any]): - out_fname = context["out_fname"] - out_dir = os.path.dirname(out_fname) - if len(out_dir) > 0 and not os.path.exists(out_dir): - os.makedirs(out_dir) - with open(out_fname, "wb") as hFile: - torch.save(context["results"], hFile) - logger.info(f"Output saved to {out_fname}") - - -@register_action -class ShowAction(InferenceAction): - """ - Show action that visualizes selected entries on an image - """ - - COMMAND: ClassVar[str] = "show" - VISUALIZERS: ClassVar[Dict[str, object]] = { - "dp_contour": DensePoseResultsContourVisualizer, - "dp_segm": DensePoseResultsFineSegmentationVisualizer, - "dp_u": DensePoseResultsUVisualizer, - "dp_v": DensePoseResultsVVisualizer, - "dp_iuv_texture": DensePoseResultsVisualizerWithTexture, - "dp_cse_texture": DensePoseOutputsTextureVisualizer, - "dp_vertex": DensePoseOutputsVertexVisualizer, - "bbox": ScoredBoundingBoxVisualizer, - } - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = 
subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(ShowAction, cls).add_arguments(parser) - parser.add_argument( - "visualizations", - metavar="", - help="Comma separated list of visualizations, possible values: " - "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), - ) - parser.add_argument( - "--min_score", - metavar="", - default=0.8, - type=float, - help="Minimum detection score to visualize", - ) - parser.add_argument( - "--nms_thresh", metavar="", default=None, type=float, help="NMS threshold" - ) - parser.add_argument( - "--texture_atlas", - metavar="", - default=None, - help="Texture atlas file (for IUV texture transfer)", - ) - parser.add_argument( - "--texture_atlases_map", - metavar="", - default=None, - help="JSON string of a dict containing texture atlas files for each mesh", - ) - parser.add_argument( - "--output", - metavar="", - default="outputres.png", - help="File name to save output to", - ) - - @classmethod - def setup_config( - cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] - ): - opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST") - opts.append(str(args.min_score)) - if args.nms_thresh is not None: - opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST") - opts.append(str(args.nms_thresh)) - cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts) - return cfg - - @classmethod - def execute_on_outputs( - cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances - ): - import cv2 - import numpy as np - - visualizer = context["visualizer"] - extractor = context["extractor"] - image_fpath = entry["file_name"] - logger.info(f"Processing {image_fpath}") - image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY) - image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) - data = extractor(outputs) - image_vis = visualizer.visualize(image, data) - entry_idx = context["entry_idx"] + 1 - out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) - out_dir = os.path.dirname(out_fname) - if len(out_dir) > 0 and not os.path.exists(out_dir): - os.makedirs(out_dir) - cv2.imwrite(out_fname, image_vis) - logger.info(f"Output saved to {out_fname}") - context["entry_idx"] += 1 - - @classmethod - def postexecute(cls: type, context: Dict[str, Any]): - pass - - @classmethod - def _get_out_fname(cls: type, entry_idx: int, fname_base: str): - base, ext = os.path.splitext(fname_base) - return base + ".{0:04d}".format(entry_idx) + ext - - @classmethod - def create_context(cls: type, args: argparse.Namespace, cfg: CfgNode) -> Dict[str, Any]: - vis_specs = args.visualizations.split(",") - visualizers = [] - extractors = [] - for vis_spec in vis_specs: - texture_atlas = get_texture_atlas(args.texture_atlas) - texture_atlases_dict = get_texture_atlases(args.texture_atlases_map) - vis = cls.VISUALIZERS[vis_spec]( - cfg=cfg, - texture_atlas=texture_atlas, - texture_atlases_dict=texture_atlases_dict, - ) - visualizers.append(vis) - extractor = create_extractor(vis) - extractors.append(extractor) - visualizer = CompoundVisualizer(visualizers) - extractor = CompoundExtractor(extractors) - context = { - "extractor": extractor, - "visualizer": visualizer, - "out_fname": args.output, - "entry_idx": 0, - } - return context - - -def create_argument_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=DOC, - 
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), - ) - parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) - subparsers = parser.add_subparsers(title="Actions") - for _, action in _ACTION_REGISTRY.items(): - action.add_parser(subparsers) - return parser - - -def main(): - parser = create_argument_parser() - args = parser.parse_args() - verbosity = getattr(args, "verbosity", None) - global logger - logger = setup_logger(name=LOGGER_NAME) - logger.setLevel(verbosity_to_level(verbosity)) - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml b/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml deleted file mode 100644 index 1579187a7004e716eb3a86dbbfebb092d7aca84b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml +++ /dev/null @@ -1,48 +0,0 @@ -VERSION: 2 -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. - POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 -DATASETS: - TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival") - TEST: ("densepose_coco_2014_minival",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 - STEPS: (60000, 80000) - MAX_ITER: 90000 - WARMUP_FACTOR: 0.1 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml b/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml deleted file mode 100644 index 36eabfed984b360907f5782d4e8b0232784f8a40..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dYBMemi9xOUFR0w" - BACKBONE: - NAME: "build_hrfpn_backbone" - RPN: - IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] - ROI_HEADS: - IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: "norm" - BASE_LR: 0.03 diff --git a/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml b/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml deleted file mode 100644 index 0ca8085e154c40a5b0f42a17575d2d48328619f0..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33ck0gvo5jfoWBOPo" - BACKBONE: - NAME: "build_hrfpn_backbone" - RPN: - IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] - ROI_HEADS: - IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] - HRNET: - STAGE2: - NUM_CHANNELS: [40, 80] - STAGE3: - NUM_CHANNELS: [40, 80, 160] - STAGE4: - NUM_CHANNELS: [40, 80, 160, 320] -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: "norm" - BASE_LR: 0.03 diff --git a/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml b/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml deleted file mode 100644 index a3f437ab57ae0ff48cd4a97cbda987346f9a5a24..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dKvqI6pBZlifgJk" - BACKBONE: - NAME: "build_hrfpn_backbone" - RPN: - IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] - ROI_HEADS: - IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] - HRNET: - STAGE2: - NUM_CHANNELS: [48, 96] - STAGE3: - NUM_CHANNELS: [48, 96, 192] - STAGE4: - NUM_CHANNELS: [48, 96, 192, 384] -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: "norm" - BASE_LR: 0.03 diff --git a/detectron2/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml b/detectron2/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml deleted file mode 100644 index e92340ee0cdba2abd0a35114cbf3e78b04435dfe..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - ROI_DENSEPOSE_HEAD: - CSE: - EMBEDDERS: - "smpl_27554": - TYPE: vertex_feature - NUM_VERTICES: 27554 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_smpl_27554_256.pkl" -DATASETS: - TRAIN: - - "densepose_coco_2014_train_cse" - - "densepose_coco_2014_valminusminival_cse" - TEST: - - "densepose_coco_2014_minival_cse" - CLASS_TO_MESH_NAME_MAPPING: - "0": "smpl_27554" diff --git a/detectron2/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN.yaml b/detectron2/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN.yaml deleted file mode 100644 index de3b26009bdee95666248f99cd243fe37e7fd8bd..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN.yaml +++ /dev/null @@ -1,60 +0,0 @@ -VERSION: 2 -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 
1000 proposals per-image since the default batch size for FPN is 2. - POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 - PREDICTOR_NAME: "DensePoseEmbeddingPredictor" - LOSS_NAME: "DensePoseCseLoss" - CSE: - # embedding loss, possible values: - # - "EmbeddingLoss" - # - "SoftEmbeddingLoss" - EMBED_LOSS_NAME: "EmbeddingLoss" -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 - STEPS: (60000, 80000) - MAX_ITER: 90000 - WARMUP_FACTOR: 0.1 - CLIP_GRADIENTS: - CLIP_TYPE: norm - CLIP_VALUE: 1.0 - ENABLED: true - NORM_TYPE: 2.0 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DENSEPOSE_EVALUATION: - TYPE: cse - STORAGE: file diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_s1x.yaml deleted file mode 100644 index 69d858902671e683b884b32c3c1448a44dc3995e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - CSE: - EMBED_LOSS_NAME: "EmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml deleted file mode 100644 index 141657cdab24a2f591eeef763aef29543c43108e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_s1x.yaml deleted file mode 100644 index d2eea1e2c3cecc7bba1bfd6f2332227bd3d0f5ed..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - CSE: - EMBED_LOSS_NAME: "EmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_soft_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_soft_s1x.yaml deleted file mode 100644 index 1c362e1f9e93f9b9b458532f5318518396404d9f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_soft_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_DL_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_DL_s1x.yaml deleted file mode 100644 index 26684deaa9c72aab1408dbe3abb6ac3a9b6a17ac..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_DL_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - CSE: - EMBED_LOSS_NAME: "EmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_DL_soft_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_DL_soft_s1x.yaml deleted file mode 100644 index b53501d29b84e9ff4088ce98bc83688e89e546ed..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_DL_soft_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_s1x.yaml deleted file mode 100644 index c186625a86cc76441b9edeefeabd7caf44af7755..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - CSE: - EMBED_LOSS_NAME: "EmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_16k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_16k.yaml deleted file mode 100644 index 69ab22669e2176b6ec661fc982be7412abb5e0e8..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_16k.yaml +++ /dev/null @@ -1,133 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_7466_256.pkl" - "dog_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: 
"https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_7466_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds2_train_v1" - TEST: - - "densepose_lvis_v1_ds2_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds2_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds2_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CATEGORY_MAPS: - "densepose_lvis_v1_ds2_train_v1": - "1202": 943 # zebra -> sheep - "569": 943 # horse -> sheep - "496": 943 # giraffe -> sheep - "422": 943 # elephant -> sheep - "80": 943 # cow -> sheep - "76": 943 # bear -> sheep - "225": 943 # cat -> sheep - "378": 943 # dog -> sheep - "densepose_lvis_v1_ds2_val_v1": - "1202": 943 # zebra -> sheep - "569": 943 # horse -> sheep - "496": 943 # giraffe -> sheep - "422": 943 # elephant -> sheep - "80": 943 # cow -> sheep - "76": 943 # bear -> sheep - "225": 943 # cat -> sheep - "378": 943 # dog -> sheep - CLASS_TO_MESH_NAME_MAPPING: - # Note: different classes are mapped to a single class - # mesh is chosen based on GT data, so this is just some - # value which has no particular meaning - "0": "sheep_5004" -SOLVER: - MAX_ITER: 16000 - STEPS: (12000, 14000) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml deleted file mode 100644 index 921a9c125d9da982fb88172acc7825ba3c583370..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_CA_finetune_4k.yaml +++ /dev/null @@ -1,133 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_5001": - TYPE: vertex_feature - NUM_VERTICES: 5001 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_5001_256.pkl" - "dog_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_5002_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds1_train_v1" - TEST: - - "densepose_lvis_v1_ds1_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds1_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds1_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CATEGORY_MAPS: - "densepose_lvis_v1_ds1_train_v1": - "1202": 943 # zebra -> sheep - "569": 943 # horse -> sheep - "496": 943 # giraffe -> sheep - "422": 943 # elephant -> sheep - "80": 943 # cow -> sheep - "76": 943 # bear -> sheep - "225": 943 # cat -> sheep - "378": 943 # dog -> sheep - "densepose_lvis_v1_ds1_val_v1": - "1202": 943 # zebra -> sheep - "569": 943 # horse -> sheep - "496": 943 # giraffe -> sheep - "422": 943 # elephant -> sheep - "80": 943 # cow -> sheep - "76": 943 # bear -> sheep - "225": 943 # cat -> sheep - "378": 943 # dog -> sheep - CLASS_TO_MESH_NAME_MAPPING: - # Note: different classes are mapped to a single class - # mesh is chosen based on GT data, so this is just some - # value 
which has no particular meaning - "0": "sheep_5004" -SOLVER: - MAX_ITER: 4000 - STEPS: (3000, 3500) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_16k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_16k.yaml deleted file mode 100644 index 1b5a098d171e508fcb9dd8088ecc1799c3068efc..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_16k.yaml +++ /dev/null @@ -1,119 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_maskonly_24k/270668502/model_final_21b1d2.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_7466_256.pkl" - "dog_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_7466_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds2_train_v1" - TEST: - - "densepose_lvis_v1_ds2_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds2_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds2_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - 
CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_7466" - "3": "dog_7466" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 16000 - STEPS: (12000, 14000) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_i2m_16k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_i2m_16k.yaml deleted file mode 100644 index 18d6dacf4b62e609aa85735a87daa8d2506000d7..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_i2m_16k.yaml +++ /dev/null @@ -1,121 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_maskonly_24k/270668502/model_final_21b1d2.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - PIX_TO_SHAPE_CYCLE_LOSS: - ENABLED: True - EMBEDDERS: - "cat_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_7466_256.pkl" - "dog_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_7466_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds2_train_v1" - TEST: - - "densepose_lvis_v1_ds2_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds2_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - 
- 225 # cat - - 378 # dog - "densepose_lvis_v1_ds2_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_7466" - "3": "dog_7466" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 16000 - STEPS: (12000, 14000) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_m2m_16k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_m2m_16k.yaml deleted file mode 100644 index 6b798ae21204b9310adae33040c870253edc68ee..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_I0_finetune_m2m_16k.yaml +++ /dev/null @@ -1,138 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_maskonly_24k/267687159/model_final_354e61.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - SHAPE_TO_SHAPE_CYCLE_LOSS: - ENABLED: True - EMBEDDERS: - "cat_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_7466_256.pkl" - "dog_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_7466_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" - "smpl_27554": - TYPE: vertex_feature - NUM_VERTICES: 27554 - 
FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_smpl_27554_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds2_train_v1" - TEST: - - "densepose_lvis_v1_ds2_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds2_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds2_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_7466" - "3": "dog_7466" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 16000 - STEPS: (12000, 14000) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True - MESH_ALIGNMENT_MESH_NAMES: - - bear_4936 - - cow_5002 - - cat_7466 - - dog_7466 - - elephant_5002 - - giraffe_5002 - - horse_5004 - - sheep_5004 - - zebra_5002 diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_16k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_16k.yaml deleted file mode 100644 index b1462e374377fbf448e176951794face175b5002..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_16k.yaml +++ /dev/null @@ -1,119 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_7466_256.pkl" - "dog_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_7466_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - 
"cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds2_train_v1" - TEST: - - "densepose_lvis_v1_ds2_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds2_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds2_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_7466" - "3": "dog_7466" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 16000 - STEPS: (12000, 14000) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_4k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_4k.yaml deleted file mode 100644 index ba4b81dde2ef53749b096f137ac658563fdad857..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_4k.yaml +++ /dev/null @@ -1,119 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_5001": - TYPE: vertex_feature - NUM_VERTICES: 5001 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_5001_256.pkl" - "dog_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_5002_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - 
IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds1_train_v1" - TEST: - - "densepose_lvis_v1_ds1_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds1_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds1_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_5001" - "3": "dog_5002" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 4000 - STEPS: (3000, 3500) -DENSEPOSE_EVALUATION: - EVALUATE_MESH_ALIGNMENT: True diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_maskonly_24k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_maskonly_24k.yaml deleted file mode 100644 index bb6136e274ca64aa2285698664d3243519d1979f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_maskonly_24k.yaml +++ /dev/null @@ -1,118 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - COARSE_SEGM_TRAINED_BY_MASKS: True - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBED_LOSS_WEIGHT: 0.0 - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_7466_256.pkl" - "dog_7466": - TYPE: vertex_feature - NUM_VERTICES: 7466 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_7466_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: 
"https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_ds2_train_v1" - TEST: - - "densepose_lvis_v1_ds2_val_v1" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_ds2_train_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_ds2_val_v1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_7466" - "3": "dog_7466" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 24000 - STEPS: (20000, 22000) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_chimps_finetune_4k.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_chimps_finetune_4k.yaml deleted file mode 100644 index 3bccb7837a2e4b905b4e3c7af465c3be3a44452d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_chimps_finetune_4k.yaml +++ /dev/null @@ -1,29 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_soft_s1x/250533982/model_final_2c4512.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - GEODESIC_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "chimp_5029": - TYPE: vertex_feature - NUM_VERTICES: 5029 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_chimp_5029_256.pkl" -DATASETS: - TRAIN: - - "densepose_chimps_cse_train" - TEST: - - "densepose_chimps_cse_val" - CLASS_TO_MESH_NAME_MAPPING: - "0": "chimp_5029" -SOLVER: - MAX_ITER: 4000 - STEPS: (3000, 3500) diff --git a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_s1x.yaml b/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_s1x.yaml deleted file mode 100644 index 9662fb8f8a4e9f7b01f41ddb79a3469ecab7032b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_soft_s1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN-Human.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml 
b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml deleted file mode 100644 index 3c16763c532499c1a0c62fb8c81a2ab97be3a1ec..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml deleted file mode 100644 index 15475b1ac3bb7272a7ebc0061a55119ffd2591b9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml deleted file mode 100644 index 0cbe07f3bb0027bb7ecdc86f96d60790382b477b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml deleted file mode 100644 index 7546b967ab89129c9a276f19b1cf2d6b59f1a462..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml deleted file mode 100644 index 045f7f02f1b4eb0c0ef1733c3ac65e3aa70168de..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - 
DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml deleted file mode 100644 index 9334e18655d4451457a58c6ce945e01855f95105..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml deleted file mode 100644 index ace62094fbc4ce2024810333c11c7a955d8eeb22..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml deleted file mode 100644 index 90f0be2805cd04e83c25d041d35ae66c90ce2b95..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml deleted file mode 100644 index 766c098f6dcdd1fb3f67957d7d1d982b37747b96..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml deleted file mode 100644 index af44fb767edf9bf093463e62f93e070d0d019c5a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml +++ /dev/null @@ -1,8 
+0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml deleted file mode 100644 index 8e79a1b9549cf19ed4a43cf9caf3dc88f6133310..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NUM_COARSE_SEGM_CHANNELS: 15 - POOLER_RESOLUTION: 14 - HEATMAP_SIZE: 56 - INDEX_WEIGHTS: 2.0 - PART_WEIGHTS: 0.3 - POINT_REGRESSION_WEIGHTS: 0.1 - DECODER_ON: False -SOLVER: - BASE_LR: 0.002 - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml deleted file mode 100644 index 18a417a9a76d388810d46d1ee738d8b19abf0db0..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml deleted file mode 100644 index f3720eff56ce042a68da6c99f484b963cae2c7d9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml deleted file mode 100644 index 8a413d2a0d1549702fb45a2e50056fe0abde941f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml deleted file mode 100644 index 
5a47cc05e6e9dc882778c6b502d93cbcec88fb88..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml deleted file mode 100644 index 52a170b4a28289ad943314f77256e34800d23121..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml deleted file mode 100644 index 8a81f2a143cbfcd2dbc92f0fc5c86f951b9b7adf..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: norm - CLIP_VALUE: 100.0 - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml deleted file mode 100644 index d36e54256ac22f1b01604e54430da24972f06eeb..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml deleted file mode 100644 index 5cf29eacd57626c676ed4c960a3e97e552b6dbdf..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git 
a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml deleted file mode 100644 index e880d469564a3757ba3f4d708054074cefda49b6..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml deleted file mode 100644 index d2dd14c6f92f3850b99e6f1c828c0fcee52120e1..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml b/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml deleted file mode 100644 index 6c5391f3b3c3d437312a290d29b0656cb3804b25..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NUM_COARSE_SEGM_CHANNELS: 15 - POOLER_RESOLUTION: 14 - HEATMAP_SIZE: 56 - INDEX_WEIGHTS: 2.0 - PART_WEIGHTS: 0.3 - POINT_REGRESSION_WEIGHTS: 0.1 - DECODER_ON: False -SOLVER: - BASE_LR: 0.002 - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml b/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml deleted file mode 100644 index f09d723f3cb9eef94223c5926dbb7731397304c9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml +++ /dev/null @@ -1,91 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") - TEST: ("densepose_chimps",) - CATEGORY_MAPS: - "base_coco_2017_train": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person - "base_coco_2017_val": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person - WHITELISTED_CATEGORIES: - "base_coco_2017_train": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe - "base_coco_2017_val": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA.yaml b/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA.yaml deleted file mode 100644 index 6296692d5ff15da24f87adb6327a62d9f4a34892..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA.yaml +++ /dev/null @@ -1,28 +0,0 @@ -_BASE_: "Base-RCNN-FPN-Atop10P_CA.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 - COARSE_SEGM_TRAINED_BY_MASKS: True - INDEX_WEIGHTS: 1.0 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - WARMUP_FACTOR: 0.025 - MAX_ITER: 270000 - STEPS: (210000, 250000) diff --git a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_coarsesegm.yaml b/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_coarsesegm.yaml deleted file mode 100644 index 033918e0daec8c225306dafac3a5fe9923189e53..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_coarsesegm.yaml +++ /dev/null @@ -1,56 +0,0 @@ -_BASE_: "Base-RCNN-FPN-Atop10P_CA.yaml" -MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl - RESNETS: - DEPTH: 50 - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: 
"DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 - COARSE_SEGM_TRAINED_BY_MASKS: True -BOOTSTRAP_DATASETS: - - DATASET: "chimpnsee" - RATIO: 1.0 - IMAGE_LOADER: - TYPE: "video_keyframe" - SELECT: - STRATEGY: "random_k" - NUM_IMAGES: 4 - TRANSFORM: - TYPE: "resize" - MIN_SIZE: 800 - MAX_SIZE: 1333 - BATCH_SIZE: 8 - NUM_WORKERS: 1 - INFERENCE: - INPUT_BATCH_SIZE: 1 - OUTPUT_BATCH_SIZE: 1 - DATA_SAMPLER: - # supported types: - # densepose_uniform - # densepose_UV_confidence - # densepose_fine_segm_confidence - # densepose_coarse_segm_confidence - TYPE: "densepose_coarse_segm_confidence" - COUNT_PER_CLASS: 8 - FILTER: - TYPE: "detection_score" - MIN_VALUE: 0.8 -BOOTSTRAP_MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 270000 - STEPS: (210000, 250000) diff --git a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_finesegm.yaml b/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_finesegm.yaml deleted file mode 100644 index 5814a4a01fd772674fa40c0cba34666aed87b33a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_finesegm.yaml +++ /dev/null @@ -1,56 +0,0 @@ -_BASE_: "Base-RCNN-FPN-Atop10P_CA.yaml" -MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl - RESNETS: - DEPTH: 50 - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 - COARSE_SEGM_TRAINED_BY_MASKS: True -BOOTSTRAP_DATASETS: - - DATASET: "chimpnsee" - RATIO: 1.0 - IMAGE_LOADER: - TYPE: "video_keyframe" - SELECT: - STRATEGY: "random_k" - NUM_IMAGES: 4 - TRANSFORM: - TYPE: "resize" - MIN_SIZE: 800 - MAX_SIZE: 1333 - BATCH_SIZE: 8 - NUM_WORKERS: 1 - INFERENCE: - INPUT_BATCH_SIZE: 1 - OUTPUT_BATCH_SIZE: 1 - DATA_SAMPLER: - # supported types: - # densepose_uniform - # densepose_UV_confidence - # densepose_fine_segm_confidence - # densepose_coarse_segm_confidence - TYPE: "densepose_fine_segm_confidence" - COUNT_PER_CLASS: 8 - FILTER: - TYPE: "detection_score" - MIN_VALUE: 0.8 -BOOTSTRAP_MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 270000 - STEPS: (210000, 250000) diff --git a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uniform.yaml b/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uniform.yaml deleted file mode 100644 index d591ea6e22282f43fff0b44131e0913aa7261276..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uniform.yaml +++ /dev/null @@ -1,56 +0,0 @@ -_BASE_: "Base-RCNN-FPN-Atop10P_CA.yaml" -MODEL: - WEIGHTS: 
https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl - RESNETS: - DEPTH: 50 - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 - COARSE_SEGM_TRAINED_BY_MASKS: True -BOOTSTRAP_DATASETS: - - DATASET: "chimpnsee" - RATIO: 1.0 - IMAGE_LOADER: - TYPE: "video_keyframe" - SELECT: - STRATEGY: "random_k" - NUM_IMAGES: 4 - TRANSFORM: - TYPE: "resize" - MIN_SIZE: 800 - MAX_SIZE: 1333 - BATCH_SIZE: 8 - NUM_WORKERS: 1 - INFERENCE: - INPUT_BATCH_SIZE: 1 - OUTPUT_BATCH_SIZE: 1 - DATA_SAMPLER: - # supported types: - # densepose_uniform - # densepose_UV_confidence - # densepose_fine_segm_confidence - # densepose_coarse_segm_confidence - TYPE: "densepose_uniform" - COUNT_PER_CLASS: 8 - FILTER: - TYPE: "detection_score" - MIN_VALUE: 0.8 -BOOTSTRAP_MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 270000 - STEPS: (210000, 250000) diff --git a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uv.yaml b/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uv.yaml deleted file mode 100644 index 110acff5a54247abb7b344672038b71e24167f33..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uv.yaml +++ /dev/null @@ -1,56 +0,0 @@ -_BASE_: "Base-RCNN-FPN-Atop10P_CA.yaml" -MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl - RESNETS: - DEPTH: 50 - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - SEGM_CONFIDENCE: - ENABLED: True - POINT_REGRESSION_WEIGHTS: 0.0005 - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 - COARSE_SEGM_TRAINED_BY_MASKS: True -BOOTSTRAP_DATASETS: - - DATASET: "chimpnsee" - RATIO: 1.0 - IMAGE_LOADER: - TYPE: "video_keyframe" - SELECT: - STRATEGY: "random_k" - NUM_IMAGES: 4 - TRANSFORM: - TYPE: "resize" - MIN_SIZE: 800 - MAX_SIZE: 1333 - BATCH_SIZE: 8 - NUM_WORKERS: 1 - INFERENCE: - INPUT_BATCH_SIZE: 1 - OUTPUT_BATCH_SIZE: 1 - DATA_SAMPLER: - # supported types: - # densepose_uniform - # densepose_UV_confidence - # densepose_fine_segm_confidence - # densepose_coarse_segm_confidence - TYPE: "densepose_UV_confidence" - COUNT_PER_CLASS: 8 - FILTER: - TYPE: "detection_score" - MIN_VALUE: 0.8 -BOOTSTRAP_MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 270000 - STEPS: (210000, 250000) diff --git a/detectron2/projects/DensePose/configs/quick_schedules/cse/densepose_rcnn_R_50_FPN_DL_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/cse/densepose_rcnn_R_50_FPN_DL_instant_test.yaml deleted file mode 100644 index 3b43f75da549a9e5148c8528b5d375317680d738..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/DensePose/configs/quick_schedules/cse/densepose_rcnn_R_50_FPN_DL_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../../cse/Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100_cse",) - TEST: ("densepose_coco_2014_minival_100_cse",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/detectron2/projects/DensePose/configs/quick_schedules/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_instant_test.yaml deleted file mode 100644 index a2c49a2d14e5665af117972d126e25422e37b2b9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/cse/densepose_rcnn_R_50_FPN_soft_animals_finetune_instant_test.yaml +++ /dev/null @@ -1,126 +0,0 @@ -_BASE_: "../../cse/Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 9 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - CSE: - EMBED_LOSS_NAME: "SoftEmbeddingLoss" - EMBEDDING_DIST_GAUSS_SIGMA: 0.1 - EMBEDDERS: - "cat_5001": - TYPE: vertex_feature - NUM_VERTICES: 5001 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cat_5001_256.pkl" - "dog_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_dog_5002_256.pkl" - "sheep_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_sheep_5004_256.pkl" - "horse_5004": - TYPE: vertex_feature - NUM_VERTICES: 5004 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_horse_5004_256.pkl" - "zebra_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_zebra_5002_256.pkl" - "giraffe_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_giraffe_5002_256.pkl" - "elephant_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_elephant_5002_256.pkl" - "cow_5002": - TYPE: vertex_feature - NUM_VERTICES: 5002 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_cow_5002_256.pkl" - "bear_4936": - TYPE: vertex_feature - NUM_VERTICES: 4936 - FEATURE_DIM: 256 - FEATURES_TRAINABLE: False - IS_TRAINABLE: True - INIT_FILE: "https://dl.fbaipublicfiles.com/densepose/data/cse/lbo/phi_bear_4936_256.pkl" -DATASETS: - TRAIN: - - "densepose_lvis_v1_train1" - - "densepose_lvis_v1_train2" - TEST: - - "densepose_lvis_v1_val_animals_100" - WHITELISTED_CATEGORIES: - "densepose_lvis_v1_train1": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - 
- 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_train2": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - "densepose_lvis_v1_val_animals_100": - - 943 # sheep - - 1202 # zebra - - 569 # horse - - 496 # giraffe - - 422 # elephant - - 80 # cow - - 76 # bear - - 225 # cat - - 378 # dog - CLASS_TO_MESH_NAME_MAPPING: - "0": "bear_4936" - "1": "cow_5002" - "2": "cat_5001" - "3": "dog_5002" - "4": "elephant_5002" - "5": "giraffe_5002" - "6": "horse_5004" - "7": "sheep_5004" - "8": "zebra_5002" -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_instant_test.yaml deleted file mode 100644 index 95677ce9a7ff426a9051737876e7424908b1423f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_instant_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) - IMS_PER_BATCH: 2 diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml deleted file mode 100644 index b90989eef81e27d23119d2cd4627e8cea211ac51..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml deleted file mode 100644 index b124da19140f564258b583ec109eeeeaff8fd78a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" -DATASETS: - TRAIN: () - TEST: ("densepose_coco_2014_minival_100",) -TEST: - AUG: - ENABLED: True - MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) - MAX_SIZE: 4000 - FLIP: True - EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.59, 0.03]] diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml deleted file mode 100644 index f0fe61151adf255baba717f3e65ff6fab52829a6..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml +++ /dev/null @@ -1,19 
+0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 40 - STEPS: (30,) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml deleted file mode 100644 index f0d9358c8846452314697a19b5e2ea9e075ddaeb..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 40 - STEPS: (30,) - WARMUP_FACTOR: 0.025 diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index d607c98813d045c1e19875bdfe45fbc1c3fdb292..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" -DATASETS: - TRAIN: () - TEST: ("densepose_coco_2014_minival_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.09, 0.02]] diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 057c8768186e8a818228aa2f028ba3007374c571..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml b/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 0053c9d7d41af0ee7262804838d8edcde10ed40d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - NUM_CLASSES: 1 -DATASETS: - TRAIN: 
("densepose_coco_2014_minival",) - TEST: ("densepose_coco_2014_minival",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: norm - CLIP_VALUE: 1.0 - MAX_ITER: 6000 - STEPS: (5500, 5800) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 76.2477, 1.0], ["densepose_gps", "AP", 79.6090, 1.5], ["densepose_gpsm", "AP", 80.0061, 1.5]] - diff --git a/detectron2/projects/DensePose/densepose/__init__.py b/detectron2/projects/DensePose/densepose/__init__.py deleted file mode 100644 index 0fc9e977ed3174e244414378dd85d48ea02e635e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from .data.datasets import builtin # just to register data -from .converters import builtin as builtin_converters # register converters -from .config import ( - add_densepose_config, - add_densepose_head_config, - add_hrnet_config, - add_dataset_category_config, - add_bootstrap_config, - load_bootstrap_config, -) -from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData -from .evaluation import DensePoseCOCOEvaluator -from .modeling.roi_heads import DensePoseROIHeads -from .modeling.test_time_augmentation import ( - DensePoseGeneralizedRCNNWithTTA, - DensePoseDatasetMapperTTA, -) -from .utils.transform import load_from_cfg -from .modeling.hrfpn import build_hrfpn_backbone diff --git a/detectron2/projects/DensePose/densepose/config.py b/detectron2/projects/DensePose/densepose/config.py deleted file mode 100644 index 2a06a09c80865ab987773511b2acc71e232b26ac..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/config.py +++ /dev/null @@ -1,277 +0,0 @@ -# -*- coding = utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
-# pyre-ignore-all-errors - -from detectron2.config import CfgNode as CN - - -def add_dataset_category_config(cfg: CN) -> None: - """ - Add config for additional category-related dataset options - - category whitelisting - - category mapping - """ - _C = cfg - _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True) - _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True) - # class to mesh mapping - _C.DATASETS.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True) - - -def add_evaluation_config(cfg: CN) -> None: - _C = cfg - _C.DENSEPOSE_EVALUATION = CN() - # evaluator type, possible values: - # - "iou": evaluator for models that produce iou data - # - "cse": evaluator for models that produce cse data - _C.DENSEPOSE_EVALUATION.TYPE = "iou" - # storage for DensePose results, possible values: - # - "none": no explicit storage, all the results are stored in the - # dictionary with predictions, memory intensive; - # historically the default storage type - # - "ram": RAM storage, uses per-process RAM storage, which is - # reduced to a single process storage on later stages, - # less memory intensive - # - "file": file storage, uses per-process file-based storage, - # the least memory intensive, but may create bottlenecks - # on file system accesses - _C.DENSEPOSE_EVALUATION.STORAGE = "none" - # minimum threshold for IOU values: the lower its values is, - # the more matches are produced (and the higher the AP score) - _C.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD = 0.5 - # Non-distributed inference is slower (at inference time) but can avoid RAM OOM - _C.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE = True - # evaluate mesh alignment based on vertex embeddings, only makes sense in CSE context - _C.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT = False - # meshes to compute mesh alignment for - _C.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES = [] - - -def add_bootstrap_config(cfg: CN) -> None: - """ """ - _C = cfg - _C.BOOTSTRAP_DATASETS = [] - _C.BOOTSTRAP_MODEL = CN() - _C.BOOTSTRAP_MODEL.WEIGHTS = "" - _C.BOOTSTRAP_MODEL.DEVICE = "cuda" - - -def get_bootstrap_dataset_config() -> CN: - _C = CN() - _C.DATASET = "" - # ratio used to mix data loaders - _C.RATIO = 0.1 - # image loader - _C.IMAGE_LOADER = CN(new_allowed=True) - _C.IMAGE_LOADER.TYPE = "" - _C.IMAGE_LOADER.BATCH_SIZE = 4 - _C.IMAGE_LOADER.NUM_WORKERS = 4 - _C.IMAGE_LOADER.CATEGORIES = [] - _C.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000 - _C.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True) - # inference - _C.INFERENCE = CN() - # batch size for model inputs - _C.INFERENCE.INPUT_BATCH_SIZE = 4 - # batch size to group model outputs - _C.INFERENCE.OUTPUT_BATCH_SIZE = 2 - # sampled data - _C.DATA_SAMPLER = CN(new_allowed=True) - _C.DATA_SAMPLER.TYPE = "" - _C.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False - # filter - _C.FILTER = CN(new_allowed=True) - _C.FILTER.TYPE = "" - return _C - - -def load_bootstrap_config(cfg: CN) -> None: - """ - Bootstrap datasets are given as a list of `dict` that are not automatically - converted into CfgNode. 
This method processes all bootstrap dataset entries - and ensures that they are in CfgNode format and comply with the specification - """ - if not cfg.BOOTSTRAP_DATASETS: - return - - bootstrap_datasets_cfgnodes = [] - for dataset_cfg in cfg.BOOTSTRAP_DATASETS: - _C = get_bootstrap_dataset_config().clone() - _C.merge_from_other_cfg(CN(dataset_cfg)) - bootstrap_datasets_cfgnodes.append(_C) - cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes - - -def add_densepose_head_cse_config(cfg: CN) -> None: - """ - Add configuration options for Continuous Surface Embeddings (CSE) - """ - _C = cfg - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN() - # Dimensionality D of the embedding space - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE = 16 - # Embedder specifications for various mesh IDs - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS = CN(new_allowed=True) - # normalization coefficient for embedding distances - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA = 0.01 - # normalization coefficient for geodesic distances - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA = 0.01 - # embedding loss weight - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT = 0.6 - # embedding loss name, currently the following options are supported: - # - EmbeddingLoss: cross-entropy on vertex labels - # - SoftEmbeddingLoss: cross-entropy on vertex label combined with - # Gaussian penalty on distance between vertices - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME = "EmbeddingLoss" - # optimizer hyperparameters - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR = 1.0 - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR = 1.0 - # Shape to shape cycle consistency loss parameters: - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False}) - # shape to shape cycle consistency loss weight - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.025 - # norm type used for loss computation - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P = 2 - # normalization term for embedding similarity matrices - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE = 0.05 - # maximum number of vertices to include into shape to shape cycle loss - # if negative or zero, all vertices are considered - # if positive, random subset of vertices of given size is considered - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES = 4936 - # Pixel to shape cycle consistency loss parameters: - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False}) - # pixel to shape cycle consistency loss weight - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.0001 - # norm type used for loss computation - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P = 2 - # map images to all meshes and back (if false, use only gt meshes from the batch) - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY = False - # Randomly select at most this number of pixels from every instance - # if negative or zero, all vertices are considered - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE = 100 - # normalization factor for pixel to pixel distances (higher value = smoother distribution) - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA = 5.0 - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX = 0.05 - _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL = 0.05 - - -def 
add_densepose_head_config(cfg: CN) -> None: - """ - Add config for densepose head. - """ - _C = cfg - - _C.MODEL.DENSEPOSE_ON = True - - _C.MODEL.ROI_DENSEPOSE_HEAD = CN() - _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = "" - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8 - # Number of parts used for point labels - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4 - _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512 - _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3 - _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2 - _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112 - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2" - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28 - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2 - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2 - # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) - _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7 - # Loss weights for annotation masks.(14 Parts) - _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0 - # Loss weights for surface parts. (24 Parts) - _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0 - # Loss weights for UV regression. - _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01 - # Coarse segmentation is trained using instance segmentation task data - _C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False - # For Decoder - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = "" - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4 - # For DeepLab head - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN() - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN" - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0 - # Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY - # Some registered predictors: - # "DensePoseChartPredictor": predicts segmentation and UV coordinates for predefined charts - # "DensePoseChartWithConfidencePredictor": predicts segmentation, UV coordinates - # and associated confidences for predefined charts (default) - # "DensePoseEmbeddingWithConfidencePredictor": predicts segmentation, embeddings - # and associated confidences for CSE - _C.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor" - # Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY - # Some registered losses: - # "DensePoseChartLoss": loss for chart-based models that estimate - # segmentation and UV coordinates - # "DensePoseChartWithConfidenceLoss": loss for chart-based models that estimate - # segmentation, UV coordinates and the corresponding confidences (default) - _C.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME = "DensePoseChartWithConfidenceLoss" - # Confidences - # Enable learning UV confidences (variances) along with the actual values - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False}) - # UV confidence lower bound - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01 - # Enable learning segmentation confidences (variances) along with the actual values - _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False}) - # Segmentation confidence lower bound - _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01 - # Statistical model type for confidence learning, possible values: - # - "iid_iso": statistically independent identically distributed residuals - # with isotropic covariance - # - "indep_aniso": 
statistically independent residuals with anisotropic - # covariances - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso" - # List of angles for rotation in data augmentation during training - _C.INPUT.ROTATION_ANGLES = [0] - _C.TEST.AUG.ROTATION_ANGLES = () # Rotation TTA - - add_densepose_head_cse_config(cfg) - - -def add_hrnet_config(cfg: CN) -> None: - """ - Add config for HRNet backbone. - """ - _C = cfg - - # For HigherHRNet w32 - _C.MODEL.HRNET = CN() - _C.MODEL.HRNET.STEM_INPLANES = 64 - _C.MODEL.HRNET.STAGE2 = CN() - _C.MODEL.HRNET.STAGE2.NUM_MODULES = 1 - _C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2 - _C.MODEL.HRNET.STAGE2.BLOCK = "BASIC" - _C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4] - _C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64] - _C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM" - _C.MODEL.HRNET.STAGE3 = CN() - _C.MODEL.HRNET.STAGE3.NUM_MODULES = 4 - _C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3 - _C.MODEL.HRNET.STAGE3.BLOCK = "BASIC" - _C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4] - _C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128] - _C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM" - _C.MODEL.HRNET.STAGE4 = CN() - _C.MODEL.HRNET.STAGE4.NUM_MODULES = 3 - _C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4 - _C.MODEL.HRNET.STAGE4.BLOCK = "BASIC" - _C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] - _C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] - _C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM" - - _C.MODEL.HRNET.HRFPN = CN() - _C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256 - - -def add_densepose_config(cfg: CN) -> None: - add_densepose_head_config(cfg) - add_hrnet_config(cfg) - add_bootstrap_config(cfg) - add_dataset_category_config(cfg) - add_evaluation_config(cfg) diff --git a/detectron2/projects/DensePose/densepose/converters/__init__.py b/detectron2/projects/DensePose/densepose/converters/__init__.py deleted file mode 100644 index 9b700f44437bd4e68be358ed5aae62a22df8d88a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .hflip import HFlipConverter -from .to_mask import ToMaskConverter -from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences -from .segm_to_mask import ( - predictor_output_with_fine_and_coarse_segm_to_mask, - predictor_output_with_coarse_segm_to_mask, - resample_fine_and_coarse_segm_to_bbox, -) -from .chart_output_to_chart_result import ( - densepose_chart_predictor_output_to_result, - densepose_chart_predictor_output_to_result_with_confidences, -) -from .chart_output_hflip import densepose_chart_predictor_output_hflip diff --git a/detectron2/projects/DensePose/densepose/converters/base.py b/detectron2/projects/DensePose/densepose/converters/base.py deleted file mode 100644 index 25e3155a87b819fe526b7b2735e006aeb3a56dda..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/base.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any, Tuple, Type -import torch - - -class BaseConverter: - """ - Converter base class to be reused by various converters. - Converter allows one to convert data from various source types to a particular - destination type. Each source type needs to register its converter. The - registration for each source type is valid for all descendants of that type. 
- """ - - @classmethod - def register(cls, from_type: Type, converter: Any = None): - """ - Registers a converter for the specified type. - Can be used as a decorator (if converter is None), or called as a method. - - Args: - from_type (type): type to register the converter for; - all instances of this type will use the same converter - converter (callable): converter to be registered for the given - type; if None, this method is assumed to be a decorator for the converter - """ - - if converter is not None: - cls._do_register(from_type, converter) - - def wrapper(converter: Any) -> Any: - cls._do_register(from_type, converter) - return converter - - return wrapper - - @classmethod - def _do_register(cls, from_type: Type, converter: Any): - cls.registry[from_type] = converter # pyre-ignore[16] - - @classmethod - def _lookup_converter(cls, from_type: Type) -> Any: - """ - Perform recursive lookup for the given type - to find registered converter. If a converter was found for some base - class, it gets registered for this class to save on further lookups. - - Args: - from_type: type for which to find a converter - Return: - callable or None - registered converter or None - if no suitable entry was found in the registry - """ - if from_type in cls.registry: # pyre-ignore[16] - return cls.registry[from_type] - for base in from_type.__bases__: - converter = cls._lookup_converter(base) - if converter is not None: - cls._do_register(from_type, converter) - return converter - return None - - @classmethod - def convert(cls, instance: Any, *args, **kwargs): - """ - Convert an instance to the destination type using some registered - converter. Does recursive lookup for base classes, so there's no need - for explicit registration for derived classes. - - Args: - instance: source instance to convert to the destination type - Return: - An instance of the destination type obtained from the source instance - Raises KeyError, if no suitable converter found - """ - instance_type = type(instance) - converter = cls._lookup_converter(instance_type) - if converter is None: - if cls.dst_type is None: # pyre-ignore[16] - output_type_str = "itself" - else: - output_type_str = cls.dst_type - raise KeyError(f"Could not find converter from {instance_type} to {output_type_str}") - return converter(instance, *args, **kwargs) - - -IntTupleBox = Tuple[int, int, int, int] - - -def make_int_box(box: torch.Tensor) -> IntTupleBox: - int_box = [0, 0, 0, 0] - int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist()) - return int_box[0], int_box[1], int_box[2], int_box[3] diff --git a/detectron2/projects/DensePose/densepose/converters/builtin.py b/detectron2/projects/DensePose/densepose/converters/builtin.py deleted file mode 100644 index 5234410307d7bfff932da982ca44926afb729c23..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/builtin.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput -from . 
import ( - HFlipConverter, - ToChartResultConverter, - ToChartResultConverterWithConfidences, - ToMaskConverter, - densepose_chart_predictor_output_hflip, - densepose_chart_predictor_output_to_result, - densepose_chart_predictor_output_to_result_with_confidences, - predictor_output_with_coarse_segm_to_mask, - predictor_output_with_fine_and_coarse_segm_to_mask, -) - -ToMaskConverter.register( - DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask -) -ToMaskConverter.register( - DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask -) - -ToChartResultConverter.register( - DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result -) - -ToChartResultConverterWithConfidences.register( - DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences -) - -HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip) diff --git a/detectron2/projects/DensePose/densepose/converters/chart_output_hflip.py b/detectron2/projects/DensePose/densepose/converters/chart_output_hflip.py deleted file mode 100644 index f7f0061c858c80b083d40807c0bdfb4dfcc5d86b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/chart_output_hflip.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from dataclasses import fields -import torch - -from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData - - -def densepose_chart_predictor_output_hflip( - densepose_predictor_output: DensePoseChartPredictorOutput, - transform_data: DensePoseTransformData, -) -> DensePoseChartPredictorOutput: - """ - Change the predictor output to take into account a horizontal flip.
- """ - if len(densepose_predictor_output) > 0: - - PredictorOutput = type(densepose_predictor_output) - output_dict = {} - - for field in fields(densepose_predictor_output): - field_value = getattr(densepose_predictor_output, field.name) - # flip tensors - if isinstance(field_value, torch.Tensor): - setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3])) - - densepose_predictor_output = _flip_iuv_semantics_tensor( - densepose_predictor_output, transform_data - ) - densepose_predictor_output = _flip_segm_semantics_tensor( - densepose_predictor_output, transform_data - ) - - for field in fields(densepose_predictor_output): - output_dict[field.name] = getattr(densepose_predictor_output, field.name) - - return PredictorOutput(**output_dict) - else: - return densepose_predictor_output - - -def _flip_iuv_semantics_tensor( - densepose_predictor_output: DensePoseChartPredictorOutput, - dp_transform_data: DensePoseTransformData, -) -> DensePoseChartPredictorOutput: - point_label_symmetries = dp_transform_data.point_label_symmetries - uv_symmetries = dp_transform_data.uv_symmetries - - N, C, H, W = densepose_predictor_output.u.shape - u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long() - v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long() - Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[ - None, :, None, None - ].expand(N, C - 1, H, W) - densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc] - densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc] - - for el in ["fine_segm", "u", "v"]: - densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][ - :, point_label_symmetries, :, : - ] - return densepose_predictor_output - - -def _flip_segm_semantics_tensor( - densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data -): - if densepose_predictor_output.coarse_segm.shape[1] > 2: - densepose_predictor_output.coarse_segm = densepose_predictor_output.coarse_segm[ - :, dp_transform_data.mask_label_symmetries, :, : - ] - return densepose_predictor_output diff --git a/detectron2/projects/DensePose/densepose/converters/chart_output_to_chart_result.py b/detectron2/projects/DensePose/densepose/converters/chart_output_to_chart_result.py deleted file mode 100644 index b2e9c2280a60f80d2e32861a392fc78b3148cac8..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/chart_output_to_chart_result.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Dict -import torch -from torch.nn import functional as F - -from detectron2.structures.boxes import Boxes, BoxMode - -from ..structures import ( - DensePoseChartPredictorOutput, - DensePoseChartResult, - DensePoseChartResultWithConfidences, -) -from . 
import resample_fine_and_coarse_segm_to_bbox -from .base import IntTupleBox, make_int_box - - -def resample_uv_tensors_to_bbox( - u: torch.Tensor, - v: torch.Tensor, - labels: torch.Tensor, - box_xywh_abs: IntTupleBox, -) -> torch.Tensor: - """ - Resamples U and V coordinate estimates for the given bounding box - - Args: - u (tensor [1, C, H, W] of float): U coordinates - v (tensor [1, C, H, W] of float): V coordinates - labels (tensor [H, W] of long): labels obtained by resampling segmentation - outputs for the given bounding box - box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs - Return: - Resampled U and V coordinates - a tensor [2, H, W] of float - """ - x, y, w, h = box_xywh_abs - w = max(int(w), 1) - h = max(int(h), 1) - u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False) - v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False) - uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device) - for part_id in range(1, u_bbox.size(1)): - uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id] - uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id] - return uv - - -def resample_uv_to_bbox( - predictor_output: DensePoseChartPredictorOutput, - labels: torch.Tensor, - box_xywh_abs: IntTupleBox, -) -> torch.Tensor: - """ - Resamples U and V coordinate estimates for the given bounding box - - Args: - predictor_output (DensePoseChartPredictorOutput): DensePose predictor - output to be resampled - labels (tensor [H, W] of long): labels obtained by resampling segmentation - outputs for the given bounding box - box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs - Return: - Resampled U and V coordinates - a tensor [2, H, W] of float - """ - return resample_uv_tensors_to_bbox( - predictor_output.u, - predictor_output.v, - labels, - box_xywh_abs, - ) - - -def densepose_chart_predictor_output_to_result( - predictor_output: DensePoseChartPredictorOutput, boxes: Boxes -) -> DensePoseChartResult: - """ - Convert densepose chart predictor outputs to results - - Args: - predictor_output (DensePoseChartPredictorOutput): DensePose predictor - output to be converted to results, must contain only 1 output - boxes (Boxes): bounding box that corresponds to the predictor output, - must contain only 1 bounding box - Return: - DensePose chart-based result (DensePoseChartResult) - """ - assert len(predictor_output) == 1 and len(boxes) == 1, ( - f"Predictor output to result conversion can operate only single outputs" - f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes" - ) - - boxes_xyxy_abs = boxes.tensor.clone() - boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - box_xywh = make_int_box(boxes_xywh_abs[0]) - - labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0) - uv = resample_uv_to_bbox(predictor_output, labels, box_xywh) - return DensePoseChartResult(labels=labels, uv=uv) - - -def resample_confidences_to_bbox( - predictor_output: DensePoseChartPredictorOutput, - labels: torch.Tensor, - box_xywh_abs: IntTupleBox, -) -> Dict[str, torch.Tensor]: - """ - Resamples confidences for the given bounding box - - Args: - predictor_output (DensePoseChartPredictorOutput): DensePose predictor - output to be resampled - labels (tensor [H, W] of long): labels obtained by resampling segmentation - outputs for the given bounding box - box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs 
- Return: - Resampled confidences - a dict of [H, W] tensors of float - """ - - x, y, w, h = box_xywh_abs - w = max(int(w), 1) - h = max(int(h), 1) - - confidence_names = [ - "sigma_1", - "sigma_2", - "kappa_u", - "kappa_v", - "fine_segm_confidence", - "coarse_segm_confidence", - ] - confidence_results = {key: None for key in confidence_names} - confidence_names = [ - key for key in confidence_names if getattr(predictor_output, key) is not None - ] - confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device) - - # assign data from channels that correspond to the labels - for key in confidence_names: - resampled_confidence = F.interpolate( - getattr(predictor_output, key), - (h, w), - mode="bilinear", - align_corners=False, - ) - result = confidence_base.clone() - for part_id in range(1, predictor_output.u.size(1)): - if resampled_confidence.size(1) != predictor_output.u.size(1): - # confidence is not part-based, don't try to fill it part by part - continue - result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id] - - if resampled_confidence.size(1) != predictor_output.u.size(1): - # confidence is not part-based, fill the data with the first channel - # (targeted for segmentation confidences that have only 1 channel) - result = resampled_confidence[0, 0] - - confidence_results[key] = result - - return confidence_results # pyre-ignore[7] - - -def densepose_chart_predictor_output_to_result_with_confidences( - predictor_output: DensePoseChartPredictorOutput, boxes: Boxes -) -> DensePoseChartResultWithConfidences: - """ - Convert densepose chart predictor outputs to results - - Args: - predictor_output (DensePoseChartPredictorOutput): DensePose predictor - output with confidences to be converted to results, must contain only 1 output - boxes (Boxes): bounding box that corresponds to the predictor output, - must contain only 1 bounding box - Return: - DensePose chart-based result with confidences (DensePoseChartResultWithConfidences) - """ - assert len(predictor_output) == 1 and len(boxes) == 1, ( - f"Predictor output to result conversion can operate only single outputs" - f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes" - ) - - boxes_xyxy_abs = boxes.tensor.clone() - boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - box_xywh = make_int_box(boxes_xywh_abs[0]) - - labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0) - uv = resample_uv_to_bbox(predictor_output, labels, box_xywh) - confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh) - return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences) diff --git a/detectron2/projects/DensePose/densepose/converters/hflip.py b/detectron2/projects/DensePose/densepose/converters/hflip.py deleted file mode 100644 index 711b73b3701adfd0217132519aea46f30f9ed74a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/hflip.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any - -from .base import BaseConverter - - -class HFlipConverter(BaseConverter): - """ - Converts various DensePose predictor outputs to DensePose results. - Each DensePose predictor output type has to register its conversion strategy.
- """ - - registry = {} - dst_type = None - - @classmethod - # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter` - # inconsistently. - def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs): - """ - Performs an horizontal flip on DensePose predictor outputs. - Does recursive lookup for base classes, so there's no need - for explicit registration for derived classes. - - Args: - predictor_outputs: DensePose predictor output to be converted to BitMasks - transform_data: Anything useful for the flip - Return: - An instance of the same type as predictor_outputs - """ - return super(HFlipConverter, cls).convert( - predictor_outputs, transform_data, *args, **kwargs - ) diff --git a/detectron2/projects/DensePose/densepose/converters/segm_to_mask.py b/detectron2/projects/DensePose/densepose/converters/segm_to_mask.py deleted file mode 100644 index e5843a2186f441aa9cb48b680fd67051aa1236f6..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/segm_to_mask.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any -import torch -from torch.nn import functional as F - -from detectron2.structures import BitMasks, Boxes, BoxMode - -from .base import IntTupleBox, make_int_box -from .to_mask import ImageSizeType - - -def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox): - """ - Resample coarse segmentation tensor to the given - bounding box and derive labels for each pixel of the bounding box - - Args: - coarse_segm: float tensor of shape [1, K, Hout, Wout] - box_xywh_abs (tuple of 4 int): bounding box given by its upper-left - corner coordinates, width (W) and height (H) - Return: - Labels for each pixel of the bounding box, a long tensor of size [1, H, W] - """ - x, y, w, h = box_xywh_abs - w = max(int(w), 1) - h = max(int(h), 1) - labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) - return labels - - -def resample_fine_and_coarse_segm_tensors_to_bbox( - fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox -): - """ - Resample fine and coarse segmentation tensors to the given - bounding box and derive labels for each pixel of the bounding box - - Args: - fine_segm: float tensor of shape [1, C, Hout, Wout] - coarse_segm: float tensor of shape [1, K, Hout, Wout] - box_xywh_abs (tuple of 4 int): bounding box given by its upper-left - corner coordinates, width (W) and height (H) - Return: - Labels for each pixel of the bounding box, a long tensor of size [1, H, W] - """ - x, y, w, h = box_xywh_abs - w = max(int(w), 1) - h = max(int(h), 1) - # coarse segmentation - coarse_segm_bbox = F.interpolate( - coarse_segm, - (h, w), - mode="bilinear", - align_corners=False, - ).argmax(dim=1) - # combined coarse and fine segmentation - labels = ( - F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) - * (coarse_segm_bbox > 0).long() - ) - return labels - - -def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox): - """ - Resample fine and coarse segmentation outputs from a predictor to the given - bounding box and derive labels for each pixel of the bounding box - - Args: - predictor_output: DensePose predictor output that contains segmentation - results to be resampled - box_xywh_abs (tuple of 4 int): bounding box given by its upper-left - corner coordinates, width (W) and height (H) 
- Return: - Labels for each pixel of the bounding box, a long tensor of size [1, H, W] - """ - return resample_fine_and_coarse_segm_tensors_to_bbox( - predictor_output.fine_segm, - predictor_output.coarse_segm, - box_xywh_abs, - ) - - -def predictor_output_with_coarse_segm_to_mask( - predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType -) -> BitMasks: - """ - Convert predictor output with coarse and fine segmentation to a mask. - Assumes that predictor output has the following attributes: - - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation - unnormalized scores for N instances; D is the number of coarse - segmentation labels, H and W is the resolution of the estimate - - Args: - predictor_output: DensePose predictor output to be converted to mask - boxes (Boxes): bounding boxes that correspond to the DensePose - predictor outputs - image_size_hw (tuple [int, int]): image height Himg and width Wimg - Return: - BitMasks that contain a bool tensor of size [N, Himg, Wimg] with - a mask of the size of the image for each instance - """ - H, W = image_size_hw - boxes_xyxy_abs = boxes.tensor.clone() - boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - N = len(boxes_xywh_abs) - masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device) - for i in range(len(boxes_xywh_abs)): - box_xywh = make_int_box(boxes_xywh_abs[i]) - box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh) - x, y, w, h = box_xywh - masks[i, y : y + h, x : x + w] = box_mask - - return BitMasks(masks) - - -def predictor_output_with_fine_and_coarse_segm_to_mask( - predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType -) -> BitMasks: - """ - Convert predictor output with coarse and fine segmentation to a mask. 
- Assumes that predictor output has the following attributes: - - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation - unnormalized scores for N instances; D is the number of coarse - segmentation labels, H and W is the resolution of the estimate - - fine_segm (tensor of size [N, C, H, W]): fine segmentation - unnormalized scores for N instances; C is the number of fine - segmentation labels, H and W is the resolution of the estimate - - Args: - predictor_output: DensePose predictor output to be converted to mask - boxes (Boxes): bounding boxes that correspond to the DensePose - predictor outputs - image_size_hw (tuple [int, int]): image height Himg and width Wimg - Return: - BitMasks that contain a bool tensor of size [N, Himg, Wimg] with - a mask of the size of the image for each instance - """ - H, W = image_size_hw - boxes_xyxy_abs = boxes.tensor.clone() - boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - N = len(boxes_xywh_abs) - masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device) - for i in range(len(boxes_xywh_abs)): - box_xywh = make_int_box(boxes_xywh_abs[i]) - labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh) - x, y, w, h = box_xywh - masks[i, y : y + h, x : x + w] = labels_i > 0 - return BitMasks(masks) diff --git a/detectron2/projects/DensePose/densepose/converters/to_chart_result.py b/detectron2/projects/DensePose/densepose/converters/to_chart_result.py deleted file mode 100644 index 82e126a922ff8ac4d8ebc3008f67d3928b982c25..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/to_chart_result.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any - -from detectron2.structures import Boxes - -from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences -from .base import BaseConverter - - -class ToChartResultConverter(BaseConverter): - """ - Converts various DensePose predictor outputs to DensePose results. - Each DensePose predictor output type has to register its convertion strategy. - """ - - registry = {} - dst_type = DensePoseChartResult - - @classmethod - # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter` - # inconsistently. - def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult: - """ - Convert DensePose predictor outputs to DensePoseResult using some registered - converter. Does recursive lookup for base classes, so there's no need - for explicit registration for derived classes. - - Args: - densepose_predictor_outputs: DensePose predictor output to be - converted to BitMasks - boxes (Boxes): bounding boxes that correspond to the DensePose - predictor outputs - Return: - An instance of DensePoseResult. If no suitable converter was found, raises KeyError - """ - return super(ToChartResultConverter, cls).convert(predictor_outputs, boxes, *args, **kwargs) - - -class ToChartResultConverterWithConfidences(BaseConverter): - """ - Converts various DensePose predictor outputs to DensePose results. - Each DensePose predictor output type has to register its convertion strategy. - """ - - registry = {} - dst_type = DensePoseChartResultWithConfidences - - @classmethod - # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter` - # inconsistently. 
- def convert( - cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs - ) -> DensePoseChartResultWithConfidences: - """ - Convert DensePose predictor outputs to DensePoseResult with confidences - using some registered converter. Does recursive lookup for base classes, - so there's no need for explicit registration for derived classes. - - Args: - densepose_predictor_outputs: DensePose predictor output with confidences - to be converted to BitMasks - boxes (Boxes): bounding boxes that correspond to the DensePose - predictor outputs - Return: - An instance of DensePoseResult. If no suitable converter was found, raises KeyError - """ - return super(ToChartResultConverterWithConfidences, cls).convert( - predictor_outputs, boxes, *args, **kwargs - ) diff --git a/detectron2/projects/DensePose/densepose/converters/to_mask.py b/detectron2/projects/DensePose/densepose/converters/to_mask.py deleted file mode 100644 index 7a47e2a7d7aa5f0d9c41ab46a4f1806184b7b4ba..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/converters/to_mask.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any, Tuple - -from detectron2.structures import BitMasks, Boxes - -from .base import BaseConverter - -ImageSizeType = Tuple[int, int] - - -class ToMaskConverter(BaseConverter): - """ - Converts various DensePose predictor outputs to masks - in bit mask format (see `BitMasks`). Each DensePose predictor output type - has to register its convertion strategy. - """ - - registry = {} - dst_type = BitMasks - - @classmethod - # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter` - # inconsistently. - def convert( - cls, - densepose_predictor_outputs: Any, - boxes: Boxes, - image_size_hw: ImageSizeType, - *args, - **kwargs - ) -> BitMasks: - """ - Convert DensePose predictor outputs to BitMasks using some registered - converter. Does recursive lookup for base classes, so there's no need - for explicit registration for derived classes. - - Args: - densepose_predictor_outputs: DensePose predictor output to be - converted to BitMasks - boxes (Boxes): bounding boxes that correspond to the DensePose - predictor outputs - image_size_hw (tuple [int, int]): image height and width - Return: - An instance of `BitMasks`. If no suitable converter was found, raises KeyError - """ - return super(ToMaskConverter, cls).convert( - densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs - ) diff --git a/detectron2/projects/DensePose/densepose/data/__init__.py b/detectron2/projects/DensePose/densepose/data/__init__.py deleted file mode 100644 index 5278887bd723f1606debd3de09b7e3e0ff5b3a03..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .meshes import builtin -from .build import ( - build_detection_test_loader, - build_detection_train_loader, - build_combined_loader, - build_frame_selector, - build_inference_based_loaders, - has_inference_based_loaders, - BootstrapDatasetFactoryCatalog, -) -from .combined_loader import CombinedDataLoader -from .dataset_mapper import DatasetMapper -from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter -from .image_list_dataset import ImageListDataset -from .utils import is_relative_local_path, maybe_prepend_base_path - -# ensure the builtin datasets are registered -from . 
import datasets - -# ensure the bootstrap datasets builders are registered -from . import build - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/projects/DensePose/densepose/data/build.py b/detectron2/projects/DensePose/densepose/data/build.py deleted file mode 100644 index 06e8e8f782e75b27b8bb1ec387dd49ccdae8dbb3..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/build.py +++ /dev/null @@ -1,738 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import itertools -import logging -import numpy as np -from collections import UserDict, defaultdict -from dataclasses import dataclass -from typing import Any, Callable, Collection, Dict, Iterable, List, Optional, Sequence, Tuple -import torch -from torch.utils.data.dataset import Dataset - -from detectron2.config import CfgNode -from detectron2.data.build import build_detection_test_loader as d2_build_detection_test_loader -from detectron2.data.build import build_detection_train_loader as d2_build_detection_train_loader -from detectron2.data.build import ( - load_proposals_into_dataset, - print_instances_class_histogram, - trivial_batch_collator, - worker_init_reset_seed, -) -from detectron2.data.catalog import DatasetCatalog, Metadata, MetadataCatalog -from detectron2.data.samplers import TrainingSampler -from detectron2.utils.comm import get_world_size - -from densepose.config import get_bootstrap_dataset_config -from densepose.modeling import build_densepose_embedder - -from .combined_loader import CombinedDataLoader, Loader -from .dataset_mapper import DatasetMapper -from .datasets.coco import DENSEPOSE_CSE_KEYS_WITHOUT_MASK, DENSEPOSE_IUV_KEYS_WITHOUT_MASK -from .datasets.dataset_type import DatasetType -from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter -from .samplers import ( - DensePoseConfidenceBasedSampler, - DensePoseCSEConfidenceBasedSampler, - DensePoseCSEUniformSampler, - DensePoseUniformSampler, - MaskFromDensePoseSampler, - PredictionToGroundTruthSampler, -) -from .transform import ImageResizeTransform -from .utils import get_category_to_class_mapping, get_class_to_mesh_name_mapping -from .video import ( - FirstKFramesSelector, - FrameSelectionStrategy, - LastKFramesSelector, - RandomKFramesSelector, - VideoKeyframeDataset, - video_list_from_file, -) - -__all__ = ["build_detection_train_loader", "build_detection_test_loader"] - - -Instance = Dict[str, Any] -InstancePredicate = Callable[[Instance], bool] - - -def _compute_num_images_per_worker(cfg: CfgNode) -> int: - num_workers = get_world_size() - images_per_batch = cfg.SOLVER.IMS_PER_BATCH - assert ( - images_per_batch % num_workers == 0 - ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( - images_per_batch, num_workers - ) - assert ( - images_per_batch >= num_workers - ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( - images_per_batch, num_workers - ) - images_per_worker = images_per_batch // num_workers - return images_per_worker - - -def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]) -> None: - meta = MetadataCatalog.get(dataset_name) - for dataset_dict in dataset_dicts: - for ann in dataset_dict["annotations"]: - ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]] - - -@dataclass -class _DatasetCategory: - """ - Class representing category data in a dataset: - - id: category ID, as specified in 
the dataset annotations file - - name: category name, as specified in the dataset annotations file - - mapped_id: category ID after applying category maps (DATASETS.CATEGORY_MAPS config option) - - mapped_name: category name after applying category maps - - dataset_name: dataset in which the category is defined - - For example, when training models in a class-agnostic manner, one could take LVIS 1.0 - dataset and map the animal categories to the same category as human data from COCO: - id = 225 - name = "cat" - mapped_id = 1 - mapped_name = "person" - dataset_name = "lvis_v1_animals_dp_train" - """ - - id: int - name: str - mapped_id: int - mapped_name: str - dataset_name: str - - -_MergedCategoriesT = Dict[int, List[_DatasetCategory]] - - -def _add_category_id_to_contiguous_id_maps_to_metadata( - merged_categories: _MergedCategoriesT, -) -> None: - merged_categories_per_dataset = {} - for contiguous_cat_id, cat_id in enumerate(sorted(merged_categories.keys())): - for cat in merged_categories[cat_id]: - if cat.dataset_name not in merged_categories_per_dataset: - merged_categories_per_dataset[cat.dataset_name] = defaultdict(list) - merged_categories_per_dataset[cat.dataset_name][cat_id].append( - ( - contiguous_cat_id, - cat, - ) - ) - - logger = logging.getLogger(__name__) - for dataset_name, merged_categories in merged_categories_per_dataset.items(): - meta = MetadataCatalog.get(dataset_name) - if not hasattr(meta, "thing_classes"): - meta.thing_classes = [] - meta.thing_dataset_id_to_contiguous_id = {} - meta.thing_dataset_id_to_merged_id = {} - else: - meta.thing_classes.clear() - meta.thing_dataset_id_to_contiguous_id.clear() - meta.thing_dataset_id_to_merged_id.clear() - logger.info(f"Dataset {dataset_name}: category ID to contiguous ID mapping:") - for _cat_id, categories in sorted(merged_categories.items()): - added_to_thing_classes = False - for contiguous_cat_id, cat in categories: - if not added_to_thing_classes: - meta.thing_classes.append(cat.mapped_name) - added_to_thing_classes = True - meta.thing_dataset_id_to_contiguous_id[cat.id] = contiguous_cat_id - meta.thing_dataset_id_to_merged_id[cat.id] = cat.mapped_id - logger.info(f"{cat.id} ({cat.name}) -> {contiguous_cat_id}") - - -def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - def has_annotations(instance: Instance) -> bool: - return "annotations" in instance - - def has_only_crowd_anotations(instance: Instance) -> bool: - for ann in instance["annotations"]: - if ann.get("is_crowd", 0) == 0: - return False - return True - - def general_keep_instance_predicate(instance: Instance) -> bool: - return has_annotations(instance) and not has_only_crowd_anotations(instance) - - if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS: - return None - return general_keep_instance_predicate - - -def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - - min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE - - def has_sufficient_num_keypoints(instance: Instance) -> bool: - num_kpts = sum( - (np.array(ann["keypoints"][2::3]) > 0).sum() - for ann in instance["annotations"] - if "keypoints" in ann - ) - return num_kpts >= min_num_keypoints - - if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0): - return has_sufficient_num_keypoints - return None - - -def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - if not cfg.MODEL.MASK_ON: - return None - - def has_mask_annotations(instance: Instance) -> bool: 
- return any("segmentation" in ann for ann in instance["annotations"]) - - return has_mask_annotations - - -def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - if not cfg.MODEL.DENSEPOSE_ON: - return None - - use_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS - - def has_densepose_annotations(instance: Instance) -> bool: - for ann in instance["annotations"]: - if all(key in ann for key in DENSEPOSE_IUV_KEYS_WITHOUT_MASK) or all( - key in ann for key in DENSEPOSE_CSE_KEYS_WITHOUT_MASK - ): - return True - if use_masks and "segmentation" in ann: - return True - return False - - return has_densepose_annotations - - -def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - specific_predicate_creators = [ - _maybe_create_keypoints_keep_instance_predicate, - _maybe_create_mask_keep_instance_predicate, - _maybe_create_densepose_keep_instance_predicate, - ] - predicates = [creator(cfg) for creator in specific_predicate_creators] - predicates = [p for p in predicates if p is not None] - if not predicates: - return None - - def combined_predicate(instance: Instance) -> bool: - return any(p(instance) for p in predicates) - - return combined_predicate - - -def _get_train_keep_instance_predicate(cfg: CfgNode): - general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) - combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg) - - def combined_general_specific_keep_predicate(instance: Instance) -> bool: - return general_keep_predicate(instance) and combined_specific_keep_predicate(instance) - - if (general_keep_predicate is None) and (combined_specific_keep_predicate is None): - return None - if general_keep_predicate is None: - return combined_specific_keep_predicate - if combined_specific_keep_predicate is None: - return general_keep_predicate - return combined_general_specific_keep_predicate - - -def _get_test_keep_instance_predicate(cfg: CfgNode): - general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) - return general_keep_predicate - - -def _maybe_filter_and_map_categories( - dataset_name: str, dataset_dicts: List[Instance] -) -> List[Instance]: - meta = MetadataCatalog.get(dataset_name) - category_id_map = meta.thing_dataset_id_to_contiguous_id - filtered_dataset_dicts = [] - for dataset_dict in dataset_dicts: - anns = [] - for ann in dataset_dict["annotations"]: - cat_id = ann["category_id"] - if cat_id not in category_id_map: - continue - ann["category_id"] = category_id_map[cat_id] - anns.append(ann) - dataset_dict["annotations"] = anns - filtered_dataset_dicts.append(dataset_dict) - return filtered_dataset_dicts - - -def _add_category_whitelists_to_metadata(cfg: CfgNode) -> None: - for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items(): - meta = MetadataCatalog.get(dataset_name) - meta.whitelisted_categories = whitelisted_cat_ids - logger = logging.getLogger(__name__) - logger.info( - "Whitelisted categories for dataset {}: {}".format( - dataset_name, meta.whitelisted_categories - ) - ) - - -def _add_category_maps_to_metadata(cfg: CfgNode) -> None: - for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items(): - category_map = { - int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items() - } - meta = MetadataCatalog.get(dataset_name) - meta.category_map = category_map - logger = logging.getLogger(__name__) - logger.info("Category maps for dataset {}: 
{}".format(dataset_name, meta.category_map)) - - -def _add_category_info_to_bootstrapping_metadata(dataset_name: str, dataset_cfg: CfgNode) -> None: - meta = MetadataCatalog.get(dataset_name) - meta.category_to_class_mapping = get_category_to_class_mapping(dataset_cfg) - meta.categories = dataset_cfg.CATEGORIES - meta.max_count_per_category = dataset_cfg.MAX_COUNT_PER_CATEGORY - logger = logging.getLogger(__name__) - logger.info( - "Category to class mapping for dataset {}: {}".format( - dataset_name, meta.category_to_class_mapping - ) - ) - - -def _maybe_add_class_to_mesh_name_map_to_metadata(dataset_names: List[str], cfg: CfgNode) -> None: - for dataset_name in dataset_names: - meta = MetadataCatalog.get(dataset_name) - if not hasattr(meta, "class_to_mesh_name"): - meta.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg) - - -def _merge_categories(dataset_names: Collection[str]) -> _MergedCategoriesT: - merged_categories = defaultdict(list) - category_names = {} - for dataset_name in dataset_names: - meta = MetadataCatalog.get(dataset_name) - whitelisted_categories = meta.get("whitelisted_categories") - category_map = meta.get("category_map", {}) - cat_ids = ( - whitelisted_categories if whitelisted_categories is not None else meta.categories.keys() - ) - for cat_id in cat_ids: - cat_name = meta.categories[cat_id] - cat_id_mapped = category_map.get(cat_id, cat_id) - if cat_id_mapped == cat_id or cat_id_mapped in cat_ids: - category_names[cat_id] = cat_name - else: - category_names[cat_id] = str(cat_id_mapped) - # assign temporary mapped category name, this name can be changed - # during the second pass, since mapped ID can correspond to a category - # from a different dataset - cat_name_mapped = meta.categories[cat_id_mapped] - merged_categories[cat_id_mapped].append( - _DatasetCategory( - id=cat_id, - name=cat_name, - mapped_id=cat_id_mapped, - mapped_name=cat_name_mapped, - dataset_name=dataset_name, - ) - ) - # second pass to assign proper mapped category names - for cat_id, categories in merged_categories.items(): - for cat in categories: - if cat_id in category_names and cat.mapped_name != category_names[cat_id]: - cat.mapped_name = category_names[cat_id] - - return merged_categories - - -def _warn_if_merged_different_categories(merged_categories: _MergedCategoriesT) -> None: - logger = logging.getLogger(__name__) - for cat_id in merged_categories: - merged_categories_i = merged_categories[cat_id] - first_cat_name = merged_categories_i[0].name - if len(merged_categories_i) > 1 and not all( - cat.name == first_cat_name for cat in merged_categories_i[1:] - ): - cat_summary_str = ", ".join( - [f"{cat.id} ({cat.name}) from {cat.dataset_name}" for cat in merged_categories_i] - ) - logger.warning( - f"Merged category {cat_id} corresponds to the following categories: " - f"{cat_summary_str}" - ) - - -def combine_detection_dataset_dicts( - dataset_names: Collection[str], - keep_instance_predicate: Optional[InstancePredicate] = None, - proposal_files: Optional[Collection[str]] = None, -) -> List[Instance]: - """ - Load and prepare dataset dicts for training / testing - - Args: - dataset_names (Collection[str]): a list of dataset names - keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate - applied to instance dicts which defines whether to keep the instance - proposal_files (Collection[str]): if given, a list of object proposal files - that match each dataset in `dataset_names`. 
- """ - assert len(dataset_names) - if proposal_files is None: - proposal_files = [None] * len(dataset_names) - assert len(dataset_names) == len(proposal_files) - # load datasets and metadata - dataset_name_to_dicts = {} - for dataset_name in dataset_names: - dataset_name_to_dicts[dataset_name] = DatasetCatalog.get(dataset_name) - assert len(dataset_name_to_dicts), f"Dataset '{dataset_name}' is empty!" - # merge categories, requires category metadata to be loaded - # cat_id -> [(orig_cat_id, cat_name, dataset_name)] - merged_categories = _merge_categories(dataset_names) - _warn_if_merged_different_categories(merged_categories) - merged_category_names = [ - merged_categories[cat_id][0].mapped_name for cat_id in sorted(merged_categories) - ] - # map to contiguous category IDs - _add_category_id_to_contiguous_id_maps_to_metadata(merged_categories) - # load annotations and dataset metadata - for dataset_name, proposal_file in zip(dataset_names, proposal_files): - dataset_dicts = dataset_name_to_dicts[dataset_name] - assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!" - if proposal_file is not None: - dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file) - dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts) - print_instances_class_histogram(dataset_dicts, merged_category_names) - dataset_name_to_dicts[dataset_name] = dataset_dicts - - if keep_instance_predicate is not None: - all_datasets_dicts_plain = [ - d - for d in itertools.chain.from_iterable(dataset_name_to_dicts.values()) - if keep_instance_predicate(d) - ] - else: - all_datasets_dicts_plain = list( - itertools.chain.from_iterable(dataset_name_to_dicts.values()) - ) - return all_datasets_dicts_plain - - -def build_detection_train_loader(cfg: CfgNode, mapper=None): - """ - A data loader is created in a way similar to that of Detectron2. - The main differences are: - - it allows to combine datasets with different but compatible object category sets - - The data loader is created by the following steps: - 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. - 2. Start workers to work on the dicts. Each worker will: - * Map each metadata dict into another format to be consumed by the model. - * Batch them by simply putting dicts into a list. - The batched ``list[mapped_dict]`` is what this dataloader will return. - - Args: - cfg (CfgNode): the config - mapper (callable): a callable which takes a sample (dict) from dataset and - returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, True)`. - - Returns: - an infinite iterator of training data - """ - - _add_category_whitelists_to_metadata(cfg) - _add_category_maps_to_metadata(cfg) - _maybe_add_class_to_mesh_name_map_to_metadata(cfg.DATASETS.TRAIN, cfg) - dataset_dicts = combine_detection_dataset_dicts( - cfg.DATASETS.TRAIN, - keep_instance_predicate=_get_train_keep_instance_predicate(cfg), - proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, - ) - if mapper is None: - mapper = DatasetMapper(cfg, True) - return d2_build_detection_train_loader(cfg, dataset=dataset_dicts, mapper=mapper) - - -def build_detection_test_loader(cfg, dataset_name, mapper=None): - """ - Similar to `build_detection_train_loader`. - But this function uses the given `dataset_name` argument (instead of the names in cfg), - and uses batch size 1. 
- - Args: - cfg: a detectron2 CfgNode - dataset_name (str): a name of the dataset that's available in the DatasetCatalog - mapper (callable): a callable which takes a sample (dict) from dataset - and returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, False)`. - - Returns: - DataLoader: a torch DataLoader, that loads the given detection - dataset, with test-time transformation and batching. - """ - _add_category_whitelists_to_metadata(cfg) - _add_category_maps_to_metadata(cfg) - _maybe_add_class_to_mesh_name_map_to_metadata([dataset_name], cfg) - dataset_dicts = combine_detection_dataset_dicts( - [dataset_name], - keep_instance_predicate=_get_test_keep_instance_predicate(cfg), - proposal_files=( - [cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]] - if cfg.MODEL.LOAD_PROPOSALS - else None - ), - ) - sampler = None - if not cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE: - sampler = torch.utils.data.SequentialSampler(dataset_dicts) - if mapper is None: - mapper = DatasetMapper(cfg, False) - return d2_build_detection_test_loader( - dataset_dicts, mapper=mapper, num_workers=cfg.DATALOADER.NUM_WORKERS, sampler=sampler - ) - - -def build_frame_selector(cfg: CfgNode): - strategy = FrameSelectionStrategy(cfg.STRATEGY) - if strategy == FrameSelectionStrategy.RANDOM_K: - frame_selector = RandomKFramesSelector(cfg.NUM_IMAGES) - elif strategy == FrameSelectionStrategy.FIRST_K: - frame_selector = FirstKFramesSelector(cfg.NUM_IMAGES) - elif strategy == FrameSelectionStrategy.LAST_K: - frame_selector = LastKFramesSelector(cfg.NUM_IMAGES) - elif strategy == FrameSelectionStrategy.ALL: - frame_selector = None - # pyre-fixme[61]: `frame_selector` may not be initialized here. - return frame_selector - - -def build_transform(cfg: CfgNode, data_type: str): - if cfg.TYPE == "resize": - if data_type == "image": - return ImageResizeTransform(cfg.MIN_SIZE, cfg.MAX_SIZE) - raise ValueError(f"Unknown transform {cfg.TYPE} for data type {data_type}") - - -def build_combined_loader(cfg: CfgNode, loaders: Collection[Loader], ratios: Sequence[float]): - images_per_worker = _compute_num_images_per_worker(cfg) - return CombinedDataLoader(loaders, images_per_worker, ratios) - - -def build_bootstrap_dataset(dataset_name: str, cfg: CfgNode) -> Sequence[torch.Tensor]: - """ - Build dataset that provides data to bootstrap on - - Args: - dataset_name (str): Name of the dataset, needs to have associated metadata - to load the data - cfg (CfgNode): bootstrapping config - Returns: - Sequence[Tensor] - dataset that provides image batches, Tensors of size - [N, C, H, W] of type float32 - """ - logger = logging.getLogger(__name__) - _add_category_info_to_bootstrapping_metadata(dataset_name, cfg) - meta = MetadataCatalog.get(dataset_name) - factory = BootstrapDatasetFactoryCatalog.get(meta.dataset_type) - dataset = None - if factory is not None: - dataset = factory(meta, cfg) - if dataset is None: - logger.warning(f"Failed to create dataset {dataset_name} of type {meta.dataset_type}") - return dataset - - -def build_data_sampler(cfg: CfgNode, sampler_cfg: CfgNode, embedder: Optional[torch.nn.Module]): - if sampler_cfg.TYPE == "densepose_uniform": - data_sampler = PredictionToGroundTruthSampler() - # transform densepose pred -> gt - data_sampler.register_sampler( - "pred_densepose", - "gt_densepose", - DensePoseUniformSampler(count_per_class=sampler_cfg.COUNT_PER_CLASS), - ) - data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) - 
return data_sampler - elif sampler_cfg.TYPE == "densepose_UV_confidence": - data_sampler = PredictionToGroundTruthSampler() - # transform densepose pred -> gt - data_sampler.register_sampler( - "pred_densepose", - "gt_densepose", - DensePoseConfidenceBasedSampler( - confidence_channel="sigma_2", - count_per_class=sampler_cfg.COUNT_PER_CLASS, - search_proportion=0.5, - ), - ) - data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) - return data_sampler - elif sampler_cfg.TYPE == "densepose_fine_segm_confidence": - data_sampler = PredictionToGroundTruthSampler() - # transform densepose pred -> gt - data_sampler.register_sampler( - "pred_densepose", - "gt_densepose", - DensePoseConfidenceBasedSampler( - confidence_channel="fine_segm_confidence", - count_per_class=sampler_cfg.COUNT_PER_CLASS, - search_proportion=0.5, - ), - ) - data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) - return data_sampler - elif sampler_cfg.TYPE == "densepose_coarse_segm_confidence": - data_sampler = PredictionToGroundTruthSampler() - # transform densepose pred -> gt - data_sampler.register_sampler( - "pred_densepose", - "gt_densepose", - DensePoseConfidenceBasedSampler( - confidence_channel="coarse_segm_confidence", - count_per_class=sampler_cfg.COUNT_PER_CLASS, - search_proportion=0.5, - ), - ) - data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) - return data_sampler - elif sampler_cfg.TYPE == "densepose_cse_uniform": - assert embedder is not None - data_sampler = PredictionToGroundTruthSampler() - # transform densepose pred -> gt - data_sampler.register_sampler( - "pred_densepose", - "gt_densepose", - DensePoseCSEUniformSampler( - cfg=cfg, - use_gt_categories=sampler_cfg.USE_GROUND_TRUTH_CATEGORIES, - embedder=embedder, - count_per_class=sampler_cfg.COUNT_PER_CLASS, - ), - ) - data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) - return data_sampler - elif sampler_cfg.TYPE == "densepose_cse_coarse_segm_confidence": - assert embedder is not None - data_sampler = PredictionToGroundTruthSampler() - # transform densepose pred -> gt - data_sampler.register_sampler( - "pred_densepose", - "gt_densepose", - DensePoseCSEConfidenceBasedSampler( - cfg=cfg, - use_gt_categories=sampler_cfg.USE_GROUND_TRUTH_CATEGORIES, - embedder=embedder, - confidence_channel="coarse_segm_confidence", - count_per_class=sampler_cfg.COUNT_PER_CLASS, - search_proportion=0.5, - ), - ) - data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) - return data_sampler - - raise ValueError(f"Unknown data sampler type {sampler_cfg.TYPE}") - - -def build_data_filter(cfg: CfgNode): - if cfg.TYPE == "detection_score": - min_score = cfg.MIN_VALUE - return ScoreBasedFilter(min_score=min_score) - raise ValueError(f"Unknown data filter type {cfg.TYPE}") - - -def build_inference_based_loader( - cfg: CfgNode, - dataset_cfg: CfgNode, - model: torch.nn.Module, - embedder: Optional[torch.nn.Module] = None, -) -> InferenceBasedLoader: - """ - Constructs data loader based on inference results of a model. 
- """ - dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER) - meta = MetadataCatalog.get(dataset_cfg.DATASET) - training_sampler = TrainingSampler(len(dataset)) - data_loader = torch.utils.data.DataLoader( - dataset, # pyre-ignore[6] - batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE, - sampler=training_sampler, - num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS, - collate_fn=trivial_batch_collator, - worker_init_fn=worker_init_reset_seed, - ) - return InferenceBasedLoader( - model, - data_loader=data_loader, - data_sampler=build_data_sampler(cfg, dataset_cfg.DATA_SAMPLER, embedder), - data_filter=build_data_filter(dataset_cfg.FILTER), - shuffle=True, - batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE, - inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE, - category_to_class_mapping=meta.category_to_class_mapping, - ) - - -def has_inference_based_loaders(cfg: CfgNode) -> bool: - """ - Returns True, if at least one inferense-based loader must - be instantiated for training - """ - return len(cfg.BOOTSTRAP_DATASETS) > 0 - - -def build_inference_based_loaders( - cfg: CfgNode, model: torch.nn.Module -) -> Tuple[List[InferenceBasedLoader], List[float]]: - loaders = [] - ratios = [] - embedder = build_densepose_embedder(cfg).to(device=model.device) # pyre-ignore[16] - for dataset_spec in cfg.BOOTSTRAP_DATASETS: - dataset_cfg = get_bootstrap_dataset_config().clone() - dataset_cfg.merge_from_other_cfg(CfgNode(dataset_spec)) - loader = build_inference_based_loader(cfg, dataset_cfg, model, embedder) - loaders.append(loader) - ratios.append(dataset_cfg.RATIO) - return loaders, ratios - - -def build_video_list_dataset(meta: Metadata, cfg: CfgNode): - video_list_fpath = meta.video_list_fpath - video_base_path = meta.video_base_path - category = meta.category - if cfg.TYPE == "video_keyframe": - frame_selector = build_frame_selector(cfg.SELECT) - transform = build_transform(cfg.TRANSFORM, data_type="image") - video_list = video_list_from_file(video_list_fpath, video_base_path) - keyframe_helper_fpath = getattr(cfg, "KEYFRAME_HELPER", None) - return VideoKeyframeDataset( - video_list, category, frame_selector, transform, keyframe_helper_fpath - ) - - -class _BootstrapDatasetFactoryCatalog(UserDict): - """ - A global dictionary that stores information about bootstrapped datasets creation functions - from metadata and config, for diverse DatasetType - """ - - def register(self, dataset_type: DatasetType, factory: Callable[[Metadata, CfgNode], Dataset]): - """ - Args: - dataset_type (DatasetType): a DatasetType e.g. DatasetType.VIDEO_LIST - factory (Callable[Metadata, CfgNode]): a callable which takes Metadata and cfg - arguments and returns a dataset object. - """ - assert dataset_type not in self, "Dataset '{}' is already registered!".format(dataset_type) - self[dataset_type] = factory - - -BootstrapDatasetFactoryCatalog = _BootstrapDatasetFactoryCatalog() -BootstrapDatasetFactoryCatalog.register(DatasetType.VIDEO_LIST, build_video_list_dataset) diff --git a/detectron2/projects/DensePose/densepose/data/combined_loader.py b/detectron2/projects/DensePose/densepose/data/combined_loader.py deleted file mode 100644 index c038c23a3b436b1cc6c29427c8dbf940f56250c9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/combined_loader.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -import random -from collections import deque -from typing import Any, Collection, Deque, Iterable, Iterator, List, Sequence - -Loader = Iterable[Any] - - -def _pooled_next(iterator: Iterator[Any], pool: Deque[Any]): - if not pool: - pool.extend(next(iterator)) - return pool.popleft() - - -class CombinedDataLoader: - """ - Combines data loaders using the provided sampling ratios - """ - - BATCH_COUNT = 100 - - def __init__(self, loaders: Collection[Loader], batch_size: int, ratios: Sequence[float]): - self.loaders = loaders - self.batch_size = batch_size - self.ratios = ratios - - def __iter__(self) -> Iterator[List[Any]]: - iters = [iter(loader) for loader in self.loaders] - indices = [] - pool = [deque()] * len(iters) - # infinite iterator, as in D2 - while True: - if not indices: - # just a buffer of indices, its size doesn't matter - # as long as it's a multiple of batch_size - k = self.batch_size * self.BATCH_COUNT - indices = random.choices(range(len(self.loaders)), self.ratios, k=k) - try: - batch = [_pooled_next(iters[i], pool[i]) for i in indices[: self.batch_size]] - except StopIteration: - break - indices = indices[self.batch_size :] - yield batch diff --git a/detectron2/projects/DensePose/densepose/data/dataset_mapper.py b/detectron2/projects/DensePose/densepose/data/dataset_mapper.py deleted file mode 100644 index 5537a94c0811f7f6849f534612222e8dc154b59d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/dataset_mapper.py +++ /dev/null @@ -1,170 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import copy -import logging -from typing import Any, Dict, List, Tuple -import torch - -from detectron2.data import MetadataCatalog -from detectron2.data import detection_utils as utils -from detectron2.data import transforms as T -from detectron2.layers import ROIAlign -from detectron2.structures import BoxMode -from detectron2.utils.file_io import PathManager - -from densepose.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData - - -def build_augmentation(cfg, is_train): - logger = logging.getLogger(__name__) - result = utils.build_augmentation(cfg, is_train) - if is_train: - random_rotation = T.RandomRotation( - cfg.INPUT.ROTATION_ANGLES, expand=False, sample_style="choice" - ) - result.append(random_rotation) - logger.info("DensePose-specific augmentation used in training: " + str(random_rotation)) - return result - - -class DatasetMapper: - """ - A customized version of `detectron2.data.DatasetMapper` - """ - - def __init__(self, cfg, is_train=True): - self.augmentation = build_augmentation(cfg, is_train) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.mask_on = ( - cfg.MODEL.MASK_ON or ( - cfg.MODEL.DENSEPOSE_ON - and cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS) - ) - self.keypoint_on = cfg.MODEL.KEYPOINT_ON - self.densepose_on = cfg.MODEL.DENSEPOSE_ON - assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet" - # fmt: on - if self.keypoint_on and is_train: - # Flip only makes sense in training - self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) - else: - self.keypoint_hflip_indices = None - - if self.densepose_on: - densepose_transform_srcs = [ - MetadataCatalog.get(ds).densepose_transform_src - for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST - ] - assert len(densepose_transform_srcs) > 0 - # TODO: check that DensePose transformation data is the same for - # all the datasets. 
Otherwise one would have to pass DB ID with - # each entry to select proper transformation data. For now, since - # all DensePose annotated data uses the same data semantics, we - # omit this check. - densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0]) - self.densepose_transform_data = DensePoseTransformData.load( - densepose_transform_data_fpath - ) - - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - - image, transforms = T.apply_transform_gens(self.augmentation, image) - image_shape = image.shape[:2] # h, w - dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) - - if not self.is_train: - dataset_dict.pop("annotations", None) - return dataset_dict - - for anno in dataset_dict["annotations"]: - if not self.mask_on: - anno.pop("segmentation", None) - if not self.keypoint_on: - anno.pop("keypoints", None) - - # USER: Implement additional transformations if you have other types of data - # USER: Don't call transpose_densepose if you don't need - annos = [ - self._transform_densepose( - utils.transform_instance_annotations( - obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices - ), - transforms, - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - - if self.mask_on: - self._add_densepose_masks_as_segmentation(annos, image_shape) - - instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask") - densepose_annotations = [obj.get("densepose") for obj in annos] - if densepose_annotations and not all(v is None for v in densepose_annotations): - instances.gt_densepose = DensePoseList( - densepose_annotations, instances.gt_boxes, image_shape - ) - - dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()] - return dataset_dict - - def _transform_densepose(self, annotation, transforms): - if not self.densepose_on: - return annotation - - # Handle densepose annotations - is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) - if is_valid: - densepose_data = DensePoseDataRelative(annotation, cleanup=True) - densepose_data.apply_transform(transforms, self.densepose_transform_data) - annotation["densepose"] = densepose_data - else: - # logger = logging.getLogger(__name__) - # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid)) - DensePoseDataRelative.cleanup_annotation(annotation) - # NOTE: annotations for certain instances may be unavailable. - # 'None' is accepted by the DensePostList data structure. 
- annotation["densepose"] = None - return annotation - - def _add_densepose_masks_as_segmentation( - self, annotations: List[Dict[str, Any]], image_shape_hw: Tuple[int, int] - ): - for obj in annotations: - if ("densepose" not in obj) or ("segmentation" in obj): - continue - # DP segmentation: torch.Tensor [S, S] of float32, S=256 - segm_dp = torch.zeros_like(obj["densepose"].segm) - segm_dp[obj["densepose"].segm > 0] = 1 - segm_h, segm_w = segm_dp.shape - bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32) - # image bbox - x0, y0, x1, y1 = ( - v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) - ) - segm_aligned = ( - ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True) - .forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp) - .squeeze() - ) - image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32) - image_mask[y0:y1, x0:x1] = segm_aligned - # segmentation for BitMask: np.array [H, W] of bool - obj["segmentation"] = image_mask >= 0.5 diff --git a/detectron2/projects/DensePose/densepose/data/datasets/__init__.py b/detectron2/projects/DensePose/densepose/data/datasets/__init__.py deleted file mode 100644 index ccf0cf1c1dd2e21e096bd7c849150d9c261b9b4f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/datasets/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from . import builtin # ensure the builtin datasets are registered - -__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/detectron2/projects/DensePose/densepose/data/datasets/builtin.py b/detectron2/projects/DensePose/densepose/data/datasets/builtin.py deleted file mode 100644 index 759c295e064b29c7968ec7db5e78d3d4de033578..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/datasets/builtin.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from .chimpnsee import register_dataset as register_chimpnsee_dataset -from .coco import BASE_DATASETS as BASE_COCO_DATASETS -from .coco import DATASETS as COCO_DATASETS -from .coco import register_datasets as register_coco_datasets -from .lvis import DATASETS as LVIS_DATASETS -from .lvis import register_datasets as register_lvis_datasets - -DEFAULT_DATASETS_ROOT = "datasets" - - -register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT) -register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT) -register_lvis_datasets(LVIS_DATASETS, DEFAULT_DATASETS_ROOT) - -register_chimpnsee_dataset(DEFAULT_DATASETS_ROOT) # pyre-ignore[19] diff --git a/detectron2/projects/DensePose/densepose/data/datasets/chimpnsee.py b/detectron2/projects/DensePose/densepose/data/datasets/chimpnsee.py deleted file mode 100644 index 7a0ee3768597f730f8230f52807a953148350f16..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/datasets/chimpnsee.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -from typing import Optional - -from detectron2.data import DatasetCatalog, MetadataCatalog - -from ..utils import maybe_prepend_base_path -from .dataset_type import DatasetType - -CHIMPNSEE_DATASET_NAME = "chimpnsee" - - -def register_dataset(datasets_root: Optional[str] = None) -> None: - def empty_load_callback(): - pass - - video_list_fpath = maybe_prepend_base_path( - datasets_root, - "chimpnsee/cdna.eva.mpg.de/video_list.txt", - ) - video_base_path = maybe_prepend_base_path(datasets_root, "chimpnsee/cdna.eva.mpg.de") - - DatasetCatalog.register(CHIMPNSEE_DATASET_NAME, empty_load_callback) - MetadataCatalog.get(CHIMPNSEE_DATASET_NAME).set( - dataset_type=DatasetType.VIDEO_LIST, - video_list_fpath=video_list_fpath, - video_base_path=video_base_path, - category="chimpanzee", - ) diff --git a/detectron2/projects/DensePose/densepose/data/datasets/coco.py b/detectron2/projects/DensePose/densepose/data/datasets/coco.py deleted file mode 100644 index 47c9a5e1dc7422a970b7804277f9ba07841bc714..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/datasets/coco.py +++ /dev/null @@ -1,434 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import contextlib -import io -import logging -import os -from collections import defaultdict -from dataclasses import dataclass -from typing import Any, Dict, Iterable, List, Optional -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode -from detectron2.utils.file_io import PathManager - -from ..utils import maybe_prepend_base_path - -DENSEPOSE_MASK_KEY = "dp_masks" -DENSEPOSE_IUV_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"] -DENSEPOSE_CSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_vertex", "ref_model"] -DENSEPOSE_ALL_POSSIBLE_KEYS = set( - DENSEPOSE_IUV_KEYS_WITHOUT_MASK + DENSEPOSE_CSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY] -) -DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/" - - -@dataclass -class CocoDatasetInfo: - name: str - images_root: str - annotations_fpath: str - - -DATASETS = [ - CocoDatasetInfo( - name="densepose_coco_2014_train", - images_root="coco/train2014", - annotations_fpath="coco/annotations/densepose_train2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_minival2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival_100", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_minival2014_100.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_valminusminival", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_valminusminival2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_train_cse", - images_root="coco/train2014", - annotations_fpath="coco_cse/densepose_train2014_cse.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival_cse", - images_root="coco/val2014", - annotations_fpath="coco_cse/densepose_minival2014_cse.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival_100_cse", - images_root="coco/val2014", - annotations_fpath="coco_cse/densepose_minival2014_100_cse.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_valminusminival_cse", - images_root="coco/val2014", - annotations_fpath="coco_cse/densepose_valminusminival2014_cse.json", - ), - CocoDatasetInfo( - name="densepose_chimps", - 
images_root="densepose_chimps/images", - annotations_fpath="densepose_chimps/densepose_chimps_densepose.json", - ), - CocoDatasetInfo( - name="densepose_chimps_cse_train", - images_root="densepose_chimps/images", - annotations_fpath="densepose_chimps/densepose_chimps_cse_train.json", - ), - CocoDatasetInfo( - name="densepose_chimps_cse_val", - images_root="densepose_chimps/images", - annotations_fpath="densepose_chimps/densepose_chimps_cse_val.json", - ), - CocoDatasetInfo( - name="posetrack2017_train", - images_root="posetrack2017/posetrack_data_2017", - annotations_fpath="posetrack2017/densepose_posetrack_train2017.json", - ), - CocoDatasetInfo( - name="posetrack2017_val", - images_root="posetrack2017/posetrack_data_2017", - annotations_fpath="posetrack2017/densepose_posetrack_val2017.json", - ), - CocoDatasetInfo( - name="lvis_v05_train", - images_root="coco/train2017", - annotations_fpath="lvis/lvis_v0.5_plus_dp_train.json", - ), - CocoDatasetInfo( - name="lvis_v05_val", - images_root="coco/val2017", - annotations_fpath="lvis/lvis_v0.5_plus_dp_val.json", - ), -] - - -BASE_DATASETS = [ - CocoDatasetInfo( - name="base_coco_2017_train", - images_root="coco/train2017", - annotations_fpath="coco/annotations/instances_train2017.json", - ), - CocoDatasetInfo( - name="base_coco_2017_val", - images_root="coco/val2017", - annotations_fpath="coco/annotations/instances_val2017.json", - ), - CocoDatasetInfo( - name="base_coco_2017_val_100", - images_root="coco/val2017", - annotations_fpath="coco/annotations/instances_val2017_100.json", - ), -] - - -def get_metadata(base_path: Optional[str]) -> Dict[str, Any]: - """ - Returns metadata associated with COCO DensePose datasets - - Args: - base_path: Optional[str] - Base path used to load metadata from - - Returns: - Dict[str, Any] - Metadata in the form of a dictionary - """ - meta = { - "densepose_transform_src": maybe_prepend_base_path(base_path, "UV_symmetry_transforms.mat"), - "densepose_smpl_subdiv": maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"), - "densepose_smpl_subdiv_transform": maybe_prepend_base_path( - base_path, - "SMPL_SUBDIV_TRANSFORM.mat", - ), - } - return meta - - -def _load_coco_annotations(json_file: str): - """ - Load COCO annotations from a JSON file - - Args: - json_file: str - Path to the file to load annotations from - Returns: - Instance of `pycocotools.coco.COCO` that provides access to annotations - data - """ - from pycocotools.coco import COCO - - logger = logging.getLogger(__name__) - timer = Timer() - with contextlib.redirect_stdout(io.StringIO()): - coco_api = COCO(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - return coco_api - - -def _add_categories_metadata(dataset_name: str, categories: List[Dict[str, Any]]): - meta = MetadataCatalog.get(dataset_name) - meta.categories = {c["id"]: c["name"] for c in categories} - logger = logging.getLogger(__name__) - logger.info("Dataset {} categories: {}".format(dataset_name, meta.categories)) - - -def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]): - if "minival" in json_file: - # Skip validation on COCO2014 valminusminival and minival annotations - # The ratio of buggy annotations there is tiny and does not affect accuracy - # Therefore we explicitly white-list them - return - ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) 
- - -def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "bbox" not in ann_dict: - return - obj["bbox"] = ann_dict["bbox"] - obj["bbox_mode"] = BoxMode.XYWH_ABS - - -def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "segmentation" not in ann_dict: - return - segm = ann_dict["segmentation"] - if not isinstance(segm, dict): - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - return - obj["segmentation"] = segm - - -def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "keypoints" not in ann_dict: - return - keypts = ann_dict["keypoints"] # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 0.5 to convert to floating point coordinates. - keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - -def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - for key in DENSEPOSE_ALL_POSSIBLE_KEYS: - if key in ann_dict: - obj[key] = ann_dict[key] - - -def _combine_images_with_annotations( - dataset_name: str, - image_root: str, - img_datas: Iterable[Dict[str, Any]], - ann_datas: Iterable[Iterable[Dict[str, Any]]], -): - - ann_keys = ["iscrowd", "category_id"] - dataset_dicts = [] - contains_video_frame_info = False - - for img_dict, ann_dicts in zip(img_datas, ann_datas): - record = {} - record["file_name"] = os.path.join(image_root, img_dict["file_name"]) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - record["image_id"] = img_dict["id"] - record["dataset"] = dataset_name - if "frame_id" in img_dict: - record["frame_id"] = img_dict["frame_id"] - record["video_id"] = img_dict.get("vid_id", None) - contains_video_frame_info = True - objs = [] - for ann_dict in ann_dicts: - assert ann_dict["image_id"] == record["image_id"] - assert ann_dict.get("ignore", 0) == 0 - obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict} - _maybe_add_bbox(obj, ann_dict) - _maybe_add_segm(obj, ann_dict) - _maybe_add_keypoints(obj, ann_dict) - _maybe_add_densepose(obj, ann_dict) - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - if contains_video_frame_info: - create_video_frame_mapping(dataset_name, dataset_dicts) - return dataset_dicts - - -def get_contiguous_id_to_category_id_map(metadata): - cat_id_2_cont_id = metadata.thing_dataset_id_to_contiguous_id - cont_id_2_cat_id = {} - for cat_id, cont_id in cat_id_2_cont_id.items(): - if cont_id in cont_id_2_cat_id: - continue - cont_id_2_cat_id[cont_id] = cat_id - return cont_id_2_cat_id - - -def maybe_filter_categories_cocoapi(dataset_name, coco_api): - meta = MetadataCatalog.get(dataset_name) - cont_id_2_cat_id = get_contiguous_id_to_category_id_map(meta) - cat_id_2_cont_id = meta.thing_dataset_id_to_contiguous_id - # filter categories - cats = [] - for cat in coco_api.dataset["categories"]: - cat_id = cat["id"] - if cat_id not in cat_id_2_cont_id: - continue - cont_id = cat_id_2_cont_id[cat_id] - if (cont_id in cont_id_2_cat_id) and (cont_id_2_cat_id[cont_id] == cat_id): - cats.append(cat) - coco_api.dataset["categories"] = cats - # filter annotations, if multiple categories are mapped to a single - # contiguous ID, use only one category ID and map all annotations to that category ID - anns = [] - for ann in 
coco_api.dataset["annotations"]: - cat_id = ann["category_id"] - if cat_id not in cat_id_2_cont_id: - continue - cont_id = cat_id_2_cont_id[cat_id] - ann["category_id"] = cont_id_2_cat_id[cont_id] - anns.append(ann) - coco_api.dataset["annotations"] = anns - # recreate index - coco_api.createIndex() - - -def maybe_filter_and_map_categories_cocoapi(dataset_name, coco_api): - meta = MetadataCatalog.get(dataset_name) - category_id_map = meta.thing_dataset_id_to_contiguous_id - # map categories - cats = [] - for cat in coco_api.dataset["categories"]: - cat_id = cat["id"] - if cat_id not in category_id_map: - continue - cat["id"] = category_id_map[cat_id] - cats.append(cat) - coco_api.dataset["categories"] = cats - # map annotation categories - anns = [] - for ann in coco_api.dataset["annotations"]: - cat_id = ann["category_id"] - if cat_id not in category_id_map: - continue - ann["category_id"] = category_id_map[cat_id] - anns.append(ann) - coco_api.dataset["annotations"] = anns - # recreate index - coco_api.createIndex() - - -def create_video_frame_mapping(dataset_name, dataset_dicts): - mapping = defaultdict(dict) - for d in dataset_dicts: - video_id = d.get("video_id") - if video_id is None: - continue - mapping[video_id].update({d["frame_id"]: d["file_name"]}) - MetadataCatalog.get(dataset_name).set(video_frame_mapping=mapping) - - -def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str): - """ - Loads a JSON file with annotations in COCO instances format. - Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata - in a more flexible way. Postpones category mapping to a later stage to be - able to combine several datasets with different (but coherent) sets of - categories. - - Args: - - annotations_json_file: str - Path to the JSON file with annotations in COCO instances format. - image_root: str - directory that contains all the images - dataset_name: str - the name that identifies a dataset, e.g. "densepose_coco_2014_train" - extra_annotation_keys: Optional[List[str]] - If provided, these keys are used to extract additional data from - the annotations. - """ - coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file)) - _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds())) - # sort indices for reproducible results - img_ids = sorted(coco_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = coco_api.loadImgs(img_ids) - logger = logging.getLogger(__name__) - logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file)) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. 
- anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] - _verify_annotations_have_unique_ids(annotations_json_file, anns) - dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns) - return dataset_records - - -def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[str] = None): - """ - Registers provided COCO DensePose dataset - - Args: - dataset_data: CocoDatasetInfo - Dataset data - datasets_root: Optional[str] - Datasets root folder (default: None) - """ - annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath) - images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root) - - def load_annotations(): - return load_coco_json( - annotations_json_file=annotations_fpath, - image_root=images_root, - dataset_name=dataset_data.name, - ) - - DatasetCatalog.register(dataset_data.name, load_annotations) - MetadataCatalog.get(dataset_data.name).set( - json_file=annotations_fpath, - image_root=images_root, - **get_metadata(DENSEPOSE_METADATA_URL_PREFIX) - ) - - -def register_datasets( - datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[str] = None -): - """ - Registers provided COCO DensePose datasets - - Args: - datasets_data: Iterable[CocoDatasetInfo] - An iterable of dataset datas - datasets_root: Optional[str] - Datasets root folder (default: None) - """ - for dataset_data in datasets_data: - register_dataset(dataset_data, datasets_root) diff --git a/detectron2/projects/DensePose/densepose/data/datasets/dataset_type.py b/detectron2/projects/DensePose/densepose/data/datasets/dataset_type.py deleted file mode 100644 index 4e546f2aa74b4586d97618d41c69432ed01e21e9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/datasets/dataset_type.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from enum import Enum - - -class DatasetType(Enum): - """ - Dataset type, mostly used for datasets that contain data to bootstrap models on - """ - - VIDEO_LIST = "video_list" diff --git a/detectron2/projects/DensePose/densepose/data/datasets/lvis.py b/detectron2/projects/DensePose/densepose/data/datasets/lvis.py deleted file mode 100644 index e90caac4bb429f9500a98998df18d238254a709e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/datasets/lvis.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe -import logging -import os -from typing import Any, Dict, Iterable, List, Optional -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.data.datasets.lvis import get_lvis_instances_meta -from detectron2.structures import BoxMode -from detectron2.utils.file_io import PathManager - -from ..utils import maybe_prepend_base_path -from .coco import ( - DENSEPOSE_ALL_POSSIBLE_KEYS, - DENSEPOSE_METADATA_URL_PREFIX, - CocoDatasetInfo, - get_metadata, -) - -DATASETS = [ - CocoDatasetInfo( - name="densepose_lvis_v1_ds1_train_v1", - images_root="coco_", - annotations_fpath="lvis/densepose_lvis_v1_ds1_train_v1.json", - ), - CocoDatasetInfo( - name="densepose_lvis_v1_ds1_val_v1", - images_root="coco_", - annotations_fpath="lvis/densepose_lvis_v1_ds1_val_v1.json", - ), - CocoDatasetInfo( - name="densepose_lvis_v1_ds2_train_v1", - images_root="coco_", - annotations_fpath="lvis/densepose_lvis_v1_ds2_train_v1.json", - ), - CocoDatasetInfo( - name="densepose_lvis_v1_ds2_val_v1", - images_root="coco_", - annotations_fpath="lvis/densepose_lvis_v1_ds2_val_v1.json", - ), - CocoDatasetInfo( - name="densepose_lvis_v1_ds1_val_animals_100", - images_root="coco_", - annotations_fpath="lvis/densepose_lvis_v1_val_animals_100_v2.json", - ), -] - - -def _load_lvis_annotations(json_file: str): - """ - Load COCO annotations from a JSON file - - Args: - json_file: str - Path to the file to load annotations from - Returns: - Instance of `pycocotools.coco.COCO` that provides access to annotations - data - """ - from lvis import LVIS - - json_file = PathManager.get_local_path(json_file) - logger = logging.getLogger(__name__) - timer = Timer() - lvis_api = LVIS(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - return lvis_api - - -def _add_categories_metadata(dataset_name: str) -> None: - metadict = get_lvis_instances_meta(dataset_name) - categories = metadict["thing_classes"] - metadata = MetadataCatalog.get(dataset_name) - metadata.categories = {i + 1: categories[i] for i in range(len(categories))} - logger = logging.getLogger(__name__) - logger.info(f"Dataset {dataset_name} has {len(categories)} categories") - - -def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]) -> None: - ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) - - -def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None: - if "bbox" not in ann_dict: - return - obj["bbox"] = ann_dict["bbox"] - obj["bbox_mode"] = BoxMode.XYWH_ABS - - -def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None: - if "segmentation" not in ann_dict: - return - segm = ann_dict["segmentation"] - if not isinstance(segm, dict): - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - return - obj["segmentation"] = segm - - -def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None: - if "keypoints" not in ann_dict: - return - keypts = ann_dict["keypoints"] # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 
0.5 to convert to floating point coordinates. - keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - -def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None: - for key in DENSEPOSE_ALL_POSSIBLE_KEYS: - if key in ann_dict: - obj[key] = ann_dict[key] - - -def _combine_images_with_annotations( - dataset_name: str, - image_root: str, - img_datas: Iterable[Dict[str, Any]], - ann_datas: Iterable[Iterable[Dict[str, Any]]], -): - - dataset_dicts = [] - - def get_file_name(img_root, img_dict): - # Determine the path including the split folder ("train2017", "val2017", "test2017") from - # the coco_url field. Example: - # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg' - split_folder, file_name = img_dict["coco_url"].split("/")[-2:] - return os.path.join(img_root + split_folder, file_name) - - for img_dict, ann_dicts in zip(img_datas, ann_datas): - record = {} - record["file_name"] = get_file_name(image_root, img_dict) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) - record["neg_category_ids"] = img_dict.get("neg_category_ids", []) - record["image_id"] = img_dict["id"] - record["dataset"] = dataset_name - - objs = [] - for ann_dict in ann_dicts: - assert ann_dict["image_id"] == record["image_id"] - obj = {} - _maybe_add_bbox(obj, ann_dict) - obj["iscrowd"] = ann_dict.get("iscrowd", 0) - obj["category_id"] = ann_dict["category_id"] - _maybe_add_segm(obj, ann_dict) - _maybe_add_keypoints(obj, ann_dict) - _maybe_add_densepose(obj, ann_dict) - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - return dataset_dicts - - -def load_lvis_json(annotations_json_file: str, image_root: str, dataset_name: str): - """ - Loads a JSON file with annotations in LVIS instances format. - Replaces `detectron2.data.datasets.coco.load_lvis_json` to handle metadata - in a more flexible way. Postpones category mapping to a later stage to be - able to combine several datasets with different (but coherent) sets of - categories. - - Args: - - annotations_json_file: str - Path to the JSON file with annotations in COCO instances format. - image_root: str - directory that contains all the images - dataset_name: str - the name that identifies a dataset, e.g. "densepose_coco_2014_train" - extra_annotation_keys: Optional[List[str]] - If provided, these keys are used to extract additional data from - the annotations. - """ - lvis_api = _load_lvis_annotations(PathManager.get_local_path(annotations_json_file)) - - _add_categories_metadata(dataset_name) - - # sort indices for reproducible results - img_ids = sorted(lvis_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = lvis_api.load_imgs(img_ids) - logger = logging.getLogger(__name__) - logger.info("Loaded {} images in LVIS format from {}".format(len(imgs), annotations_json_file)) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. 
- anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] - - _verify_annotations_have_unique_ids(annotations_json_file, anns) - dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns) - return dataset_records - - -def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[str] = None) -> None: - """ - Registers provided LVIS DensePose dataset - - Args: - dataset_data: CocoDatasetInfo - Dataset data - datasets_root: Optional[str] - Datasets root folder (default: None) - """ - annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath) - images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root) - - def load_annotations(): - return load_lvis_json( - annotations_json_file=annotations_fpath, - image_root=images_root, - dataset_name=dataset_data.name, - ) - - DatasetCatalog.register(dataset_data.name, load_annotations) - MetadataCatalog.get(dataset_data.name).set( - json_file=annotations_fpath, - image_root=images_root, - evaluator_type="lvis", - **get_metadata(DENSEPOSE_METADATA_URL_PREFIX), - ) - - -def register_datasets( - datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[str] = None -) -> None: - """ - Registers provided LVIS DensePose datasets - - Args: - datasets_data: Iterable[CocoDatasetInfo] - An iterable of dataset datas - datasets_root: Optional[str] - Datasets root folder (default: None) - """ - for dataset_data in datasets_data: - register_dataset(dataset_data, datasets_root) diff --git a/detectron2/projects/DensePose/densepose/data/image_list_dataset.py b/detectron2/projects/DensePose/densepose/data/image_list_dataset.py deleted file mode 100644 index 503bf647d7810f4b45cb3a442370ddbbf8e7f2a3..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/image_list_dataset.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import logging -import numpy as np -from typing import Any, Callable, Dict, List, Optional, Union -import torch -from torch.utils.data.dataset import Dataset - -from detectron2.data.detection_utils import read_image - -ImageTransform = Callable[[torch.Tensor], torch.Tensor] - - -class ImageListDataset(Dataset): - """ - Dataset that provides images from a list. - """ - - _EMPTY_IMAGE = torch.empty((0, 3, 1, 1)) - - def __init__( - self, - image_list: List[str], - category_list: Union[str, List[str], None] = None, - transform: Optional[ImageTransform] = None, - ): - """ - Args: - image_list (List[str]): list of paths to image files - category_list (Union[str, List[str], None]): list of animal categories for - each image. 
If it is a string, or None, this applies to all images - """ - if type(category_list) is list: - self.category_list = category_list - else: - self.category_list = [category_list] * len(image_list) - assert len(image_list) == len( - self.category_list - ), "length of image and category lists must be equal" - self.image_list = image_list - self.transform = transform - - def __getitem__(self, idx: int) -> Dict[str, Any]: - """ - Gets selected images from the list - - Args: - idx (int): video index in the video list file - Returns: - A dictionary containing two keys: - images (torch.Tensor): tensor of size [N, 3, H, W] (N = 1, or 0 for _EMPTY_IMAGE) - categories (List[str]): categories of the frames - """ - categories = [self.category_list[idx]] - fpath = self.image_list[idx] - transform = self.transform - - try: - image = torch.from_numpy(np.ascontiguousarray(read_image(fpath, format="BGR"))) - image = image.permute(2, 0, 1).unsqueeze(0).float() # HWC -> NCHW - if transform is not None: - image = transform(image) - return {"images": image, "categories": categories} - except (OSError, RuntimeError) as e: - logger = logging.getLogger(__name__) - logger.warning(f"Error opening image file container {fpath}: {e}") - - return {"images": self._EMPTY_IMAGE, "categories": []} - - def __len__(self): - return len(self.image_list) diff --git a/detectron2/projects/DensePose/densepose/data/inference_based_loader.py b/detectron2/projects/DensePose/densepose/data/inference_based_loader.py deleted file mode 100644 index b643935cb7cbcaa06f66ca1c459ef25c5753cffd..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/inference_based_loader.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import random -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple -import torch -from torch import nn - -SampledData = Any -ModelOutput = Any - - -def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]: - """ - Group elements of an iterable by chunks of size `n`, e.g. - grouper(range(9), 4) -> - (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None) - """ - it = iter(iterable) - while True: - values = [] - for _ in range(n): - try: - value = next(it) - except StopIteration: - if values: - values.extend([fillvalue] * (n - len(values))) - yield tuple(values) - return - values.append(value) - yield tuple(values) - - -class ScoreBasedFilter: - """ - Filters entries in model output based on their scores - Discards all entries with score less than the specified minimum - """ - - def __init__(self, min_score: float = 0.8): - self.min_score = min_score - - def __call__(self, model_output: ModelOutput) -> ModelOutput: - for model_output_i in model_output: - instances = model_output_i["instances"] - if not instances.has("scores"): - continue - instances_filtered = instances[instances.scores >= self.min_score] - model_output_i["instances"] = instances_filtered - return model_output - - -class InferenceBasedLoader: - """ - Data loader based on results inferred by a model. 
Consists of: - - a data loader that provides batches of images - - a model that is used to infer the results - - a data sampler that converts inferred results to annotations - """ - - def __init__( - self, - model: nn.Module, - data_loader: Iterable[List[Dict[str, Any]]], - data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None, - data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None, - shuffle: bool = True, - batch_size: int = 4, - inference_batch_size: int = 4, - drop_last: bool = False, - category_to_class_mapping: Optional[dict] = None, - ): - """ - Constructor - - Args: - model (torch.nn.Module): model used to produce data - data_loader (Iterable[List[Dict[str, Any]]]): iterable that provides - dictionaries with "images" and "categories" fields to perform inference on - data_sampler (Callable: ModelOutput -> SampledData): functor - that produces annotation data from inference results; - (optional, default: None) - data_filter (Callable: ModelOutput -> ModelOutput): filter - that selects model outputs for further processing - (optional, default: None) - shuffle (bool): if True, the input images get shuffled - batch_size (int): batch size for the produced annotation data - inference_batch_size (int): batch size for input images - drop_last (bool): if True, drop the last batch if it is undersized - category_to_class_mapping (dict): category to class mapping - """ - self.model = model - self.model.eval() - self.data_loader = data_loader - self.data_sampler = data_sampler - self.data_filter = data_filter - self.shuffle = shuffle - self.batch_size = batch_size - self.inference_batch_size = inference_batch_size - self.drop_last = drop_last - if category_to_class_mapping is not None: - self.category_to_class_mapping = category_to_class_mapping - else: - self.category_to_class_mapping = {} - - def __iter__(self) -> Iterator[List[SampledData]]: - for batch in self.data_loader: - # batch : List[Dict[str: Tensor[N, C, H, W], str: Optional[str]]] - # images_batch : Tensor[N, C, H, W] - # image : Tensor[C, H, W] - images_and_categories = [ - {"image": image, "category": category} - for element in batch - for image, category in zip(element["images"], element["categories"]) - ] - if not images_and_categories: - continue - if self.shuffle: - random.shuffle(images_and_categories) - yield from self._produce_data(images_and_categories) # pyre-ignore[6] - - def _produce_data( - self, images_and_categories: List[Tuple[torch.Tensor, Optional[str]]] - ) -> Iterator[List[SampledData]]: - """ - Produce batches of data from images - - Args: - images_and_categories (List[Tuple[torch.Tensor, Optional[str]]]): - list of images and corresponding categories to process - - Returns: - Iterator over batches of data sampled from model outputs - """ - data_batches: List[SampledData] = [] - category_to_class_mapping = self.category_to_class_mapping - batched_images_and_categories = _grouper(images_and_categories, self.inference_batch_size) - for batch in batched_images_and_categories: - batch = [ - { - "image": image_and_category["image"].to(self.model.device), - "category": image_and_category["category"], - } - for image_and_category in batch - if image_and_category is not None - ] - if not batch: - continue - with torch.no_grad(): - model_output = self.model(batch) - for model_output_i, batch_i in zip(model_output, batch): - assert len(batch_i["image"].shape) == 3 - model_output_i["image"] = batch_i["image"] - instance_class = category_to_class_mapping.get(batch_i["category"], 0) - 
model_output_i["instances"].dataset_classes = torch.tensor( - [instance_class] * len(model_output_i["instances"]) - ) - model_output_filtered = ( - model_output if self.data_filter is None else self.data_filter(model_output) - ) - data = ( - model_output_filtered - if self.data_sampler is None - else self.data_sampler(model_output_filtered) - ) - for data_i in data: - if len(data_i["instances"]): - data_batches.append(data_i) - if len(data_batches) >= self.batch_size: - yield data_batches[: self.batch_size] - data_batches = data_batches[self.batch_size :] - if not self.drop_last and data_batches: - yield data_batches diff --git a/detectron2/projects/DensePose/densepose/data/meshes/__init__.py b/detectron2/projects/DensePose/densepose/data/meshes/__init__.py deleted file mode 100644 index 7552c251b2225af62212aae69d4ce273608f7a67..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/meshes/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from . import builtin - -__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/detectron2/projects/DensePose/densepose/data/meshes/builtin.py b/detectron2/projects/DensePose/densepose/data/meshes/builtin.py deleted file mode 100644 index fc8ec8418852dc344d7c4bd9f6c5fdd049b30a6d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/meshes/builtin.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from .catalog import MeshInfo, register_meshes - -DENSEPOSE_MESHES_DIR = "https://dl.fbaipublicfiles.com/densepose/meshes/" - -MESHES = [ - MeshInfo( - name="smpl_27554", - data="smpl_27554.pkl", - geodists="geodists/geodists_smpl_27554.pkl", - symmetry="symmetry/symmetry_smpl_27554.pkl", - texcoords="texcoords/texcoords_smpl_27554.pkl", - ), - MeshInfo( - name="chimp_5029", - data="chimp_5029.pkl", - geodists="geodists/geodists_chimp_5029.pkl", - symmetry="symmetry/symmetry_chimp_5029.pkl", - texcoords="texcoords/texcoords_chimp_5029.pkl", - ), - MeshInfo( - name="cat_5001", - data="cat_5001.pkl", - geodists="geodists/geodists_cat_5001.pkl", - symmetry="symmetry/symmetry_cat_5001.pkl", - texcoords="texcoords/texcoords_cat_5001.pkl", - ), - MeshInfo( - name="cat_7466", - data="cat_7466.pkl", - geodists="geodists/geodists_cat_7466.pkl", - symmetry="symmetry/symmetry_cat_7466.pkl", - texcoords="texcoords/texcoords_cat_7466.pkl", - ), - MeshInfo( - name="sheep_5004", - data="sheep_5004.pkl", - geodists="geodists/geodists_sheep_5004.pkl", - symmetry="symmetry/symmetry_sheep_5004.pkl", - texcoords="texcoords/texcoords_sheep_5004.pkl", - ), - MeshInfo( - name="zebra_5002", - data="zebra_5002.pkl", - geodists="geodists/geodists_zebra_5002.pkl", - symmetry="symmetry/symmetry_zebra_5002.pkl", - texcoords="texcoords/texcoords_zebra_5002.pkl", - ), - MeshInfo( - name="horse_5004", - data="horse_5004.pkl", - geodists="geodists/geodists_horse_5004.pkl", - symmetry="symmetry/symmetry_horse_5004.pkl", - texcoords="texcoords/texcoords_zebra_5002.pkl", - ), - MeshInfo( - name="giraffe_5002", - data="giraffe_5002.pkl", - geodists="geodists/geodists_giraffe_5002.pkl", - symmetry="symmetry/symmetry_giraffe_5002.pkl", - texcoords="texcoords/texcoords_giraffe_5002.pkl", - ), - MeshInfo( - name="elephant_5002", - data="elephant_5002.pkl", - geodists="geodists/geodists_elephant_5002.pkl", - 
symmetry="symmetry/symmetry_elephant_5002.pkl", - texcoords="texcoords/texcoords_elephant_5002.pkl", - ), - MeshInfo( - name="dog_5002", - data="dog_5002.pkl", - geodists="geodists/geodists_dog_5002.pkl", - symmetry="symmetry/symmetry_dog_5002.pkl", - texcoords="texcoords/texcoords_dog_5002.pkl", - ), - MeshInfo( - name="dog_7466", - data="dog_7466.pkl", - geodists="geodists/geodists_dog_7466.pkl", - symmetry="symmetry/symmetry_dog_7466.pkl", - texcoords="texcoords/texcoords_dog_7466.pkl", - ), - MeshInfo( - name="cow_5002", - data="cow_5002.pkl", - geodists="geodists/geodists_cow_5002.pkl", - symmetry="symmetry/symmetry_cow_5002.pkl", - texcoords="texcoords/texcoords_cow_5002.pkl", - ), - MeshInfo( - name="bear_4936", - data="bear_4936.pkl", - geodists="geodists/geodists_bear_4936.pkl", - symmetry="symmetry/symmetry_bear_4936.pkl", - texcoords="texcoords/texcoords_bear_4936.pkl", - ), -] - -register_meshes(MESHES, DENSEPOSE_MESHES_DIR) diff --git a/detectron2/projects/DensePose/densepose/data/meshes/catalog.py b/detectron2/projects/DensePose/densepose/data/meshes/catalog.py deleted file mode 100644 index ae624a8aa21fb11cc3c3f7ee467f28b896959781..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/meshes/catalog.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import logging -from collections import UserDict -from dataclasses import dataclass -from typing import Iterable, Optional - -from ..utils import maybe_prepend_base_path - - -@dataclass -class MeshInfo: - name: str - data: str - geodists: Optional[str] = None - symmetry: Optional[str] = None - texcoords: Optional[str] = None - - -class _MeshCatalog(UserDict): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.mesh_ids = {} - self.mesh_names = {} - self.max_mesh_id = -1 - - def __setitem__(self, key, value): - if key in self: - logger = logging.getLogger(__name__) - logger.warning( - f"Overwriting mesh catalog entry '{key}': old value {self[key]}" - f", new value {value}" - ) - mesh_id = self.mesh_ids[key] - else: - self.max_mesh_id += 1 - mesh_id = self.max_mesh_id - super().__setitem__(key, value) - self.mesh_ids[key] = mesh_id - self.mesh_names[mesh_id] = key - - def get_mesh_id(self, shape_name: str) -> int: - return self.mesh_ids[shape_name] - - def get_mesh_name(self, mesh_id: int) -> str: - return self.mesh_names[mesh_id] - - -MeshCatalog = _MeshCatalog() - - -def register_mesh(mesh_info: MeshInfo, base_path: Optional[str]) -> None: - geodists, symmetry, texcoords = mesh_info.geodists, mesh_info.symmetry, mesh_info.texcoords - if geodists: - geodists = maybe_prepend_base_path(base_path, geodists) - if symmetry: - symmetry = maybe_prepend_base_path(base_path, symmetry) - if texcoords: - texcoords = maybe_prepend_base_path(base_path, texcoords) - MeshCatalog[mesh_info.name] = MeshInfo( - name=mesh_info.name, - data=maybe_prepend_base_path(base_path, mesh_info.data), - geodists=geodists, - symmetry=symmetry, - texcoords=texcoords, - ) - - -def register_meshes(mesh_infos: Iterable[MeshInfo], base_path: Optional[str]) -> None: - for mesh_info in mesh_infos: - register_mesh(mesh_info, base_path) diff --git a/detectron2/projects/DensePose/densepose/data/samplers/__init__.py b/detectron2/projects/DensePose/densepose/data/samplers/__init__.py deleted file mode 100644 index 7bf28288d8929c1b250720a2c6decfc9978dd903..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/DensePose/densepose/data/samplers/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .densepose_uniform import DensePoseUniformSampler -from .densepose_confidence_based import DensePoseConfidenceBasedSampler -from .densepose_cse_uniform import DensePoseCSEUniformSampler -from .densepose_cse_confidence_based import DensePoseCSEConfidenceBasedSampler -from .mask_from_densepose import MaskFromDensePoseSampler -from .prediction_to_gt import PredictionToGroundTruthSampler diff --git a/detectron2/projects/DensePose/densepose/data/samplers/densepose_base.py b/detectron2/projects/DensePose/densepose/data/samplers/densepose_base.py deleted file mode 100644 index 260413a5b65853d12b4cdb1bcff906f02ed7d63c..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/densepose_base.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any, Dict, List, Tuple -import torch -from torch.nn import functional as F - -from detectron2.structures import BoxMode, Instances - -from densepose.converters import ToChartResultConverter -from densepose.converters.base import IntTupleBox, make_int_box -from densepose.structures import DensePoseDataRelative, DensePoseList - - -class DensePoseBaseSampler: - """ - Base DensePose sampler to produce DensePose data from DensePose predictions. - Samples for each class are drawn according to some distribution over all pixels estimated - to belong to that class. - """ - - def __init__(self, count_per_class: int = 8): - """ - Constructor - - Args: - count_per_class (int): the sampler produces at most `count_per_class` - samples for each category - """ - self.count_per_class = count_per_class - - def __call__(self, instances: Instances) -> DensePoseList: - """ - Convert DensePose predictions (an instance of `DensePoseChartPredictorOutput`) - into DensePose annotations data (an instance of `DensePoseList`) - """ - boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu() - boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - dp_datas = [] - for i in range(len(boxes_xywh_abs)): - annotation_i = self._sample(instances[i], make_int_box(boxes_xywh_abs[i])) - annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask( # pyre-ignore[6] - instances[i].pred_densepose - ) - dp_datas.append(DensePoseDataRelative(annotation_i)) - # create densepose annotations on CPU - dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size) - return dp_list - - def _sample(self, instance: Instances, bbox_xywh: IntTupleBox) -> Dict[str, List[Any]]: - """ - Sample DensPoseDataRelative from estimation results - """ - labels, dp_result = self._produce_labels_and_results(instance) - annotation = { - DensePoseDataRelative.X_KEY: [], - DensePoseDataRelative.Y_KEY: [], - DensePoseDataRelative.U_KEY: [], - DensePoseDataRelative.V_KEY: [], - DensePoseDataRelative.I_KEY: [], - } - n, h, w = dp_result.shape - for part_id in range(1, DensePoseDataRelative.N_PART_LABELS + 1): - # indices - tuple of 3 1D tensors of size k - # 0: index along the first dimension N - # 1: index along H dimension - # 2: index along W dimension - indices = torch.nonzero(labels.expand(n, h, w) == part_id, as_tuple=True) - # values - an array of size [n, k] - # n: number of channels (U, V, confidences) - # k: number of points labeled with part_id - values = dp_result[indices].view(n, -1) - k 
= values.shape[1] - count = min(self.count_per_class, k) - if count <= 0: - continue - index_sample = self._produce_index_sample(values, count) - sampled_values = values[:, index_sample] - sampled_y = indices[1][index_sample] + 0.5 - sampled_x = indices[2][index_sample] + 0.5 - # prepare / normalize data - x = (sampled_x / w * 256.0).cpu().tolist() - y = (sampled_y / h * 256.0).cpu().tolist() - u = sampled_values[0].clamp(0, 1).cpu().tolist() - v = sampled_values[1].clamp(0, 1).cpu().tolist() - fine_segm_labels = [part_id] * count - # extend annotations - annotation[DensePoseDataRelative.X_KEY].extend(x) - annotation[DensePoseDataRelative.Y_KEY].extend(y) - annotation[DensePoseDataRelative.U_KEY].extend(u) - annotation[DensePoseDataRelative.V_KEY].extend(v) - annotation[DensePoseDataRelative.I_KEY].extend(fine_segm_labels) - return annotation - - def _produce_index_sample(self, values: torch.Tensor, count: int): - """ - Abstract method to produce a sample of indices to select data - To be implemented in descendants - - Args: - values (torch.Tensor): an array of size [n, k] that contains - estimated values (U, V, confidences); - n: number of channels (U, V, confidences) - k: number of points labeled with part_id - count (int): number of samples to produce, should be positive and <= k - - Return: - list(int): indices of values (along axis 1) selected as a sample - """ - raise NotImplementedError - - def _produce_labels_and_results(self, instance: Instances) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Method to get labels and DensePose results from an instance - - Args: - instance (Instances): an instance of `DensePoseChartPredictorOutput` - - Return: - labels (torch.Tensor): shape [H, W], DensePose segmentation labels - dp_result (torch.Tensor): shape [2, H, W], stacked DensePose results u and v - """ - converter = ToChartResultConverter - chart_result = converter.convert(instance.pred_densepose, instance.pred_boxes) - labels, dp_result = chart_result.labels.cpu(), chart_result.uv.cpu() - return labels, dp_result - - def _resample_mask(self, output: Any) -> torch.Tensor: - """ - Convert DensePose predictor output to segmentation annotation - tensors of size - (256, 256) and type `int64`. 
- - Args: - output: DensePose predictor output with the following attributes: - - coarse_segm: tensor of size [N, D, H, W] with unnormalized coarse - segmentation scores - - fine_segm: tensor of size [N, C, H, W] with unnormalized fine - segmentation scores - Return: - Tensor of size (S, S) and type `int64` with coarse segmentation annotations, - where S = DensePoseDataRelative.MASK_SIZE - """ - sz = DensePoseDataRelative.MASK_SIZE - S = ( - F.interpolate(output.coarse_segm, (sz, sz), mode="bilinear", align_corners=False) - .argmax(dim=1) - .long() - ) - I = ( - ( - F.interpolate( - output.fine_segm, - (sz, sz), - mode="bilinear", - align_corners=False, - ).argmax(dim=1) - * (S > 0).long() - ) - .squeeze() - .cpu() - ) - # Map fine segmentation results to coarse segmentation ground truth - # TODO: extract this into separate classes - # coarse segmentation: 1 = Torso, 2 = Right Hand, 3 = Left Hand, - # 4 = Left Foot, 5 = Right Foot, 6 = Upper Leg Right, 7 = Upper Leg Left, - # 8 = Lower Leg Right, 9 = Lower Leg Left, 10 = Upper Arm Left, - # 11 = Upper Arm Right, 12 = Lower Arm Left, 13 = Lower Arm Right, - # 14 = Head - # fine segmentation: 1, 2 = Torso, 3 = Right Hand, 4 = Left Hand, - # 5 = Left Foot, 6 = Right Foot, 7, 9 = Upper Leg Right, - # 8, 10 = Upper Leg Left, 11, 13 = Lower Leg Right, - # 12, 14 = Lower Leg Left, 15, 17 = Upper Arm Left, - # 16, 18 = Upper Arm Right, 19, 21 = Lower Arm Left, - # 20, 22 = Lower Arm Right, 23, 24 = Head - FINE_TO_COARSE_SEGMENTATION = { - 1: 1, - 2: 1, - 3: 2, - 4: 3, - 5: 4, - 6: 5, - 7: 6, - 8: 7, - 9: 6, - 10: 7, - 11: 8, - 12: 9, - 13: 8, - 14: 9, - 15: 10, - 16: 11, - 17: 10, - 18: 11, - 19: 12, - 20: 13, - 21: 12, - 22: 13, - 23: 14, - 24: 14, - } - mask = torch.zeros((sz, sz), dtype=torch.int64, device=torch.device("cpu")) - for i in range(DensePoseDataRelative.N_PART_LABELS): - mask[I == i + 1] = FINE_TO_COARSE_SEGMENTATION[i + 1] - return mask diff --git a/detectron2/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py b/detectron2/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py deleted file mode 100644 index 5a9a637e214cbd584773a9fb6031368b5d32417b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import random -from typing import Optional, Tuple -import torch - -from densepose.converters import ToChartResultConverterWithConfidences - -from .densepose_base import DensePoseBaseSampler - - -class DensePoseConfidenceBasedSampler(DensePoseBaseSampler): - """ - Samples DensePose data from DensePose predictions. - Samples for each class are drawn using confidence value estimates. 
- """ - - def __init__( - self, - confidence_channel: str, - count_per_class: int = 8, - search_count_multiplier: Optional[float] = None, - search_proportion: Optional[float] = None, - ): - """ - Constructor - - Args: - confidence_channel (str): confidence channel to use for sampling; - possible values: - "sigma_2": confidences for UV values - "fine_segm_confidence": confidences for fine segmentation - "coarse_segm_confidence": confidences for coarse segmentation - (default: "sigma_2") - count_per_class (int): the sampler produces at most `count_per_class` - samples for each category (default: 8) - search_count_multiplier (float or None): if not None, the total number - of the most confident estimates of a given class to consider is - defined as `min(search_count_multiplier * count_per_class, N)`, - where `N` is the total number of estimates of the class; cannot be - specified together with `search_proportion` (default: None) - search_proportion (float or None): if not None, the total number of the - of the most confident estimates of a given class to consider is - defined as `min(max(search_proportion * N, count_per_class), N)`, - where `N` is the total number of estimates of the class; cannot be - specified together with `search_count_multiplier` (default: None) - """ - super().__init__(count_per_class) - self.confidence_channel = confidence_channel - self.search_count_multiplier = search_count_multiplier - self.search_proportion = search_proportion - assert (search_count_multiplier is None) or (search_proportion is None), ( - f"Cannot specify both search_count_multiplier (={search_count_multiplier})" - f"and search_proportion (={search_proportion})" - ) - - def _produce_index_sample(self, values: torch.Tensor, count: int): - """ - Produce a sample of indices to select data based on confidences - - Args: - values (torch.Tensor): an array of size [n, k] that contains - estimated values (U, V, confidences); - n: number of channels (U, V, confidences) - k: number of points labeled with part_id - count (int): number of samples to produce, should be positive and <= k - - Return: - list(int): indices of values (along axis 1) selected as a sample - """ - k = values.shape[1] - if k == count: - index_sample = list(range(k)) - else: - # take the best count * search_count_multiplier pixels, - # sample from them uniformly - # (here best = smallest variance) - _, sorted_confidence_indices = torch.sort(values[2]) - if self.search_count_multiplier is not None: - search_count = min(int(count * self.search_count_multiplier), k) - elif self.search_proportion is not None: - search_count = min(max(int(k * self.search_proportion), count), k) - else: - search_count = min(count, k) - sample_from_top = random.sample(range(search_count), count) - index_sample = sorted_confidence_indices[:search_count][sample_from_top] - return index_sample - - def _produce_labels_and_results(self, instance) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Method to get labels and DensePose results from an instance, with confidences - - Args: - instance (Instances): an instance of `DensePoseChartPredictorOutputWithConfidences` - - Return: - labels (torch.Tensor): shape [H, W], DensePose segmentation labels - dp_result (torch.Tensor): shape [3, H, W], DensePose results u and v - stacked with the confidence channel - """ - converter = ToChartResultConverterWithConfidences - chart_result = converter.convert(instance.pred_densepose, instance.pred_boxes) - labels, dp_result = chart_result.labels.cpu(), chart_result.uv.cpu() - dp_result = 
torch.cat( - (dp_result, getattr(chart_result, self.confidence_channel)[None].cpu()) - ) - - return labels, dp_result diff --git a/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_base.py b/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_base.py deleted file mode 100644 index 593f1339f29308ff93ba98ed1426ee1dbd47be27..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_base.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any, Dict, List, Tuple -import torch -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from densepose.converters.base import IntTupleBox -from densepose.data.utils import get_class_to_mesh_name_mapping -from densepose.modeling.cse.utils import squared_euclidean_distance_matrix -from densepose.structures import DensePoseDataRelative - -from .densepose_base import DensePoseBaseSampler - - -class DensePoseCSEBaseSampler(DensePoseBaseSampler): - """ - Base DensePose sampler to produce DensePose data from DensePose predictions. - Samples for each class are drawn according to some distribution over all pixels estimated - to belong to that class. - """ - - def __init__( - self, - cfg: CfgNode, - use_gt_categories: bool, - embedder: torch.nn.Module, - count_per_class: int = 8, - ): - """ - Constructor - - Args: - cfg (CfgNode): the config of the model - embedder (torch.nn.Module): necessary to compute mesh vertex embeddings - count_per_class (int): the sampler produces at most `count_per_class` - samples for each category - """ - super().__init__(count_per_class) - self.embedder = embedder - self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg) - self.use_gt_categories = use_gt_categories - - def _sample(self, instance: Instances, bbox_xywh: IntTupleBox) -> Dict[str, List[Any]]: - """ - Sample DensPoseDataRelative from estimation results - """ - if self.use_gt_categories: - instance_class = instance.dataset_classes.tolist()[0] - else: - instance_class = instance.pred_classes.tolist()[0] - mesh_name = self.class_to_mesh_name[instance_class] - - annotation = { - DensePoseDataRelative.X_KEY: [], - DensePoseDataRelative.Y_KEY: [], - DensePoseDataRelative.VERTEX_IDS_KEY: [], - DensePoseDataRelative.MESH_NAME_KEY: mesh_name, - } - - mask, embeddings, other_values = self._produce_mask_and_results(instance, bbox_xywh) - indices = torch.nonzero(mask, as_tuple=True) - selected_embeddings = embeddings.permute(1, 2, 0)[indices].cpu() - values = other_values[:, indices[0], indices[1]] - k = values.shape[1] - - count = min(self.count_per_class, k) - if count <= 0: - return annotation - - index_sample = self._produce_index_sample(values, count) - closest_vertices = squared_euclidean_distance_matrix( - selected_embeddings[index_sample], self.embedder(mesh_name) - ) - closest_vertices = torch.argmin(closest_vertices, dim=1) - - sampled_y = indices[0][index_sample] + 0.5 - sampled_x = indices[1][index_sample] + 0.5 - # prepare / normalize data - _, _, w, h = bbox_xywh - x = (sampled_x / w * 256.0).cpu().tolist() - y = (sampled_y / h * 256.0).cpu().tolist() - # extend annotations - annotation[DensePoseDataRelative.X_KEY].extend(x) - annotation[DensePoseDataRelative.Y_KEY].extend(y) - annotation[DensePoseDataRelative.VERTEX_IDS_KEY].extend(closest_vertices.cpu().tolist()) - return annotation - - def _produce_mask_and_results( - self, 
instance: Instances, bbox_xywh: IntTupleBox - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Method to get labels and DensePose results from an instance - - Args: - instance (Instances): an instance of `DensePoseEmbeddingPredictorOutput` - bbox_xywh (IntTupleBox): the corresponding bounding box - - Return: - mask (torch.Tensor): shape [H, W], DensePose segmentation mask - embeddings (Tuple[torch.Tensor]): a tensor of shape [D, H, W], - DensePose CSE Embeddings - other_values (Tuple[torch.Tensor]): a tensor of shape [0, H, W], - for potential other values - """ - densepose_output = instance.pred_densepose - S = densepose_output.coarse_segm - E = densepose_output.embedding - _, _, w, h = bbox_xywh - embeddings = F.interpolate(E, size=(h, w), mode="bilinear")[0] - coarse_segm_resized = F.interpolate(S, size=(h, w), mode="bilinear")[0] - mask = coarse_segm_resized.argmax(0) > 0 - other_values = torch.empty((0, h, w), device=E.device) - return mask, embeddings, other_values - - def _resample_mask(self, output: Any) -> torch.Tensor: - """ - Convert DensePose predictor output to segmentation annotation - tensors of size - (256, 256) and type `int64`. - - Args: - output: DensePose predictor output with the following attributes: - - coarse_segm: tensor of size [N, D, H, W] with unnormalized coarse - segmentation scores - Return: - Tensor of size (S, S) and type `int64` with coarse segmentation annotations, - where S = DensePoseDataRelative.MASK_SIZE - """ - sz = DensePoseDataRelative.MASK_SIZE - mask = ( - F.interpolate(output.coarse_segm, (sz, sz), mode="bilinear", align_corners=False) - .argmax(dim=1) - .long() - .squeeze() - .cpu() - ) - return mask diff --git a/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_confidence_based.py b/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_confidence_based.py deleted file mode 100644 index d656a5ab853152c65d8f4c88fe7210cf68ee8df7..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_confidence_based.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import random -from typing import Optional, Tuple -import torch -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from densepose.converters.base import IntTupleBox - -from .densepose_cse_base import DensePoseCSEBaseSampler - - -class DensePoseCSEConfidenceBasedSampler(DensePoseCSEBaseSampler): - """ - Samples DensePose data from DensePose predictions. - Samples for each class are drawn using confidence value estimates. 
- """ - - def __init__( - self, - cfg: CfgNode, - use_gt_categories: bool, - embedder: torch.nn.Module, - confidence_channel: str, - count_per_class: int = 8, - search_count_multiplier: Optional[float] = None, - search_proportion: Optional[float] = None, - ): - """ - Constructor - - Args: - cfg (CfgNode): the config of the model - embedder (torch.nn.Module): necessary to compute mesh vertex embeddings - confidence_channel (str): confidence channel to use for sampling; - possible values: - "coarse_segm_confidence": confidences for coarse segmentation - (default: "coarse_segm_confidence") - count_per_class (int): the sampler produces at most `count_per_class` - samples for each category (default: 8) - search_count_multiplier (float or None): if not None, the total number - of the most confident estimates of a given class to consider is - defined as `min(search_count_multiplier * count_per_class, N)`, - where `N` is the total number of estimates of the class; cannot be - specified together with `search_proportion` (default: None) - search_proportion (float or None): if not None, the total number of the - of the most confident estimates of a given class to consider is - defined as `min(max(search_proportion * N, count_per_class), N)`, - where `N` is the total number of estimates of the class; cannot be - specified together with `search_count_multiplier` (default: None) - """ - super().__init__(cfg, use_gt_categories, embedder, count_per_class) - self.confidence_channel = confidence_channel - self.search_count_multiplier = search_count_multiplier - self.search_proportion = search_proportion - assert (search_count_multiplier is None) or (search_proportion is None), ( - f"Cannot specify both search_count_multiplier (={search_count_multiplier})" - f"and search_proportion (={search_proportion})" - ) - - def _produce_index_sample(self, values: torch.Tensor, count: int): - """ - Produce a sample of indices to select data based on confidences - - Args: - values (torch.Tensor): a tensor of length k that contains confidences - k: number of points labeled with part_id - count (int): number of samples to produce, should be positive and <= k - - Return: - list(int): indices of values (along axis 1) selected as a sample - """ - k = values.shape[1] - if k == count: - index_sample = list(range(k)) - else: - # take the best count * search_count_multiplier pixels, - # sample from them uniformly - # (here best = smallest variance) - _, sorted_confidence_indices = torch.sort(values[0]) - if self.search_count_multiplier is not None: - search_count = min(int(count * self.search_count_multiplier), k) - elif self.search_proportion is not None: - search_count = min(max(int(k * self.search_proportion), count), k) - else: - search_count = min(count, k) - sample_from_top = random.sample(range(search_count), count) - index_sample = sorted_confidence_indices[-search_count:][sample_from_top] - return index_sample - - def _produce_mask_and_results( - self, instance: Instances, bbox_xywh: IntTupleBox - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Method to get labels and DensePose results from an instance - - Args: - instance (Instances): an instance of - `DensePoseEmbeddingPredictorOutputWithConfidences` - bbox_xywh (IntTupleBox): the corresponding bounding box - - Return: - mask (torch.Tensor): shape [H, W], DensePose segmentation mask - embeddings (Tuple[torch.Tensor]): a tensor of shape [D, H, W] - DensePose CSE Embeddings - other_values: a tensor of shape [1, H, W], DensePose CSE confidence - """ - _, _, 
w, h = bbox_xywh - densepose_output = instance.pred_densepose - mask, embeddings, _ = super()._produce_mask_and_results(instance, bbox_xywh) - other_values = F.interpolate( - getattr(densepose_output, self.confidence_channel), - size=(h, w), - mode="bilinear", - )[0].cpu() - return mask, embeddings, other_values diff --git a/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_uniform.py b/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_uniform.py deleted file mode 100644 index 482c650caf404bfe96dd28c5092d2508b17a1dbf..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/densepose_cse_uniform.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .densepose_cse_base import DensePoseCSEBaseSampler -from .densepose_uniform import DensePoseUniformSampler - - -class DensePoseCSEUniformSampler(DensePoseCSEBaseSampler, DensePoseUniformSampler): - """ - Uniform Sampler for CSE - """ - - pass diff --git a/detectron2/projects/DensePose/densepose/data/samplers/densepose_uniform.py b/detectron2/projects/DensePose/densepose/data/samplers/densepose_uniform.py deleted file mode 100644 index af0e35b667047674a498433e4c153475a5b5a1fc..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/densepose_uniform.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import random -import torch - -from .densepose_base import DensePoseBaseSampler - - -class DensePoseUniformSampler(DensePoseBaseSampler): - """ - Samples DensePose data from DensePose predictions. - Samples for each class are drawn uniformly over all pixels estimated - to belong to that class. - """ - - def __init__(self, count_per_class: int = 8): - """ - Constructor - - Args: - count_per_class (int): the sampler produces at most `count_per_class` - samples for each category - """ - super().__init__(count_per_class) - - def _produce_index_sample(self, values: torch.Tensor, count: int): - """ - Produce a uniform sample of indices to select data - - Args: - values (torch.Tensor): an array of size [n, k] that contains - estimated values (U, V, confidences); - n: number of channels (U, V, confidences) - k: number of points labeled with part_id - count (int): number of samples to produce, should be positive and <= k - - Return: - list(int): indices of values (along axis 1) selected as a sample - """ - k = values.shape[1] - return random.sample(range(k), count) diff --git a/detectron2/projects/DensePose/densepose/data/samplers/mask_from_densepose.py b/detectron2/projects/DensePose/densepose/data/samplers/mask_from_densepose.py deleted file mode 100644 index 9d631dff320bbec264675e6772c565cd06fc6b9f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/mask_from_densepose.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -from detectron2.structures import BitMasks, Instances - -from densepose.converters import ToMaskConverter - - -class MaskFromDensePoseSampler: - """ - Produce mask GT from DensePose predictions - This sampler simply converts DensePose predictions to BitMasks - that a contain a bool tensor of the size of the input image - """ - - def __call__(self, instances: Instances) -> BitMasks: - """ - Converts predicted data from `instances` into the GT mask data - - Args: - instances (Instances): predicted results, expected to have `pred_densepose` field - - Returns: - Boolean Tensor of the size of the input image that has non-zero - values at pixels that are estimated to belong to the detected object - """ - return ToMaskConverter.convert( - instances.pred_densepose, instances.pred_boxes, instances.image_size - ) diff --git a/detectron2/projects/DensePose/densepose/data/samplers/prediction_to_gt.py b/detectron2/projects/DensePose/densepose/data/samplers/prediction_to_gt.py deleted file mode 100644 index 42a28ff4f19012e96fdf3fb4923500839429a999..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/samplers/prediction_to_gt.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Optional - -from detectron2.structures import Instances - -ModelOutput = Dict[str, Any] -SampledData = Dict[str, Any] - - -@dataclass -class _Sampler: - """ - Sampler registry entry that contains: - - src (str): source field to sample from (deleted after sampling) - - dst (Optional[str]): destination field to sample to, if not None - - func (Optional[Callable: Any -> Any]): function that performs sampling, - if None, reference copy is performed - """ - - src: str - dst: Optional[str] - func: Optional[Callable[[Any], Any]] - - -class PredictionToGroundTruthSampler: - """ - Sampler implementation that converts predictions to GT using registered - samplers for different fields of `Instances`. 
- """ - - def __init__(self, dataset_name: str = ""): - self.dataset_name = dataset_name - self._samplers = {} - self.register_sampler("pred_boxes", "gt_boxes", None) - self.register_sampler("pred_classes", "gt_classes", None) - # delete scores - self.register_sampler("scores") - - def __call__(self, model_output: List[ModelOutput]) -> List[SampledData]: - """ - Transform model output into ground truth data through sampling - - Args: - model_output (Dict[str, Any]): model output - Returns: - Dict[str, Any]: sampled data - """ - for model_output_i in model_output: - instances: Instances = model_output_i["instances"] - # transform data in each field - for _, sampler in self._samplers.items(): - if not instances.has(sampler.src) or sampler.dst is None: - continue - if sampler.func is None: - instances.set(sampler.dst, instances.get(sampler.src)) - else: - instances.set(sampler.dst, sampler.func(instances)) - # delete model output data that was transformed - for _, sampler in self._samplers.items(): - if sampler.src != sampler.dst and instances.has(sampler.src): - instances.remove(sampler.src) - model_output_i["dataset"] = self.dataset_name - return model_output - - def register_sampler( - self, - prediction_attr: str, - gt_attr: Optional[str] = None, - func: Optional[Callable[[Any], Any]] = None, - ): - """ - Register sampler for a field - - Args: - prediction_attr (str): field to replace with a sampled value - gt_attr (Optional[str]): field to store the sampled value to, if not None - func (Optional[Callable: Any -> Any]): sampler function - """ - self._samplers[(prediction_attr, gt_attr)] = _Sampler( - src=prediction_attr, dst=gt_attr, func=func - ) - - def remove_sampler( - self, - prediction_attr: str, - gt_attr: Optional[str] = None, - ): - """ - Remove sampler for a field - - Args: - prediction_attr (str): field to replace with a sampled value - gt_attr (Optional[str]): field to store the sampled value to, if not None - """ - assert (prediction_attr, gt_attr) in self._samplers - del self._samplers[(prediction_attr, gt_attr)] diff --git a/detectron2/projects/DensePose/densepose/data/transform/__init__.py b/detectron2/projects/DensePose/densepose/data/transform/__init__.py deleted file mode 100644 index 147671e198475ce4a82b17e8f81a688d697207d8..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/transform/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .image import ImageResizeTransform diff --git a/detectron2/projects/DensePose/densepose/data/transform/image.py b/detectron2/projects/DensePose/densepose/data/transform/image.py deleted file mode 100644 index 0f35b3ab1de3b1b58e9d7f9763c73eb1236f67d2..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/transform/image.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -import torch - - -class ImageResizeTransform: - """ - Transform that resizes images loaded from a dataset - (BGR data in NCHW channel order, typically uint8) to a format ready to be - consumed by DensePose training (BGR float32 data in NCHW channel order) - """ - - def __init__(self, min_size: int = 800, max_size: int = 1333): - self.min_size = min_size - self.max_size = max_size - - def __call__(self, images: torch.Tensor) -> torch.Tensor: - """ - Args: - images (torch.Tensor): tensor of size [N, 3, H, W] that contains - BGR data (typically in uint8) - Returns: - images (torch.Tensor): tensor of size [N, 3, H1, W1] where - H1 and W1 are chosen to respect the specified min and max sizes - and preserve the original aspect ratio, the data channels - follow BGR order and the data type is `torch.float32` - """ - # resize with min size - images = images.float() - min_size = min(images.shape[-2:]) - max_size = max(images.shape[-2:]) - scale = min(self.min_size / min_size, self.max_size / max_size) - images = torch.nn.functional.interpolate( - images, - scale_factor=scale, - mode="bilinear", - align_corners=False, - ) - return images diff --git a/detectron2/projects/DensePose/densepose/data/utils.py b/detectron2/projects/DensePose/densepose/data/utils.py deleted file mode 100644 index 7625f3d5f7894d2d1519e8672d6fb2e6411e07ba..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/utils.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import os -from typing import Dict, Optional - -from detectron2.config import CfgNode - - -def is_relative_local_path(path: str) -> bool: - path_str = os.fsdecode(path) - return ("://" not in path_str) and not os.path.isabs(path) - - -def maybe_prepend_base_path(base_path: Optional[str], path: str): - """ - Prepends the provided path with a base path prefix if: - 1) base path is not None; - 2) path is a local path - """ - if base_path is None: - return path - if is_relative_local_path(path): - return os.path.join(base_path, path) - return path - - -def get_class_to_mesh_name_mapping(cfg: CfgNode) -> Dict[int, str]: - return { - int(class_id): mesh_name - for class_id, mesh_name in cfg.DATASETS.CLASS_TO_MESH_NAME_MAPPING.items() - } - - -def get_category_to_class_mapping(dataset_cfg: CfgNode) -> Dict[str, int]: - return { - category: int(class_id) - for category, class_id in dataset_cfg.CATEGORY_TO_CLASS_MAPPING.items() - } diff --git a/detectron2/projects/DensePose/densepose/data/video/__init__.py b/detectron2/projects/DensePose/densepose/data/video/__init__.py deleted file mode 100644 index dbd83443be8d6fff40b35a13758c31984f3d89be..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/video/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -from .frame_selector import ( - FrameSelectionStrategy, - RandomKFramesSelector, - FirstKFramesSelector, - LastKFramesSelector, - FrameTsList, - FrameSelector, -) - -from .video_keyframe_dataset import ( - VideoKeyframeDataset, - video_list_from_file, - list_keyframes, - read_keyframes, -) diff --git a/detectron2/projects/DensePose/densepose/data/video/frame_selector.py b/detectron2/projects/DensePose/densepose/data/video/frame_selector.py deleted file mode 100644 index 77a97a82f7c7bb95b2023df946b246f9de71a7d2..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/video/frame_selector.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import random -from collections.abc import Callable -from enum import Enum -from typing import Callable as TCallable -from typing import List - -FrameTsList = List[int] -FrameSelector = TCallable[[FrameTsList], FrameTsList] - - -class FrameSelectionStrategy(Enum): - """ - Frame selection strategy used with videos: - - "random_k": select k random frames - - "first_k": select k first frames - - "last_k": select k last frames - - "all": select all frames - """ - - # fmt: off - RANDOM_K = "random_k" - FIRST_K = "first_k" - LAST_K = "last_k" - ALL = "all" - # fmt: on - - -class RandomKFramesSelector(Callable): # pyre-ignore[39] - """ - Selector that retains at most `k` random frames - """ - - def __init__(self, k: int): - self.k = k - - def __call__(self, frame_tss: FrameTsList) -> FrameTsList: - """ - Select `k` random frames - - Args: - frames_tss (List[int]): timestamps of input frames - Returns: - List[int]: timestamps of selected frames - """ - return random.sample(frame_tss, min(self.k, len(frame_tss))) - - -class FirstKFramesSelector(Callable): # pyre-ignore[39] - """ - Selector that retains at most `k` first frames - """ - - def __init__(self, k: int): - self.k = k - - def __call__(self, frame_tss: FrameTsList) -> FrameTsList: - """ - Select `k` first frames - - Args: - frames_tss (List[int]): timestamps of input frames - Returns: - List[int]: timestamps of selected frames - """ - return frame_tss[: self.k] - - -class LastKFramesSelector(Callable): # pyre-ignore[39] - """ - Selector that retains at most `k` last frames from video data - """ - - def __init__(self, k: int): - self.k = k - - def __call__(self, frame_tss: FrameTsList) -> FrameTsList: - """ - Select `k` last frames - - Args: - frames_tss (List[int]): timestamps of input frames - Returns: - List[int]: timestamps of selected frames - """ - return frame_tss[-self.k :] diff --git a/detectron2/projects/DensePose/densepose/data/video/video_keyframe_dataset.py b/detectron2/projects/DensePose/densepose/data/video/video_keyframe_dataset.py deleted file mode 100644 index d68857f095a1224313b1dfddc9d75981b04ffa34..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/data/video/video_keyframe_dataset.py +++ /dev/null @@ -1,304 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
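# Illustrative sketch (added annotation, not part of the deleted files): each frame
# selector above is a callable that keeps at most k keyframe timestamps; the helpers
# below mirror that behaviour without importing the deleted module.
import random
from typing import Callable, List

FrameTsList = List[int]
FrameSelector = Callable[[FrameTsList], FrameTsList]

def first_k(k: int) -> FrameSelector:   # behaves like FirstKFramesSelector(k)
    return lambda ts: ts[:k]

def last_k(k: int) -> FrameSelector:    # behaves like LastKFramesSelector(k)
    return lambda ts: ts[-k:]

def random_k(k: int) -> FrameSelector:  # behaves like RandomKFramesSelector(k)
    return lambda ts: random.sample(ts, min(k, len(ts)))

timestamps = [0, 250, 500, 750, 1000, 1250]  # keyframe pts in timebase units
print(first_k(3)(timestamps))   # [0, 250, 500]
print(last_k(3)(timestamps))    # [750, 1000, 1250]
print(random_k(3)(timestamps))  # e.g. [500, 1250, 0]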
- -# pyre-unsafe - -import csv -import logging -import numpy as np -from typing import Any, Callable, Dict, List, Optional, Union -import av -import torch -from torch.utils.data.dataset import Dataset - -from detectron2.utils.file_io import PathManager - -from ..utils import maybe_prepend_base_path -from .frame_selector import FrameSelector, FrameTsList - -FrameList = List[av.frame.Frame] # pyre-ignore[16] -FrameTransform = Callable[[torch.Tensor], torch.Tensor] - - -def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList: - """ - Traverses all keyframes of a video file. Returns a list of keyframe - timestamps. Timestamps are counts in timebase units. - - Args: - video_fpath (str): Video file path - video_stream_idx (int): Video stream index (default: 0) - Returns: - List[int]: list of keyframe timestaps (timestamp is a count in timebase - units) - """ - try: - with PathManager.open(video_fpath, "rb") as io: - # pyre-fixme[16]: Module `av` has no attribute `open`. - container = av.open(io, mode="r") - stream = container.streams.video[video_stream_idx] - keyframes = [] - pts = -1 - # Note: even though we request forward seeks for keyframes, sometimes - # a keyframe in backwards direction is returned. We introduce tolerance - # as a max count of ignored backward seeks - tolerance_backward_seeks = 2 - while True: - try: - container.seek(pts + 1, backward=False, any_frame=False, stream=stream) - except av.AVError as e: - # the exception occurs when the video length is exceeded, - # we then return whatever data we've already collected - logger = logging.getLogger(__name__) - logger.debug( - f"List keyframes: Error seeking video file {video_fpath}, " - f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}" - ) - return keyframes - except OSError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"List keyframes: Error seeking video file {video_fpath}, " - f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}" - ) - return [] - packet = next(container.demux(video=video_stream_idx)) - if packet.pts is not None and packet.pts <= pts: - logger = logging.getLogger(__name__) - logger.warning( - f"Video file {video_fpath}, stream {video_stream_idx}: " - f"bad seek for packet {pts + 1} (got packet {packet.pts}), " - f"tolerance {tolerance_backward_seeks}." - ) - tolerance_backward_seeks -= 1 - if tolerance_backward_seeks == 0: - return [] - pts += 1 - continue - tolerance_backward_seeks = 2 - pts = packet.pts - if pts is None: - return keyframes - if packet.is_keyframe: - keyframes.append(pts) - return keyframes - except OSError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"List keyframes: Error opening video file container {video_fpath}, " f"OS error: {e}" - ) - except RuntimeError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"List keyframes: Error opening video file container {video_fpath}, " - f"Runtime error: {e}" - ) - return [] - - -def read_keyframes( - video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0 -) -> FrameList: # pyre-ignore[11] - """ - Reads keyframe data from a video file. 
- - Args: - video_fpath (str): Video file path - keyframes (List[int]): List of keyframe timestamps (as counts in - timebase units to be used in container seek operations) - video_stream_idx (int): Video stream index (default: 0) - Returns: - List[Frame]: list of frames that correspond to the specified timestamps - """ - try: - with PathManager.open(video_fpath, "rb") as io: - # pyre-fixme[16]: Module `av` has no attribute `open`. - container = av.open(io) - stream = container.streams.video[video_stream_idx] - frames = [] - for pts in keyframes: - try: - container.seek(pts, any_frame=False, stream=stream) - frame = next(container.decode(video=0)) - frames.append(frame) - except av.AVError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"Read keyframes: Error seeking video file {video_fpath}, " - f"video stream {video_stream_idx}, pts {pts}, AV error: {e}" - ) - container.close() - return frames - except OSError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"Read keyframes: Error seeking video file {video_fpath}, " - f"video stream {video_stream_idx}, pts {pts}, OS error: {e}" - ) - container.close() - return frames - except StopIteration: - logger = logging.getLogger(__name__) - logger.warning( - f"Read keyframes: Error decoding frame from {video_fpath}, " - f"video stream {video_stream_idx}, pts {pts}" - ) - container.close() - return frames - - container.close() - return frames - except OSError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}" - ) - except RuntimeError as e: - logger = logging.getLogger(__name__) - logger.warning( - f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}" - ) - return [] - - -def video_list_from_file(video_list_fpath: str, base_path: Optional[str] = None): - """ - Create a list of paths to video files from a text file. - - Args: - video_list_fpath (str): path to a plain text file with the list of videos - base_path (str): base path for entries from the video list (default: None) - """ - video_list = [] - with PathManager.open(video_list_fpath, "r") as io: - for line in io: - video_list.append(maybe_prepend_base_path(base_path, str(line.strip()))) - return video_list - - -def read_keyframe_helper_data(fpath: str): - """ - Read keyframe data from a file in CSV format: the header should contain - "video_id" and "keyframes" fields. 
Value specifications are: - video_id: int - keyframes: list(int) - Example of contents: - video_id,keyframes - 2,"[1,11,21,31,41,51,61,71,81]" - - Args: - fpath (str): File containing keyframe data - - Return: - video_id_to_keyframes (dict: int -> list(int)): for a given video ID it - contains a list of keyframes for that video - """ - video_id_to_keyframes = {} - try: - with PathManager.open(fpath, "r") as io: - csv_reader = csv.reader(io) - header = next(csv_reader) - video_id_idx = header.index("video_id") - keyframes_idx = header.index("keyframes") - for row in csv_reader: - video_id = int(row[video_id_idx]) - assert ( - video_id not in video_id_to_keyframes - ), f"Duplicate keyframes entry for video {fpath}" - video_id_to_keyframes[video_id] = ( - [int(v) for v in row[keyframes_idx][1:-1].split(",")] - if len(row[keyframes_idx]) > 2 - else [] - ) - except Exception as e: - logger = logging.getLogger(__name__) - logger.warning(f"Error reading keyframe helper data from {fpath}: {e}") - return video_id_to_keyframes - - -class VideoKeyframeDataset(Dataset): - """ - Dataset that provides keyframes for a set of videos. - """ - - _EMPTY_FRAMES = torch.empty((0, 3, 1, 1)) - - def __init__( - self, - video_list: List[str], - category_list: Union[str, List[str], None] = None, - frame_selector: Optional[FrameSelector] = None, - transform: Optional[FrameTransform] = None, - keyframe_helper_fpath: Optional[str] = None, - ): - """ - Dataset constructor - - Args: - video_list (List[str]): list of paths to video files - category_list (Union[str, List[str], None]): list of animal categories for each - video file. If it is a string, or None, this applies to all videos - frame_selector (Callable: KeyFrameList -> KeyFrameList): - selects keyframes to process, keyframes are given by - packet timestamps in timebase counts. If None, all keyframes - are selected (default: None) - transform (Callable: torch.Tensor -> torch.Tensor): - transforms a batch of RGB images (tensors of size [B, 3, H, W]), - returns a tensor of the same size. 
If None, no transform is - applied (default: None) - - """ - if type(category_list) is list: - self.category_list = category_list - else: - self.category_list = [category_list] * len(video_list) - assert len(video_list) == len( - self.category_list - ), "length of video and category lists must be equal" - self.video_list = video_list - self.frame_selector = frame_selector - self.transform = transform - self.keyframe_helper_data = ( - read_keyframe_helper_data(keyframe_helper_fpath) - if keyframe_helper_fpath is not None - else None - ) - - def __getitem__(self, idx: int) -> Dict[str, Any]: - """ - Gets selected keyframes from a given video - - Args: - idx (int): video index in the video list file - Returns: - A dictionary containing two keys: - images (torch.Tensor): tensor of size [N, H, W, 3] or of size - defined by the transform that contains keyframes data - categories (List[str]): categories of the frames - """ - categories = [self.category_list[idx]] - fpath = self.video_list[idx] - keyframes = ( - list_keyframes(fpath) - if self.keyframe_helper_data is None or idx not in self.keyframe_helper_data - else self.keyframe_helper_data[idx] - ) - transform = self.transform - frame_selector = self.frame_selector - if not keyframes: - return {"images": self._EMPTY_FRAMES, "categories": []} - if frame_selector is not None: - keyframes = frame_selector(keyframes) - frames = read_keyframes(fpath, keyframes) - if not frames: - return {"images": self._EMPTY_FRAMES, "categories": []} - frames = np.stack([frame.to_rgb().to_ndarray() for frame in frames]) - frames = torch.as_tensor(frames, device=torch.device("cpu")) - frames = frames[..., [2, 1, 0]] # RGB -> BGR - frames = frames.permute(0, 3, 1, 2).float() # NHWC -> NCHW - if transform is not None: - frames = transform(frames) - return {"images": frames, "categories": categories} - - def __len__(self): - return len(self.video_list) diff --git a/detectron2/projects/DensePose/densepose/engine/__init__.py b/detectron2/projects/DensePose/densepose/engine/__init__.py deleted file mode 100644 index 4709c1b2d87e3c578d98aaa083e41323e4047ac9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/engine/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .trainer import Trainer diff --git a/detectron2/projects/DensePose/densepose/engine/trainer.py b/detectron2/projects/DensePose/densepose/engine/trainer.py deleted file mode 100644 index 0c9046312244dc2381ea722413986010f4ba75e7..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/engine/trainer.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -# pyre-unsafe - -import logging -import os -from collections import OrderedDict -from typing import List, Optional, Union -import torch -from torch import nn - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import CfgNode -from detectron2.engine import DefaultTrainer -from detectron2.evaluation import ( - DatasetEvaluator, - DatasetEvaluators, - inference_on_dataset, - print_csv_format, -) -from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping -from detectron2.utils import comm -from detectron2.utils.events import EventWriter, get_event_storage - -from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg -from densepose.data import ( - DatasetMapper, - build_combined_loader, - build_detection_test_loader, - build_detection_train_loader, - build_inference_based_loaders, - has_inference_based_loaders, -) -from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter -from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage -from densepose.modeling.cse import Embedder - - -class SampleCountingLoader: - def __init__(self, loader): - self.loader = loader - - def __iter__(self): - it = iter(self.loader) - storage = get_event_storage() - while True: - try: - batch = next(it) - num_inst_per_dataset = {} - for data in batch: - dataset_name = data["dataset"] - if dataset_name not in num_inst_per_dataset: - num_inst_per_dataset[dataset_name] = 0 - num_inst = len(data["instances"]) - num_inst_per_dataset[dataset_name] += num_inst - for dataset_name in num_inst_per_dataset: - storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name]) - yield batch - except StopIteration: - break - - -class SampleCountMetricPrinter(EventWriter): - def __init__(self): - self.logger = logging.getLogger(__name__) - - def write(self): - storage = get_event_storage() - batch_stats_strs = [] - for key, buf in storage.histories().items(): - if key.startswith("batch/"): - batch_stats_strs.append(f"{key} {buf.avg(20)}") - self.logger.info(", ".join(batch_stats_strs)) - - -class Trainer(DefaultTrainer): - @classmethod - def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]: - if isinstance(model, nn.parallel.DistributedDataParallel): - model = model.module - if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"): - return model.roi_heads.embedder - return None - - # TODO: the only reason to copy the base class code here is to pass the embedder from - # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting - @classmethod - def test( - cls, - cfg: CfgNode, - model: nn.Module, - evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None, - ): - """ - Args: - cfg (CfgNode): - model (nn.Module): - evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call - :meth:`build_evaluator`. Otherwise, must have the same length as - ``cfg.DATASETS.TEST``. 
- - Returns: - dict: a dict of result metrics - """ - logger = logging.getLogger(__name__) - if isinstance(evaluators, DatasetEvaluator): - evaluators = [evaluators] - if evaluators is not None: - assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( - len(cfg.DATASETS.TEST), len(evaluators) - ) - - results = OrderedDict() - for idx, dataset_name in enumerate(cfg.DATASETS.TEST): - data_loader = cls.build_test_loader(cfg, dataset_name) - # When evaluators are passed in as arguments, - # implicitly assume that evaluators can be created before data_loader. - if evaluators is not None: - evaluator = evaluators[idx] - else: - try: - embedder = cls.extract_embedder_from_model(model) - evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder) - except NotImplementedError: - logger.warn( - "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " - "or implement its `build_evaluator` method." - ) - results[dataset_name] = {} - continue - if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process(): - results_i = inference_on_dataset(model, data_loader, evaluator) - else: - results_i = {} - results[dataset_name] = results_i - if comm.is_main_process(): - assert isinstance( - results_i, dict - ), "Evaluator must return a dict on the main process. Got {} instead.".format( - results_i - ) - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - - if len(results) == 1: - results = list(results.values())[0] - return results - - @classmethod - def build_evaluator( - cls, - cfg: CfgNode, - dataset_name: str, - output_folder: Optional[str] = None, - embedder: Optional[Embedder] = None, - ) -> DatasetEvaluators: - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluators = [] - distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE - # Note: we currently use COCO evaluator for both COCO and LVIS datasets - # to have compatible metrics. 
LVIS bbox evaluator could also be used - # with an adapter to properly handle filtered / mapped categories - # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - # if evaluator_type == "coco": - # evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder)) - # elif evaluator_type == "lvis": - # evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder)) - evaluators.append( - Detectron2COCOEvaluatorAdapter( - dataset_name, output_dir=output_folder, distributed=distributed - ) - ) - if cfg.MODEL.DENSEPOSE_ON: - storage = build_densepose_evaluator_storage(cfg, output_folder) - evaluators.append( - DensePoseCOCOEvaluator( - dataset_name, - distributed, - output_folder, - evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE, - min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD, - storage=storage, - embedder=embedder, - should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT, - mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES, - ) - ) - return DatasetEvaluators(evaluators) - - @classmethod - def build_optimizer(cls, cfg: CfgNode, model: nn.Module): - params = get_default_optimizer_params( - model, - base_lr=cfg.SOLVER.BASE_LR, - weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, - bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR, - weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS, - overrides={ - "features": { - "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR, - }, - "embeddings": { - "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR, - }, - }, - ) - optimizer = torch.optim.SGD( - params, - cfg.SOLVER.BASE_LR, - momentum=cfg.SOLVER.MOMENTUM, - nesterov=cfg.SOLVER.NESTEROV, - weight_decay=cfg.SOLVER.WEIGHT_DECAY, - ) - # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`. - return maybe_add_gradient_clipping(cfg, optimizer) - - @classmethod - def build_test_loader(cls, cfg: CfgNode, dataset_name): - return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) - - @classmethod - def build_train_loader(cls, cfg: CfgNode): - data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True)) - if not has_inference_based_loaders(cfg): - return data_loader - model = cls.build_model(cfg) - model.to(cfg.BOOTSTRAP_MODEL.DEVICE) - DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False) - inference_based_loaders, ratios = build_inference_based_loaders(cfg, model) - loaders = [data_loader] + inference_based_loaders - ratios = [1.0] + ratios - combined_data_loader = build_combined_loader(cfg, loaders, ratios) - sample_counting_loader = SampleCountingLoader(combined_data_loader) - return sample_counting_loader - - def build_writers(self): - writers = super().build_writers() - writers.append(SampleCountMetricPrinter()) - return writers - - @classmethod - def test_with_TTA(cls, cfg: CfgNode, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. 
- logger.info("Running inference with test-time augmentation ...") - transform_data = load_from_cfg(cfg) - model = DensePoseGeneralizedRCNNWithTTA( - cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg) - ) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) # pyre-ignore[6] - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res diff --git a/detectron2/projects/DensePose/densepose/evaluation/__init__.py b/detectron2/projects/DensePose/densepose/evaluation/__init__.py deleted file mode 100644 index cffabf0808c913a309b791ba8869c80db52a0ac8..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/evaluation/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .evaluator import DensePoseCOCOEvaluator diff --git a/detectron2/projects/DensePose/densepose/evaluation/d2_evaluator_adapter.py b/detectron2/projects/DensePose/densepose/evaluation/d2_evaluator_adapter.py deleted file mode 100644 index a7fbb9e34f42bce02c71eab9efad742491c6b4aa..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/evaluation/d2_evaluator_adapter.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from detectron2.data.catalog import Metadata -from detectron2.evaluation import COCOEvaluator - -from densepose.data.datasets.coco import ( - get_contiguous_id_to_category_id_map, - maybe_filter_categories_cocoapi, -) - - -def _maybe_add_iscrowd_annotations(cocoapi) -> None: - for ann in cocoapi.dataset["annotations"]: - if "iscrowd" not in ann: - ann["iscrowd"] = 0 - - -class Detectron2COCOEvaluatorAdapter(COCOEvaluator): - def __init__( - self, - dataset_name, - output_dir=None, - distributed=True, - ): - super().__init__(dataset_name, output_dir=output_dir, distributed=distributed) - maybe_filter_categories_cocoapi(dataset_name, self._coco_api) - _maybe_add_iscrowd_annotations(self._coco_api) - # substitute category metadata to account for categories - # that are mapped to the same contiguous id - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - self._maybe_substitute_metadata() - - def _maybe_substitute_metadata(self): - cont_id_2_cat_id = get_contiguous_id_to_category_id_map(self._metadata) - cat_id_2_cont_id = self._metadata.thing_dataset_id_to_contiguous_id - if len(cont_id_2_cat_id) == len(cat_id_2_cont_id): - return - - cat_id_2_cont_id_injective = {} - for cat_id, cont_id in cat_id_2_cont_id.items(): - if (cont_id in cont_id_2_cat_id) and (cont_id_2_cat_id[cont_id] == cat_id): - cat_id_2_cont_id_injective[cat_id] = cont_id - - metadata_new = Metadata(name=self._metadata.name) - for key, value in self._metadata.__dict__.items(): - if key == "thing_dataset_id_to_contiguous_id": - setattr(metadata_new, key, cat_id_2_cont_id_injective) - else: - setattr(metadata_new, key, value) - self._metadata = metadata_new diff --git a/detectron2/projects/DensePose/densepose/evaluation/densepose_coco_evaluation.py b/detectron2/projects/DensePose/densepose/evaluation/densepose_coco_evaluation.py deleted file mode 100644 index 16bcec6a08921eb62f22ece337821d7ce9e7e591..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/evaluation/densepose_coco_evaluation.py +++ /dev/null @@ -1,1305 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
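# Illustrative sketch (added annotation, not part of the deleted files): the metadata
# fix-up performed by Detectron2COCOEvaluatorAdapter above. When several dataset
# category ids share one contiguous id, only the pair that maps back to itself is
# kept, which makes the substituted mapping injective.
cat_id_2_cont_id = {1: 0, 2: 1, 3: 1}  # two categories collapsed onto contiguous id 1
cont_id_2_cat_id = {0: 1, 1: 2}        # reverse map keeps a single representative

cat_id_2_cont_id_injective = {
    cat_id: cont_id
    for cat_id, cont_id in cat_id_2_cont_id.items()
    if cont_id_2_cat_id.get(cont_id) == cat_id
}
print(cat_id_2_cont_id_injective)  # {1: 0, 2: 1} -- category 3 is dropped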
-# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. -# This is a modified version of cocoeval.py where we also have the densepose evaluation. - -# pyre-unsafe - -__author__ = "tsungyi" - -import copy -import datetime -import logging -import numpy as np -import pickle -import time -from collections import defaultdict -from enum import Enum -from typing import Any, Dict, Tuple -import scipy.spatial.distance as ssd -import torch -import torch.nn.functional as F -from pycocotools import mask as maskUtils -from scipy.io import loadmat -from scipy.ndimage import zoom as spzoom - -from detectron2.utils.file_io import PathManager - -from densepose.converters.chart_output_to_chart_result import resample_uv_tensors_to_bbox -from densepose.converters.segm_to_mask import ( - resample_coarse_segm_tensor_to_bbox, - resample_fine_and_coarse_segm_tensors_to_bbox, -) -from densepose.modeling.cse.utils import squared_euclidean_distance_matrix -from densepose.structures import DensePoseDataRelative -from densepose.structures.mesh import create_mesh - -logger = logging.getLogger(__name__) - - -class DensePoseEvalMode(str, Enum): - # use both masks and geodesic distances (GPS * IOU) to compute scores - GPSM = "gpsm" - # use only geodesic distances (GPS) to compute scores - GPS = "gps" - # use only masks (IOU) to compute scores - IOU = "iou" - - -class DensePoseDataMode(str, Enum): - # use estimated IUV data (default mode) - IUV_DT = "iuvdt" - # use ground truth IUV data - IUV_GT = "iuvgt" - # use ground truth labels I and set UV to 0 - I_GT_UV_0 = "igtuv0" - # use ground truth labels I and estimated UV coordinates - I_GT_UV_DT = "igtuvdt" - # use estimated labels I and set UV to 0 - I_DT_UV_0 = "idtuv0" - - -class DensePoseCocoEval: - # Interface for evaluating detection on the Microsoft COCO dataset. - # - # The usage for CocoEval is as follows: - # cocoGt=..., cocoDt=... # load dataset and results - # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object - # E.params.recThrs = ...; # set parameters as desired - # E.evaluate(); # run per image evaluation - # E.accumulate(); # accumulate per image results - # E.summarize(); # display summary metrics of results - # For example usage see evalDemo.m and http://mscoco.org/. - # - # The evaluation parameters are as follows (defaults in brackets): - # imgIds - [all] N img ids to use for evaluation - # catIds - [all] K cat ids to use for evaluation - # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation - # recThrs - [0:.01:1] R=101 recall thresholds for evaluation - # areaRng - [...] A=4 object area ranges for evaluation - # maxDets - [1 10 100] M=3 thresholds on max detections per image - # iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose' - # iouType replaced the now DEPRECATED useSegm parameter. - # useCats - [1] if true use category labels for evaluation - # Note: if useCats=0 category labels are ignored as in proposal scoring. - # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 
- # - # evaluate(): evaluates detections on every image and every category and - # concats the results into the "evalImgs" with fields: - # dtIds - [1xD] id for each of the D detections (dt) - # gtIds - [1xG] id for each of the G ground truths (gt) - # dtMatches - [TxD] matching gt id at each IoU or 0 - # gtMatches - [TxG] matching dt id at each IoU or 0 - # dtScores - [1xD] confidence of each dt - # gtIgnore - [1xG] ignore flag for each gt - # dtIgnore - [TxD] ignore flag for each dt at each IoU - # - # accumulate(): accumulates the per-image, per-category evaluation - # results in "evalImgs" into the dictionary "eval" with fields: - # params - parameters used for evaluation - # date - date evaluation was performed - # counts - [T,R,K,A,M] parameter dimensions (see above) - # precision - [TxRxKxAxM] precision for every evaluation setting - # recall - [TxKxAxM] max recall for every evaluation setting - # Note: precision and recall==-1 for settings with no gt objects. - # - # See also coco, mask, pycocoDemo, pycocoEvalDemo - # - # Microsoft COCO Toolbox. version 2.0 - # Data, paper, and tutorials available at: http://mscoco.org/ - # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. - # Licensed under the Simplified BSD License [see coco/license.txt] - def __init__( - self, - cocoGt=None, - cocoDt=None, - iouType: str = "densepose", - multi_storage=None, - embedder=None, - dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS, - dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT, - ): - """ - Initialize CocoEval using coco APIs for gt and dt - :param cocoGt: coco object with ground truth annotations - :param cocoDt: coco object with detection results - :return: None - """ - self.cocoGt = cocoGt # ground truth COCO API - self.cocoDt = cocoDt # detections COCO API - self.multi_storage = multi_storage - self.embedder = embedder - self._dpEvalMode = dpEvalMode - self._dpDataMode = dpDataMode - self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI] - self.eval = {} # accumulated evaluation results - self._gts = defaultdict(list) # gt for evaluation - self._dts = defaultdict(list) # dt for evaluation - self.params = Params(iouType=iouType) # parameters - self._paramsEval = {} # parameters for evaluation - self.stats = [] # result summarization - self.ious = {} # ious between all gts and dts - if cocoGt is not None: - self.params.imgIds = sorted(cocoGt.getImgIds()) - self.params.catIds = sorted(cocoGt.getCatIds()) - self.ignoreThrBB = 0.7 - self.ignoreThrUV = 0.9 - - def _loadGEval(self): - smpl_subdiv_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat" - ) - pdist_transform_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat" - ) - pdist_matrix_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120 - ) - SMPL_subdiv = loadmat(smpl_subdiv_fpath) - self.PDIST_transform = loadmat(pdist_transform_fpath) - self.PDIST_transform = self.PDIST_transform["index"].squeeze() - UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze() - ClosestVertInds = np.arange(UV.shape[1]) + 1 - self.Part_UVs = [] - self.Part_ClosestVertInds = [] - for i in np.arange(24): - self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]) - self.Part_ClosestVertInds.append( - ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)] - ) - - with open(pdist_matrix_fpath, "rb") 
as hFile: - arrays = pickle.load(hFile, encoding="latin1") - self.Pdist_matrix = arrays["Pdist_matrix"] - self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze()) - # Mean geodesic distances for parts. - self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150]) - # Coarse Part labels. - self.CoarseParts = np.array( - [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8] - ) - - def _prepare(self): - """ - Prepare ._gts and ._dts for evaluation based on params - :return: None - """ - - def _toMask(anns, coco): - # modify ann['segmentation'] by reference - for ann in anns: - # safeguard for invalid segmentation annotation; - # annotations containing empty lists exist in the posetrack - # dataset. This is not a correct segmentation annotation - # in terms of COCO format; we need to deal with it somehow - segm = ann["segmentation"] - if type(segm) is list and len(segm) == 0: - ann["segmentation"] = None - continue - rle = coco.annToRLE(ann) - ann["segmentation"] = rle - - def _getIgnoreRegion(iid, coco): - img = coco.imgs[iid] - - if "ignore_regions_x" not in img.keys(): - return None - - if len(img["ignore_regions_x"]) == 0: - return None - - rgns_merged = [ - [v for xy in zip(region_x, region_y) for v in xy] - for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"]) - ] - rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"]) - rle = maskUtils.merge(rles) - return maskUtils.decode(rle) - - def _checkIgnore(dt, iregion): - if iregion is None: - return True - - bb = np.array(dt["bbox"]).astype(int) - x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3] - x2 = min([x2, iregion.shape[1]]) - y2 = min([y2, iregion.shape[0]]) - - if bb[2] * bb[3] == 0: - return False - - crop_iregion = iregion[y1:y2, x1:x2] - - if crop_iregion.sum() == 0: - return True - - if "densepose" not in dt.keys(): # filtering boxes - return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB - - # filtering UVs - ignoremask = np.require(crop_iregion, requirements=["F"]) - mask = self._extract_mask(dt) - uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) - uvmask_ = maskUtils.encode(uvmask) - ignoremask_ = maskUtils.encode(ignoremask) - uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0] - return uviou < self.ignoreThrUV - - p = self.params - - if p.useCats: - gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) - dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) - else: - gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) - dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) - - imns = self.cocoGt.loadImgs(p.imgIds) - self.size_mapping = {} - for im in imns: - self.size_mapping[im["id"]] = [im["height"], im["width"]] - - # if iouType == 'uv', add point gt annotations - if p.iouType == "densepose": - self._loadGEval() - - # convert ground truth to mask if iouType == 'segm' - if p.iouType == "segm": - _toMask(gts, self.cocoGt) - _toMask(dts, self.cocoDt) - - # set ignore flag - for gt in gts: - gt["ignore"] = gt["ignore"] if "ignore" in gt else 0 - gt["ignore"] = "iscrowd" in gt and gt["iscrowd"] - if p.iouType == "keypoints": - gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"] - if p.iouType == "densepose": - gt["ignore"] = ("dp_x" in gt) == 0 - if p.iouType == "segm": - gt["ignore"] = gt["segmentation"] is None - - self._gts = defaultdict(list) # gt for evaluation - self._dts = 
defaultdict(list) # dt for evaluation - self._igrgns = defaultdict(list) - - for gt in gts: - iid = gt["image_id"] - if iid not in self._igrgns.keys(): - self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt) - if _checkIgnore(gt, self._igrgns[iid]): - self._gts[iid, gt["category_id"]].append(gt) - for dt in dts: - iid = dt["image_id"] - if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]): - self._dts[iid, dt["category_id"]].append(dt) - - self.evalImgs = defaultdict(list) # per-image per-category evaluation results - self.eval = {} # accumulated evaluation results - - def evaluate(self): - """ - Run per image evaluation on given images and store results (a list of dict) in self.evalImgs - :return: None - """ - tic = time.time() - logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType)) - p = self.params - # add backward compatibility if useSegm is specified in params - if p.useSegm is not None: - p.iouType = "segm" if p.useSegm == 1 else "bbox" - logger.info("useSegm (deprecated) is not None. Running DensePose evaluation") - p.imgIds = list(np.unique(p.imgIds)) - if p.useCats: - p.catIds = list(np.unique(p.catIds)) - p.maxDets = sorted(p.maxDets) - self.params = p - - self._prepare() - # loop through images, area range, max detection number - catIds = p.catIds if p.useCats else [-1] - - if p.iouType in ["segm", "bbox"]: - computeIoU = self.computeIoU - elif p.iouType == "keypoints": - computeIoU = self.computeOks - elif p.iouType == "densepose": - computeIoU = self.computeOgps - if self._dpEvalMode in {DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU}: - self.real_ious = { - (imgId, catId): self.computeDPIoU(imgId, catId) - for imgId in p.imgIds - for catId in catIds - } - - self.ious = { - (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds - } - - evaluateImg = self.evaluateImg - maxDet = p.maxDets[-1] - self.evalImgs = [ - evaluateImg(imgId, catId, areaRng, maxDet) - for catId in catIds - for areaRng in p.areaRng - for imgId in p.imgIds - ] - self._paramsEval = copy.deepcopy(self.params) - toc = time.time() - logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic)) - - def getDensePoseMask(self, polys): - maskGen = np.zeros([256, 256]) - stop = min(len(polys) + 1, 15) - for i in range(1, stop): - if polys[i - 1]: - currentMask = maskUtils.decode(polys[i - 1]) - maskGen[currentMask > 0] = i - return maskGen - - def _generate_rlemask_on_image(self, mask, imgId, data): - bbox_xywh = np.array(data["bbox"]) - x, y, w, h = bbox_xywh - im_h, im_w = self.size_mapping[imgId] - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - if mask is not None: - x0 = max(int(x), 0) - x1 = min(int(x + w), im_w, int(x) + mask.shape[1]) - y0 = max(int(y), 0) - y1 = min(int(y + h), im_h, int(y) + mask.shape[0]) - y = int(y) - x = int(x) - im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x] - im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"]) - rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0] - return rle_mask - - def computeDPIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = 
dt[0 : p.maxDets[-1]] - - gtmasks = [] - for g in gt: - if DensePoseDataRelative.S_KEY in g: - # convert DensePose mask to a binary mask - mask = np.minimum(self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]), 1.0) - _, _, w, h = g["bbox"] - scale_x = float(max(w, 1)) / mask.shape[1] - scale_y = float(max(h, 1)) / mask.shape[0] - mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False) - mask = np.array(mask > 0.5, dtype=np.uint8) - rle_mask = self._generate_rlemask_on_image(mask, imgId, g) - elif "segmentation" in g: - segmentation = g["segmentation"] - if isinstance(segmentation, list) and segmentation: - # polygons - im_h, im_w = self.size_mapping[imgId] - rles = maskUtils.frPyObjects(segmentation, im_h, im_w) - rle_mask = maskUtils.merge(rles) - elif isinstance(segmentation, dict): - if isinstance(segmentation["counts"], list): - # uncompressed RLE - im_h, im_w = self.size_mapping[imgId] - rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w) - else: - # compressed RLE - rle_mask = segmentation - else: - rle_mask = self._generate_rlemask_on_image(None, imgId, g) - else: - rle_mask = self._generate_rlemask_on_image(None, imgId, g) - gtmasks.append(rle_mask) - - dtmasks = [] - for d in dt: - mask = self._extract_mask(d) - mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) - rle_mask = self._generate_rlemask_on_image(mask, imgId, d) - dtmasks.append(rle_mask) - - # compute iou between each dt and gt region - iscrowd = [int(o.get("iscrowd", 0)) for o in gt] - iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd) - return iousDP - - def computeIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - if p.iouType == "segm": - g = [g["segmentation"] for g in gt if g["segmentation"] is not None] - d = [d["segmentation"] for d in dt if d["segmentation"] is not None] - elif p.iouType == "bbox": - g = [g["bbox"] for g in gt] - d = [d["bbox"] for d in dt] - else: - raise Exception("unknown iouType for iou computation") - - # compute iou between each dt and gt region - iscrowd = [int(o.get("iscrowd", 0)) for o in gt] - ious = maskUtils.iou(d, g, iscrowd) - return ious - - def computeOks(self, imgId, catId): - p = self.params - # dimension here should be Nxm - gts = self._gts[imgId, catId] - dts = self._dts[imgId, catId] - inds = np.argsort([-d["score"] for d in dts], kind="mergesort") - dts = [dts[i] for i in inds] - if len(dts) > p.maxDets[-1]: - dts = dts[0 : p.maxDets[-1]] - # if len(gts) == 0 and len(dts) == 0: - if len(gts) == 0 or len(dts) == 0: - return [] - ious = np.zeros((len(dts), len(gts))) - sigmas = ( - np.array( - [ - 0.26, - 0.25, - 0.25, - 0.35, - 0.35, - 0.79, - 0.79, - 0.72, - 0.72, - 0.62, - 0.62, - 1.07, - 1.07, - 0.87, - 0.87, - 0.89, - 0.89, - ] - ) - / 10.0 - ) - vars = (sigmas * 2) ** 2 - k = len(sigmas) - # compute oks between each detection and ground truth object - for j, gt in enumerate(gts): - # create bounds for ignore regions(double the gt bbox) - g = np.array(gt["keypoints"]) - xg = g[0::3] - yg = g[1::3] - vg = g[2::3] - k1 = np.count_nonzero(vg > 0) - bb = gt["bbox"] - x0 = bb[0] - bb[2] - x1 = bb[0] + bb[2] * 2 - y0 = bb[1] - bb[3] - 
y1 = bb[1] + bb[3] * 2 - for i, dt in enumerate(dts): - d = np.array(dt["keypoints"]) - xd = d[0::3] - yd = d[1::3] - if k1 > 0: - # measure the per-keypoint distance if keypoints visible - dx = xd - xg - dy = yd - yg - else: - # measure minimum distance to keypoints in (x0,y0) & (x1,y1) - z = np.zeros(k) - dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0) - dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0) - e = (dx**2 + dy**2) / vars / (gt["area"] + np.spacing(1)) / 2 - if k1 > 0: - e = e[vg > 0] - ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] - return ious - - def _extract_mask(self, dt: Dict[str, Any]) -> np.ndarray: - if "densepose" in dt: - densepose_results_quantized = dt["densepose"] - return densepose_results_quantized.labels_uv_uint8[0].numpy() - elif "cse_mask" in dt: - return dt["cse_mask"] - elif "coarse_segm" in dt: - dy = max(int(dt["bbox"][3]), 1) - dx = max(int(dt["bbox"][2]), 1) - return ( - F.interpolate( - dt["coarse_segm"].unsqueeze(0), - (dy, dx), - mode="bilinear", - align_corners=False, - ) - .squeeze(0) - .argmax(0) - .numpy() - .astype(np.uint8) - ) - elif "record_id" in dt: - assert ( - self.multi_storage is not None - ), f"Storage record id encountered in a detection {dt}, but no storage provided!" - record = self.multi_storage.get(dt["rank"], dt["record_id"]) - coarse_segm = record["coarse_segm"] - dy = max(int(dt["bbox"][3]), 1) - dx = max(int(dt["bbox"][2]), 1) - return ( - F.interpolate( - coarse_segm.unsqueeze(0), - (dy, dx), - mode="bilinear", - align_corners=False, - ) - .squeeze(0) - .argmax(0) - .numpy() - .astype(np.uint8) - ) - else: - raise Exception(f"No mask data in the detection: {dt}") - raise ValueError('The prediction dict needs to contain either "densepose" or "cse_mask"') - - def _extract_iuv( - self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any] - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Extract arrays of I, U and V values at given points as numpy arrays - given the data mode stored in self._dpDataMode - """ - if self._dpDataMode == DensePoseDataMode.IUV_DT: - # estimated labels and UV (default) - ipoints = densepose_data[0, py, px] - upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. - vpoints = densepose_data[2, py, px] / 255.0 - elif self._dpDataMode == DensePoseDataMode.IUV_GT: - # ground truth - ipoints = np.array(gt["dp_I"]) - upoints = np.array(gt["dp_U"]) - vpoints = np.array(gt["dp_V"]) - elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0: - # ground truth labels, UV = 0 - ipoints = np.array(gt["dp_I"]) - upoints = upoints * 0.0 - vpoints = vpoints * 0.0 - elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT: - # ground truth labels, estimated UV - ipoints = np.array(gt["dp_I"]) - upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. 
- vpoints = densepose_data[2, py, px] / 255.0 - elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0: - # estimated labels, UV = 0 - ipoints = densepose_data[0, py, px] - upoints = upoints * 0.0 - vpoints = vpoints * 0.0 - else: - raise ValueError(f"Unknown data mode: {self._dpDataMode}") - return ipoints, upoints, vpoints - - def computeOgps_single_pair(self, dt, gt, py, px, pt_mask): - if "densepose" in dt: - ipoints, upoints, vpoints = self.extract_iuv_from_quantized(dt, gt, py, px, pt_mask) - return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints) - elif "u" in dt: - ipoints, upoints, vpoints = self.extract_iuv_from_raw(dt, gt, py, px, pt_mask) - return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints) - elif "record_id" in dt: - assert ( - self.multi_storage is not None - ), f"Storage record id encountered in detection {dt}, but no storage provided!" - record = self.multi_storage.get(dt["rank"], dt["record_id"]) - record["bbox"] = dt["bbox"] - if "u" in record: - ipoints, upoints, vpoints = self.extract_iuv_from_raw(record, gt, py, px, pt_mask) - return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints) - elif "embedding" in record: - return self.computeOgps_single_pair_cse( - dt, - gt, - py, - px, - pt_mask, - record["coarse_segm"], - record["embedding"], - record["bbox"], - ) - else: - raise Exception(f"Unknown record format: {record}") - elif "embedding" in dt: - return self.computeOgps_single_pair_cse( - dt, gt, py, px, pt_mask, dt["coarse_segm"], dt["embedding"], dt["bbox"] - ) - raise Exception(f"Unknown detection format: {dt}") - - def extract_iuv_from_quantized(self, dt, gt, py, px, pt_mask): - densepose_results_quantized = dt["densepose"] - ipoints, upoints, vpoints = self._extract_iuv( - densepose_results_quantized.labels_uv_uint8.numpy(), py, px, gt - ) - ipoints[pt_mask == -1] = 0 - return ipoints, upoints, vpoints - - def extract_iuv_from_raw(self, dt, gt, py, px, pt_mask): - labels_dt = resample_fine_and_coarse_segm_tensors_to_bbox( - dt["fine_segm"].unsqueeze(0), - dt["coarse_segm"].unsqueeze(0), - dt["bbox"], - ) - uv = resample_uv_tensors_to_bbox( - dt["u"].unsqueeze(0), dt["v"].unsqueeze(0), labels_dt.squeeze(0), dt["bbox"] - ) - labels_uv_uint8 = torch.cat((labels_dt.byte(), (uv * 255).clamp(0, 255).byte())) - ipoints, upoints, vpoints = self._extract_iuv(labels_uv_uint8.numpy(), py, px, gt) - ipoints[pt_mask == -1] = 0 - return ipoints, upoints, vpoints - - def computeOgps_single_pair_iuv(self, dt, gt, ipoints, upoints, vpoints): - cVertsGT, ClosestVertsGTTransformed = self.findAllClosestVertsGT(gt) - cVerts = self.findAllClosestVertsUV(upoints, vpoints, ipoints) - # Get pairwise geodesic distances between gt and estimated mesh points. - dist = self.getDistancesUV(ClosestVertsGTTransformed, cVerts) - # Compute the Ogps measure. - # Find the mean geodesic normalization distance for - # each GT point, based on which part it is on. 
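# Note (added annotation, not from the deleted file): Current_Mean_Distances below is
# the per-part normalization constant kappa, looked up per annotated GT point via
# Part_ids -> CoarseParts -> Mean_Distances; computeOgps later scores each matched
# point as exp(-d^2 / (2 * kappa^2)), with d the geodesic distance between the GT and
# estimated mesh vertices, and averages these values to obtain the GPS of the pair.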
- Current_Mean_Distances = self.Mean_Distances[ - self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]] - ] - return dist, Current_Mean_Distances - - def computeOgps_single_pair_cse( - self, dt, gt, py, px, pt_mask, coarse_segm, embedding, bbox_xywh_abs - ): - # 0-based mesh vertex indices - cVertsGT = torch.as_tensor(gt["dp_vertex"], dtype=torch.int64) - # label for each pixel of the bbox, [H, W] tensor of long - labels_dt = resample_coarse_segm_tensor_to_bbox( - coarse_segm.unsqueeze(0), bbox_xywh_abs - ).squeeze(0) - x, y, w, h = bbox_xywh_abs - # embedding for each pixel of the bbox, [D, H, W] tensor of float32 - embedding = F.interpolate( - embedding.unsqueeze(0), (int(h), int(w)), mode="bilinear", align_corners=False - ).squeeze(0) - # valid locations py, px - py_pt = torch.from_numpy(py[pt_mask > -1]) - px_pt = torch.from_numpy(px[pt_mask > -1]) - cVerts = torch.ones_like(cVertsGT) * -1 - cVerts[pt_mask > -1] = self.findClosestVertsCse( - embedding, py_pt, px_pt, labels_dt, gt["ref_model"] - ) - # Get pairwise geodesic distances between gt and estimated mesh points. - dist = self.getDistancesCse(cVertsGT, cVerts, gt["ref_model"]) - # normalize distances - if (gt["ref_model"] == "smpl_27554") and ("dp_I" in gt): - Current_Mean_Distances = self.Mean_Distances[ - self.CoarseParts[np.array(gt["dp_I"], dtype=int)] - ] - else: - Current_Mean_Distances = 0.255 - return dist, Current_Mean_Distances - - def computeOgps(self, imgId, catId): - p = self.params - # dimension here should be Nxm - g = self._gts[imgId, catId] - d = self._dts[imgId, catId] - inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort") - d = [d[i] for i in inds] - if len(d) > p.maxDets[-1]: - d = d[0 : p.maxDets[-1]] - # if len(gts) == 0 and len(dts) == 0: - if len(g) == 0 or len(d) == 0: - return [] - ious = np.zeros((len(d), len(g))) - # compute opgs between each detection and ground truth object - # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5 - # 1 # dist = 0.3m corresponds to ogps = 0.96 - # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5) - for j, gt in enumerate(g): - if not gt["ignore"]: - g_ = gt["bbox"] - for i, dt in enumerate(d): - # - dy = int(dt["bbox"][3]) - dx = int(dt["bbox"][2]) - dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0 - dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0 - py = (dp_y + g_[1] - dt["bbox"][1]).astype(int) - px = (dp_x + g_[0] - dt["bbox"][0]).astype(int) - # - pts = np.zeros(len(px)) - pts[px >= dx] = -1 - pts[py >= dy] = -1 - pts[px < 0] = -1 - pts[py < 0] = -1 - if len(pts) < 1: - ogps = 0.0 - elif np.max(pts) == -1: - ogps = 0.0 - else: - px[pts == -1] = 0 - py[pts == -1] = 0 - dists_between_matches, dist_norm_coeffs = self.computeOgps_single_pair( - dt, gt, py, px, pts - ) - # Compute gps - ogps_values = np.exp( - -(dists_between_matches**2) / (2 * (dist_norm_coeffs**2)) - ) - # - ogps = np.mean(ogps_values) if len(ogps_values) > 0 else 0.0 - ious[i, j] = ogps - - gbb = [gt["bbox"] for gt in g] - dbb = [dt["bbox"] for dt in d] - - # compute iou between each dt and gt region - iscrowd = [int(o.get("iscrowd", 0)) for o in g] - ious_bb = maskUtils.iou(dbb, gbb, iscrowd) - return ious, ious_bb - - def evaluateImg(self, imgId, catId, aRng, maxDet): - """ - perform evaluation for single category and image - :return: dict (single image results) - """ - - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in 
p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return None - - for g in gt: - # g['_ignore'] = g['ignore'] - if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]): - g["_ignore"] = True - else: - g["_ignore"] = False - - # sort dt highest score first, sort gt ignore last - gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort") - gt = [gt[i] for i in gtind] - dtind = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in dtind[0:maxDet]] - iscrowd = [int(o.get("iscrowd", 0)) for o in gt] - # load computed ious - if p.iouType == "densepose": - # print('Checking the length', len(self.ious[imgId, catId])) - # if len(self.ious[imgId, catId]) == 0: - # print(self.ious[imgId, catId]) - ious = ( - self.ious[imgId, catId][0][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - ioubs = ( - self.ious[imgId, catId][1][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - if self._dpEvalMode in {DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU}: - iousM = ( - self.real_ious[imgId, catId][:, gtind] - if len(self.real_ious[imgId, catId]) > 0 - else self.real_ious[imgId, catId] - ) - else: - ious = ( - self.ious[imgId, catId][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - - T = len(p.iouThrs) - G = len(gt) - D = len(dt) - gtm = np.zeros((T, G)) - dtm = np.zeros((T, D)) - gtIg = np.array([g["_ignore"] for g in gt]) - dtIg = np.zeros((T, D)) - if np.all(gtIg) and p.iouType == "densepose": - dtIg = np.logical_or(dtIg, True) - - if len(ious) > 0: # and not p.iouType == 'densepose': - for tind, t in enumerate(p.iouThrs): - for dind, d in enumerate(dt): - # information about best match so far (m=-1 -> unmatched) - iou = min([t, 1 - 1e-10]) - m = -1 - for gind, _g in enumerate(gt): - # if this gt already matched, and not a crowd, continue - if gtm[tind, gind] > 0 and not iscrowd[gind]: - continue - # if dt matched to reg gt, and on ignore gt, stop - if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1: - break - if p.iouType == "densepose": - if self._dpEvalMode == DensePoseEvalMode.GPSM: - new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind]) - elif self._dpEvalMode == DensePoseEvalMode.IOU: - new_iou = iousM[dind, gind] - elif self._dpEvalMode == DensePoseEvalMode.GPS: - new_iou = ious[dind, gind] - else: - new_iou = ious[dind, gind] - if new_iou < iou: - continue - if new_iou == 0.0: - continue - # if match successful and best so far, store appropriately - iou = new_iou - m = gind - # if match made store id of match for both dt and gt - if m == -1: - continue - dtIg[tind, dind] = gtIg[m] - dtm[tind, dind] = gt[m]["id"] - gtm[tind, m] = d["id"] - - if p.iouType == "densepose": - if not len(ioubs) == 0: - for dind, d in enumerate(dt): - # information about best match so far (m=-1 -> unmatched) - if dtm[tind, dind] == 0: - ioub = 0.8 - m = -1 - for gind, _g in enumerate(gt): - # if this gt already matched, and not a crowd, continue - if gtm[tind, gind] > 0 and not iscrowd[gind]: - continue - # continue to next gt unless better match made - if ioubs[dind, gind] < ioub: - continue - # if match successful and best so far, store appropriately - ioub = ioubs[dind, gind] - m = gind - # if match made store id of match for both dt and gt - if m > -1: - dtIg[:, dind] = gtIg[m] - if gtIg[m]: - dtm[tind, dind] = gt[m]["id"] - gtm[tind, m] = d["id"] - # set unmatched detections outside of area range to ignore - a = np.array([d["area"] < aRng[0] or d["area"] 
> aRng[1] for d in dt]).reshape((1, len(dt))) - dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0))) - # store results for given image and category - # print('Done with the function', len(self.ious[imgId, catId])) - return { - "image_id": imgId, - "category_id": catId, - "aRng": aRng, - "maxDet": maxDet, - "dtIds": [d["id"] for d in dt], - "gtIds": [g["id"] for g in gt], - "dtMatches": dtm, - "gtMatches": gtm, - "dtScores": [d["score"] for d in dt], - "gtIgnore": gtIg, - "dtIgnore": dtIg, - } - - def accumulate(self, p=None): - """ - Accumulate per image evaluation results and store the result in self.eval - :param p: input params for evaluation - :return: None - """ - logger.info("Accumulating evaluation results...") - tic = time.time() - if not self.evalImgs: - logger.info("Please run evaluate() first") - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories - recall = -(np.ones((T, K, A, M))) - - # create dictionary for future indexing - logger.info("Categories: {}".format(p.catIds)) - _pe = self._paramsEval - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0 * A0 * I0 - for a, a0 in enumerate(a_list): - Na = a0 * I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if e is not None] - if len(E) == 0: - continue - dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
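# Note (added annotation, not from the deleted file): the block below pools the
# detections of every image in this (category, area range, maxDet) group, re-sorts
# them by score, and converts cumulative TP/FP counts into a precision/recall curve;
# the precision values are then made monotonically non-increasing and sampled at the
# recall thresholds p.recThrs to fill self.eval["precision"].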
- inds = np.argsort(-dtScores, kind="mergesort") - - dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] - dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] - gtIg = np.concatenate([e["gtIgnore"] for e in E]) - npig = np.count_nonzero(gtIg == 0) - if npig == 0: - continue - tps = np.logical_and(dtm, np.logical_not(dtIg)) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) - tp_sum = np.cumsum(tps, axis=1).astype(dtype=float) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=float) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp + tp + np.spacing(1)) - q = np.zeros((R,)) - - if nd: - recall[t, k, a, m] = rc[-1] - else: - recall[t, k, a, m] = 0 - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist() - q = q.tolist() - - for i in range(nd - 1, 0, -1): - if pr[i] > pr[i - 1]: - pr[i - 1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side="left") - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except Exception: - pass - precision[t, :, k, a, m] = np.array(q) - logger.info( - "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision)) - ) - self.eval = { - "params": p, - "counts": [T, R, K, A, M], - "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "precision": precision, - "recall": recall, - } - toc = time.time() - logger.info("DONE (t={:0.2f}s).".format(toc - tic)) - - def summarize(self): - """ - Compute and display summary metrics for evaluation results. - Note this function can *only* be applied on the default parameter setting - """ - - def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): - p = self.params - iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" - titleStr = "Average Precision" if ap == 1 else "Average Recall" - typeStr = "(AP)" if ap == 1 else "(AR)" - measure = "IoU" - if self.params.iouType == "keypoints": - measure = "OKS" - elif self.params.iouType == "densepose": - measure = "OGPS" - iouStr = ( - "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) - if iouThr is None - else "{:0.2f}".format(iouThr) - ) - - aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] - mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] - if ap == 1: - # dimension of precision: [TxRxKxAxM] - s = self.eval["precision"] - # IoU - if iouThr is not None: - t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0] - s = s[t] - s = s[:, :, :, aind, mind] - else: - # dimension of recall: [TxKxAxM] - s = self.eval["recall"] - if iouThr is not None: - t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0] - s = s[t] - s = s[:, :, aind, mind] - if len(s[s > -1]) == 0: - mean_s = -1 - else: - mean_s = np.mean(s[s > -1]) - logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s)) - return mean_s - - def _summarizeDets(): - stats = np.zeros((12,)) - stats[0] = _summarize(1) - stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) - stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) - stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) - stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) - stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[7] = 
_summarize(0, maxDets=self.params.maxDets[1]) - stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) - stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) - stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) - return stats - - def _summarizeKps(): - stats = np.zeros((10,)) - stats[0] = _summarize(1, maxDets=20) - stats[1] = _summarize(1, maxDets=20, iouThr=0.5) - stats[2] = _summarize(1, maxDets=20, iouThr=0.75) - stats[3] = _summarize(1, maxDets=20, areaRng="medium") - stats[4] = _summarize(1, maxDets=20, areaRng="large") - stats[5] = _summarize(0, maxDets=20) - stats[6] = _summarize(0, maxDets=20, iouThr=0.5) - stats[7] = _summarize(0, maxDets=20, iouThr=0.75) - stats[8] = _summarize(0, maxDets=20, areaRng="medium") - stats[9] = _summarize(0, maxDets=20, areaRng="large") - return stats - - def _summarizeUvs(): - stats = [_summarize(1, maxDets=self.params.maxDets[0])] - min_threshold = self.params.iouThrs.min() - if min_threshold <= 0.201: - stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.2)] - if min_threshold <= 0.301: - stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.3)] - if min_threshold <= 0.401: - stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.4)] - stats += [ - _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5), - _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75), - _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium"), - _summarize(1, maxDets=self.params.maxDets[0], areaRng="large"), - _summarize(0, maxDets=self.params.maxDets[0]), - _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5), - _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75), - _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium"), - _summarize(0, maxDets=self.params.maxDets[0], areaRng="large"), - ] - return np.array(stats) - - def _summarizeUvsOld(): - stats = np.zeros((18,)) - stats[0] = _summarize(1, maxDets=self.params.maxDets[0]) - stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55) - stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60) - stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65) - stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70) - stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80) - stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85) - stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90) - stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95) - stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium") - stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large") - stats[13] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium") - stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large") - return stats - - if not self.eval: - raise Exception("Please run accumulate() first") - iouType = self.params.iouType - if iouType in ["segm", "bbox"]: - summarize = _summarizeDets - elif iouType in ["keypoints"]: - summarize = _summarizeKps - 
elif iouType in ["densepose"]: - summarize = _summarizeUvs - self.stats = summarize() - - def __str__(self): - self.summarize() - - # ================ functions for dense pose ============================== - def findAllClosestVertsUV(self, U_points, V_points, Index_points): - ClosestVerts = np.ones(Index_points.shape) * -1 - for i in np.arange(24): - # - if (i + 1) in Index_points: - UVs = np.array( - [U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]] - ) - Current_Part_UVs = self.Part_UVs[i] - Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] - D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() - ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[ - np.argmin(D, axis=0) - ] - ClosestVertsTransformed = self.PDIST_transform[ClosestVerts.astype(int) - 1] - ClosestVertsTransformed[ClosestVerts < 0] = 0 - return ClosestVertsTransformed - - def findClosestVertsCse(self, embedding, py, px, mask, mesh_name): - mesh_vertex_embeddings = self.embedder(mesh_name) - pixel_embeddings = embedding[:, py, px].t().to(device="cuda") - mask_vals = mask[py, px] - edm = squared_euclidean_distance_matrix(pixel_embeddings, mesh_vertex_embeddings) - vertex_indices = edm.argmin(dim=1).cpu() - vertex_indices[mask_vals <= 0] = -1 - return vertex_indices - - def findAllClosestVertsGT(self, gt): - # - I_gt = np.array(gt["dp_I"]) - U_gt = np.array(gt["dp_U"]) - V_gt = np.array(gt["dp_V"]) - # - # print(I_gt) - # - ClosestVertsGT = np.ones(I_gt.shape) * -1 - for i in np.arange(24): - if (i + 1) in I_gt: - UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]]) - Current_Part_UVs = self.Part_UVs[i] - Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] - D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() - ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)] - # - ClosestVertsGTTransformed = self.PDIST_transform[ClosestVertsGT.astype(int) - 1] - ClosestVertsGTTransformed[ClosestVertsGT < 0] = 0 - return ClosestVertsGT, ClosestVertsGTTransformed - - def getDistancesCse(self, cVertsGT, cVerts, mesh_name): - geodists_vertices = torch.ones_like(cVertsGT) * float("inf") - selected = (cVertsGT >= 0) * (cVerts >= 0) - mesh = create_mesh(mesh_name, "cpu") - geodists_vertices[selected] = mesh.geodists[cVertsGT[selected], cVerts[selected]] - return geodists_vertices.numpy() - - def getDistancesUV(self, cVertsGT, cVerts): - # - n = 27554 - dists = [] - for d in range(len(cVertsGT)): - if cVertsGT[d] > 0: - if cVerts[d] > 0: - i = cVertsGT[d] - 1 - j = cVerts[d] - 1 - if j == i: - dists.append(0) - elif j > i: - ccc = i - i = j - j = ccc - i = n - i - 1 - j = n - j - 1 - k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 - k = (n * n - n) / 2 - k - 1 - dists.append(self.Pdist_matrix[int(k)][0]) - else: - i = n - i - 1 - j = n - j - 1 - k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 - k = (n * n - n) / 2 - k - 1 - dists.append(self.Pdist_matrix[int(k)][0]) - else: - dists.append(np.inf) - return np.atleast_1d(np.array(dists).squeeze()) - - -class Params: - """ - Params for coco evaluation api - """ - - def setDetParams(self): - self.imgIds = [] - self.catIds = [] - # np.arange causes trouble. 
the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) - self.maxDets = [1, 10, 100] - self.areaRng = [ - [0**2, 1e5**2], - [0**2, 32**2], - [32**2, 96**2], - [96**2, 1e5**2], - ] - self.areaRngLbl = ["all", "small", "medium", "large"] - self.useCats = 1 - - def setKpParams(self): - self.imgIds = [] - self.catIds = [] - # np.arange causes trouble. the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(0.5, 0.95, np.round((0.95 - 0.5) / 0.05) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, np.round((1.00 - 0.0) / 0.01) + 1, endpoint=True) - self.maxDets = [20] - self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]] - self.areaRngLbl = ["all", "medium", "large"] - self.useCats = 1 - - def setUvParams(self): - self.imgIds = [] - self.catIds = [] - self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) - self.maxDets = [20] - self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]] - self.areaRngLbl = ["all", "medium", "large"] - self.useCats = 1 - - def __init__(self, iouType="segm"): - if iouType == "segm" or iouType == "bbox": - self.setDetParams() - elif iouType == "keypoints": - self.setKpParams() - elif iouType == "densepose": - self.setUvParams() - else: - raise Exception("iouType not supported") - self.iouType = iouType - # useSegm is deprecated - self.useSegm = None diff --git a/detectron2/projects/DensePose/densepose/evaluation/evaluator.py b/detectron2/projects/DensePose/densepose/evaluation/evaluator.py deleted file mode 100644 index 803d3dccbe60a637e349a22e3364f3c0b5f4f1e5..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/evaluation/evaluator.py +++ /dev/null @@ -1,423 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
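# Sketch of the floating-point issue mentioned in the Params comment above
# ("np.arange causes trouble"); the exact values are illustrative only. np.arange
# builds thresholds incrementally from a float step, so the endpoint can drift by a
# few ulps or the array can even gain or lose an element, whereas np.linspace with an
# explicit count returns exactly the intended number of thresholds and ends on 0.95.
import numpy as np
arange_thrs = np.arange(0.5, 0.95 + 0.05, 0.05)  # endpoint and length are not guaranteed
linspace_thrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
assert len(linspace_thrs) == 10 and linspace_thrs[-1] == 0.95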
- -# pyre-unsafe - -import contextlib -import copy -import io -import itertools -import logging -import numpy as np -import os -from collections import OrderedDict -from typing import Dict, Iterable, List, Optional -import pycocotools.mask as mask_utils -import torch -from pycocotools.coco import COCO -from tabulate import tabulate - -from detectron2.config import CfgNode -from detectron2.data import MetadataCatalog -from detectron2.evaluation import DatasetEvaluator -from detectron2.structures import BoxMode -from detectron2.utils.comm import gather, get_rank, is_main_process, synchronize -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import create_small_table - -from densepose.converters import ToChartResultConverter, ToMaskConverter -from densepose.data.datasets.coco import maybe_filter_and_map_categories_cocoapi -from densepose.structures import ( - DensePoseChartPredictorOutput, - DensePoseEmbeddingPredictorOutput, - quantize_densepose_chart_result, -) - -from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode -from .mesh_alignment_evaluator import MeshAlignmentEvaluator -from .tensor_storage import ( - SingleProcessFileTensorStorage, - SingleProcessRamTensorStorage, - SingleProcessTensorStorage, - SizeData, - storage_gather, -) - - -class DensePoseCOCOEvaluator(DatasetEvaluator): - def __init__( - self, - dataset_name, - distributed, - output_dir=None, - evaluator_type: str = "iuv", - min_iou_threshold: float = 0.5, - storage: Optional[SingleProcessTensorStorage] = None, - embedder=None, - should_evaluate_mesh_alignment: bool = False, - mesh_alignment_mesh_names: Optional[List[str]] = None, - ): - self._embedder = embedder - self._distributed = distributed - self._output_dir = output_dir - self._evaluator_type = evaluator_type - self._storage = storage - self._should_evaluate_mesh_alignment = should_evaluate_mesh_alignment - - assert not ( - should_evaluate_mesh_alignment and embedder is None - ), "Mesh alignment evaluation is activated, but no vertex embedder provided!" - if should_evaluate_mesh_alignment: - self._mesh_alignment_evaluator = MeshAlignmentEvaluator( - embedder, - mesh_alignment_mesh_names, - ) - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - self._min_threshold = min_iou_threshold - json_file = PathManager.get_local_path(self._metadata.json_file) - with contextlib.redirect_stdout(io.StringIO()): - self._coco_api = COCO(json_file) - maybe_filter_and_map_categories_cocoapi(dataset_name, self._coco_api) - - def reset(self): - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - The :class:`Instances` object needs to have `densepose` field. 
- """ - for input, output in zip(inputs, outputs): - instances = output["instances"].to(self._cpu_device) - if not instances.has("pred_densepose"): - continue - prediction_list = prediction_to_dict( - instances, - input["image_id"], - self._embedder, - self._metadata.class_to_mesh_name, - self._storage is not None, - ) - if self._storage is not None: - for prediction_dict in prediction_list: - dict_to_store = {} - for field_name in self._storage.data_schema: - dict_to_store[field_name] = prediction_dict[field_name] - record_id = self._storage.put(dict_to_store) - prediction_dict["record_id"] = record_id - prediction_dict["rank"] = get_rank() - for field_name in self._storage.data_schema: - del prediction_dict[field_name] - self._predictions.extend(prediction_list) - - def evaluate(self, img_ids=None): - if self._distributed: - synchronize() - predictions = gather(self._predictions) - predictions = list(itertools.chain(*predictions)) - else: - predictions = self._predictions - - multi_storage = storage_gather(self._storage) if self._storage is not None else None - - if not is_main_process(): - return - return copy.deepcopy(self._eval_predictions(predictions, multi_storage, img_ids)) - - def _eval_predictions(self, predictions, multi_storage=None, img_ids=None): - """ - Evaluate predictions on densepose. - Return results with the metrics of the tasks. - """ - self._logger.info("Preparing results for COCO format ...") - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._logger.info("Evaluating predictions ...") - res = OrderedDict() - results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco( - self._coco_api, - predictions, - multi_storage, - self._embedder, - class_names=self._metadata.get("thing_classes"), - min_threshold=self._min_threshold, - img_ids=img_ids, - ) - res["densepose_gps"] = results_gps - res["densepose_gpsm"] = results_gpsm - res["densepose_segm"] = results_segm - if self._should_evaluate_mesh_alignment: - res["densepose_mesh_alignment"] = self._evaluate_mesh_alignment() - return res - - def _evaluate_mesh_alignment(self): - self._logger.info("Mesh alignment evaluation ...") - mean_ge, mean_gps, per_mesh_metrics = self._mesh_alignment_evaluator.evaluate() - results = { - "GE": mean_ge * 100, - "GPS": mean_gps * 100, - } - mesh_names = set() - for metric_name in per_mesh_metrics: - for mesh_name, value in per_mesh_metrics[metric_name].items(): - results[f"{metric_name}-{mesh_name}"] = value * 100 - mesh_names.add(mesh_name) - self._print_mesh_alignment_results(results, mesh_names) - return results - - def _print_mesh_alignment_results(self, results: Dict[str, float], mesh_names: Iterable[str]): - self._logger.info("Evaluation results for densepose, mesh alignment:") - self._logger.info(f'| {"Mesh":13s} | {"GErr":7s} | {"GPS":7s} |') - self._logger.info("| :-----------: | :-----: | :-----: |") - for mesh_name in mesh_names: - ge_key = f"GE-{mesh_name}" - ge_str = f"{results[ge_key]:.4f}" if ge_key in results else " " - gps_key = f"GPS-{mesh_name}" - gps_str = f"{results[gps_key]:.4f}" if gps_key in results else " " - self._logger.info(f"| {mesh_name:13s} | {ge_str:7s} | {gps_str:7s} |") - self._logger.info("| :-------------------------------: |") - ge_key = "GE" - ge_str = f"{results[ge_key]:.4f}" if ge_key in results else " " - gps_key = "GPS" - gps_str = f"{results[gps_key]:.4f}" if 
gps_key in results else " " - self._logger.info(f'| {"MEAN":13s} | {ge_str:7s} | {gps_str:7s} |') - - -def prediction_to_dict(instances, img_id, embedder, class_to_mesh_name, use_storage): - """ - Args: - instances (Instances): the output of the model - img_id (str): the image id in COCO - - Returns: - list[dict]: the results in densepose evaluation format - """ - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - raw_boxes_xywh = BoxMode.convert( - instances.pred_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - - if isinstance(instances.pred_densepose, DensePoseEmbeddingPredictorOutput): - results_densepose = densepose_cse_predictions_to_dict( - instances, embedder, class_to_mesh_name, use_storage - ) - elif isinstance(instances.pred_densepose, DensePoseChartPredictorOutput): - if not use_storage: - results_densepose = densepose_chart_predictions_to_dict(instances) - else: - results_densepose = densepose_chart_predictions_to_storage_dict(instances) - - results = [] - for k in range(len(instances)): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": raw_boxes_xywh[k].tolist(), - "score": scores[k], - } - results.append({**result, **results_densepose[k]}) - return results - - -def densepose_chart_predictions_to_dict(instances): - segmentations = ToMaskConverter.convert( - instances.pred_densepose, instances.pred_boxes, instances.image_size - ) - - results = [] - for k in range(len(instances)): - densepose_results_quantized = quantize_densepose_chart_result( - ToChartResultConverter.convert(instances.pred_densepose[k], instances.pred_boxes[k]) - ) - densepose_results_quantized.labels_uv_uint8 = ( - densepose_results_quantized.labels_uv_uint8.cpu() - ) - segmentation = segmentations.tensor[k] - segmentation_encoded = mask_utils.encode( - np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"]) - ) - segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8") - result = { - "densepose": densepose_results_quantized, - "segmentation": segmentation_encoded, - } - results.append(result) - return results - - -def densepose_chart_predictions_to_storage_dict(instances): - results = [] - for k in range(len(instances)): - densepose_predictor_output = instances.pred_densepose[k] - result = { - "coarse_segm": densepose_predictor_output.coarse_segm.squeeze(0).cpu(), - "fine_segm": densepose_predictor_output.fine_segm.squeeze(0).cpu(), - "u": densepose_predictor_output.u.squeeze(0).cpu(), - "v": densepose_predictor_output.v.squeeze(0).cpu(), - } - results.append(result) - return results - - -def densepose_cse_predictions_to_dict(instances, embedder, class_to_mesh_name, use_storage): - results = [] - for k in range(len(instances)): - cse = instances.pred_densepose[k] - results.append( - { - "coarse_segm": cse.coarse_segm[0].cpu(), - "embedding": cse.embedding[0].cpu(), - } - ) - return results - - -def _evaluate_predictions_on_coco( - coco_gt, - coco_results, - multi_storage=None, - embedder=None, - class_names=None, - min_threshold: float = 0.5, - img_ids=None, -): - logger = logging.getLogger(__name__) - - densepose_metrics = _get_densepose_metrics(min_threshold) - if len(coco_results) == 0: # cocoapi does not handle empty results very well - logger.warn("No predictions from the model! 
Set scores to -1") - results_gps = {metric: -1 for metric in densepose_metrics} - results_gpsm = {metric: -1 for metric in densepose_metrics} - results_segm = {metric: -1 for metric in densepose_metrics} - return results_gps, results_gpsm, results_segm - - coco_dt = coco_gt.loadRes(coco_results) - - results = [] - for eval_mode_name in ["GPS", "GPSM", "IOU"]: - eval_mode = getattr(DensePoseEvalMode, eval_mode_name) - coco_eval = DensePoseCocoEval( - coco_gt, coco_dt, "densepose", multi_storage, embedder, dpEvalMode=eval_mode - ) - result = _derive_results_from_coco_eval( - coco_eval, eval_mode_name, densepose_metrics, class_names, min_threshold, img_ids - ) - results.append(result) - return results - - -def _get_densepose_metrics(min_threshold: float = 0.5): - metrics = ["AP"] - if min_threshold <= 0.201: - metrics += ["AP20"] - if min_threshold <= 0.301: - metrics += ["AP30"] - if min_threshold <= 0.401: - metrics += ["AP40"] - metrics.extend(["AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"]) - return metrics - - -def _derive_results_from_coco_eval( - coco_eval, eval_mode_name, metrics, class_names, min_threshold: float, img_ids -): - if img_ids is not None: - coco_eval.params.imgIds = img_ids - coco_eval.params.iouThrs = np.linspace( - min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True - ) - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} - logger = logging.getLogger(__name__) - logger.info( - f"Evaluation results for densepose, {eval_mode_name} metric: \n" - + create_small_table(results) - ) - if class_names is None or len(class_names) <= 1: - return results - - # Compute per-category AP, the same way as it is done in D2 - # (see detectron2/evaluation/coco_evaluation.py): - precisions = coco_eval.eval["precision"] - # precision has dims (iou, recall, cls, area range, max dets) - assert len(class_names) == precisions.shape[2] - - results_per_category = [] - for idx, name in enumerate(class_names): - # area range index 0: all area ranges - # max dets index -1: typically 100 per image - precision = precisions[:, :, idx, 0, -1] - precision = precision[precision > -1] - ap = np.mean(precision) if precision.size else float("nan") - results_per_category.append((f"{name}", float(ap * 100))) - - # tabulate it - n_cols = min(6, len(results_per_category) * 2) - results_flatten = list(itertools.chain(*results_per_category)) - results_2d = itertools.zip_longest(*[results_flatten[i::n_cols] for i in range(n_cols)]) - table = tabulate( - results_2d, - tablefmt="pipe", - floatfmt=".3f", - headers=["category", "AP"] * (n_cols // 2), - numalign="left", - ) - logger.info(f"Per-category {eval_mode_name} AP: \n" + table) - - results.update({"AP-" + name: ap for name, ap in results_per_category}) - return results - - -def build_densepose_evaluator_storage(cfg: CfgNode, output_folder: str): - storage_spec = cfg.DENSEPOSE_EVALUATION.STORAGE - if storage_spec == "none": - return None - evaluator_type = cfg.DENSEPOSE_EVALUATION.TYPE - # common output tensor sizes - hout = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE - wout = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE - n_csc = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - # specific output tensors - if evaluator_type == "iuv": - n_fsc = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - schema = { - "coarse_segm": SizeData(dtype="float32", shape=(n_csc, hout, wout)), - "fine_segm": 
SizeData(dtype="float32", shape=(n_fsc, hout, wout)), - "u": SizeData(dtype="float32", shape=(n_fsc, hout, wout)), - "v": SizeData(dtype="float32", shape=(n_fsc, hout, wout)), - } - elif evaluator_type == "cse": - embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE - schema = { - "coarse_segm": SizeData(dtype="float32", shape=(n_csc, hout, wout)), - "embedding": SizeData(dtype="float32", shape=(embed_size, hout, wout)), - } - else: - raise ValueError(f"Unknown evaluator type: {evaluator_type}") - # storage types - if storage_spec == "ram": - storage = SingleProcessRamTensorStorage(schema, io.BytesIO()) - elif storage_spec == "file": - fpath = os.path.join(output_folder, f"DensePoseEvaluatorStorage.{get_rank()}.bin") - PathManager.mkdirs(output_folder) - storage = SingleProcessFileTensorStorage(schema, fpath, "wb") - else: - raise ValueError(f"Unknown storage specification: {storage_spec}") - return storage diff --git a/detectron2/projects/DensePose/densepose/evaluation/mesh_alignment_evaluator.py b/detectron2/projects/DensePose/densepose/evaluation/mesh_alignment_evaluator.py deleted file mode 100644 index f6c76f3cf2d54250f7fa1d9a2a3a1d2c60eb0aad..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/evaluation/mesh_alignment_evaluator.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import json -import logging -from typing import List, Optional -import torch -from torch import nn - -from detectron2.utils.file_io import PathManager - -from densepose.structures.mesh import create_mesh - - -class MeshAlignmentEvaluator: - """ - Class for evaluation of 3D mesh alignment based on the learned vertex embeddings - """ - - def __init__(self, embedder: nn.Module, mesh_names: Optional[List[str]]): - self.embedder = embedder - # use the provided mesh names if not None and not an empty list - self.mesh_names = mesh_names if mesh_names else embedder.mesh_names - self.logger = logging.getLogger(__name__) - with PathManager.open( - "https://dl.fbaipublicfiles.com/densepose/data/cse/mesh_keyvertices_v0.json", "r" - ) as f: - self.mesh_keyvertices = json.load(f) - - def evaluate(self): - ge_per_mesh = {} - gps_per_mesh = {} - for mesh_name_1 in self.mesh_names: - avg_errors = [] - avg_gps = [] - embeddings_1 = self.embedder(mesh_name_1) - keyvertices_1 = self.mesh_keyvertices[mesh_name_1] - keyvertex_names_1 = list(keyvertices_1.keys()) - keyvertex_indices_1 = [keyvertices_1[name] for name in keyvertex_names_1] - for mesh_name_2 in self.mesh_names: - if mesh_name_1 == mesh_name_2: - continue - embeddings_2 = self.embedder(mesh_name_2) - keyvertices_2 = self.mesh_keyvertices[mesh_name_2] - sim_matrix_12 = embeddings_1[keyvertex_indices_1].mm(embeddings_2.T) - vertices_2_matching_keyvertices_1 = sim_matrix_12.argmax(axis=1) - mesh_2 = create_mesh(mesh_name_2, embeddings_2.device) - geodists = mesh_2.geodists[ - vertices_2_matching_keyvertices_1, - [keyvertices_2[name] for name in keyvertex_names_1], - ] - Current_Mean_Distances = 0.255 - gps = (-(geodists**2) / (2 * (Current_Mean_Distances**2))).exp() - avg_errors.append(geodists.mean().item()) - avg_gps.append(gps.mean().item()) - - ge_mean = torch.as_tensor(avg_errors).mean().item() - gps_mean = torch.as_tensor(avg_gps).mean().item() - ge_per_mesh[mesh_name_1] = ge_mean - gps_per_mesh[mesh_name_1] = gps_mean - ge_mean_global = torch.as_tensor(list(ge_per_mesh.values())).mean().item() - gps_mean_global = 
torch.as_tensor(list(gps_per_mesh.values())).mean().item() - per_mesh_metrics = { - "GE": ge_per_mesh, - "GPS": gps_per_mesh, - } - return ge_mean_global, gps_mean_global, per_mesh_metrics diff --git a/detectron2/projects/DensePose/densepose/evaluation/tensor_storage.py b/detectron2/projects/DensePose/densepose/evaluation/tensor_storage.py deleted file mode 100644 index 369a29470807e60be377516f7910a9f95ab0a47d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/evaluation/tensor_storage.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import io -import numpy as np -import os -from dataclasses import dataclass -from functools import reduce -from operator import mul -from typing import BinaryIO, Dict, Optional, Tuple -import torch - -from detectron2.utils.comm import gather, get_rank -from detectron2.utils.file_io import PathManager - - -@dataclass -class SizeData: - dtype: str - shape: Tuple[int] - - -def _calculate_record_field_size_b(data_schema: Dict[str, SizeData], field_name: str) -> int: - schema = data_schema[field_name] - element_size_b = np.dtype(schema.dtype).itemsize - record_field_size_b = reduce(mul, schema.shape) * element_size_b - return record_field_size_b - - -def _calculate_record_size_b(data_schema: Dict[str, SizeData]) -> int: - record_size_b = 0 - for field_name in data_schema: - record_field_size_b = _calculate_record_field_size_b(data_schema, field_name) - record_size_b += record_field_size_b - return record_size_b - - -def _calculate_record_field_sizes_b(data_schema: Dict[str, SizeData]) -> Dict[str, int]: - field_sizes_b = {} - for field_name in data_schema: - field_sizes_b[field_name] = _calculate_record_field_size_b(data_schema, field_name) - return field_sizes_b - - -class SingleProcessTensorStorage: - """ - Compact tensor storage to keep tensor data of predefined size and type. - """ - - def __init__(self, data_schema: Dict[str, SizeData], storage_impl: BinaryIO): - """ - Construct tensor storage based on information on data shape and size. - Internally uses numpy to interpret the type specification. - The storage must support operations `seek(offset, whence=os.SEEK_SET)` and - `read(size)` to be able to perform the `get` operation. - The storage must support operation `write(bytes)` to be able to perform - the `put` operation. - - Args: - data_schema (dict: str -> SizeData): dictionary which maps tensor name - to its size data (shape and data type), e.g. - ``` - { - "coarse_segm": SizeData(dtype="float32", shape=(112, 112)), - "embedding": SizeData(dtype="float32", shape=(16, 112, 112)), - } - ``` - storage_impl (BinaryIO): io instance that handles file-like seek, read - and write operations, e.g. 
a file handle or a memory buffer like io.BytesIO - """ - self.data_schema = data_schema - self.record_size_b = _calculate_record_size_b(data_schema) - self.record_field_sizes_b = _calculate_record_field_sizes_b(data_schema) - self.storage_impl = storage_impl - self.next_record_id = 0 - - def get(self, record_id: int) -> Dict[str, torch.Tensor]: - """ - Load tensors from the storage by record ID - - Args: - record_id (int): Record ID, for which to load the data - - Return: - dict: str -> tensor: tensor name mapped to tensor data, recorded under the provided ID - """ - self.storage_impl.seek(record_id * self.record_size_b, os.SEEK_SET) - data_bytes = self.storage_impl.read(self.record_size_b) - assert len(data_bytes) == self.record_size_b, ( - f"Expected data size {self.record_size_b} B could not be read: " - f"got {len(data_bytes)} B" - ) - record = {} - cur_idx = 0 - # it's important to read and write in the same order - for field_name in sorted(self.data_schema): - schema = self.data_schema[field_name] - field_size_b = self.record_field_sizes_b[field_name] - chunk = data_bytes[cur_idx : cur_idx + field_size_b] - data_np = np.frombuffer( - chunk, dtype=schema.dtype, count=reduce(mul, schema.shape) - ).reshape(schema.shape) - record[field_name] = torch.from_numpy(data_np) - cur_idx += field_size_b - return record - - def put(self, data: Dict[str, torch.Tensor]) -> int: - """ - Store tensors in the storage - - Args: - data (dict: str -> tensor): data to store, a dictionary which maps - tensor names into tensors; tensor shapes must match those specified - in data schema. - Return: - int: record ID, under which the data is stored - """ - # it's important to read and write in the same order - for field_name in sorted(self.data_schema): - assert ( - field_name in data - ), f"Field '{field_name}' not present in data: data keys are {data.keys()}" - value = data[field_name] - assert value.shape == self.data_schema[field_name].shape, ( - f"Mismatched tensor shapes for field '{field_name}': " - f"expected {self.data_schema[field_name].shape}, got {value.shape}" - ) - data_bytes = value.cpu().numpy().tobytes() - assert len(data_bytes) == self.record_field_sizes_b[field_name], ( - f"Expected field {field_name} to be of size " - f"{self.record_field_sizes_b[field_name]} B, got {len(data_bytes)} B" - ) - self.storage_impl.write(data_bytes) - record_id = self.next_record_id - self.next_record_id += 1 - return record_id - - -class SingleProcessFileTensorStorage(SingleProcessTensorStorage): - """ - Implementation of a single process tensor storage which stores data in a file - """ - - def __init__(self, data_schema: Dict[str, SizeData], fpath: str, mode: str): - self.fpath = fpath - assert "b" in mode, f"Tensor storage should be opened in binary mode, got '{mode}'" - if "w" in mode: - # pyre-fixme[6]: For 2nd argument expected `Union[typing_extensions.Liter... 
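# A minimal usage sketch of the tensor storage API documented above, using the
# SizeData / SingleProcessRamTensorStorage classes from this module; the field name
# and shape below are hypothetical.
import io
import torch
schema = {"coarse_segm": SizeData(dtype="float32", shape=(2, 112, 112))}
storage = SingleProcessRamTensorStorage(schema, io.BytesIO())
record_id = storage.put({"coarse_segm": torch.zeros(2, 112, 112)})  # 0 for the first record
restored = storage.get(record_id)  # {"coarse_segm": tensor of shape (2, 112, 112)}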
- file_h = PathManager.open(fpath, mode) - elif "r" in mode: - local_fpath = PathManager.get_local_path(fpath) - file_h = open(local_fpath, mode) - else: - raise ValueError(f"Unsupported file mode {mode}, supported modes: rb, wb") - super().__init__(data_schema, file_h) # pyre-ignore[6] - - -class SingleProcessRamTensorStorage(SingleProcessTensorStorage): - """ - Implementation of a single process tensor storage which stores data in RAM - """ - - def __init__(self, data_schema: Dict[str, SizeData], buf: io.BytesIO): - super().__init__(data_schema, buf) - - -class MultiProcessTensorStorage: - """ - Representation of a set of tensor storages created by individual processes, - allows to access those storages from a single owner process. The storages - should either be shared or broadcasted to the owner process. - The processes are identified by their rank, data is uniquely defined by - the rank of the process and the record ID. - """ - - def __init__(self, rank_to_storage: Dict[int, SingleProcessTensorStorage]): - self.rank_to_storage = rank_to_storage - - def get(self, rank: int, record_id: int) -> Dict[str, torch.Tensor]: - storage = self.rank_to_storage[rank] - return storage.get(record_id) - - def put(self, rank: int, data: Dict[str, torch.Tensor]) -> int: - storage = self.rank_to_storage[rank] - return storage.put(data) - - -class MultiProcessFileTensorStorage(MultiProcessTensorStorage): - def __init__(self, data_schema: Dict[str, SizeData], rank_to_fpath: Dict[int, str], mode: str): - rank_to_storage = { - rank: SingleProcessFileTensorStorage(data_schema, fpath, mode) - for rank, fpath in rank_to_fpath.items() - } - super().__init__(rank_to_storage) # pyre-ignore[6] - - -class MultiProcessRamTensorStorage(MultiProcessTensorStorage): - def __init__(self, data_schema: Dict[str, SizeData], rank_to_buffer: Dict[int, io.BytesIO]): - rank_to_storage = { - rank: SingleProcessRamTensorStorage(data_schema, buf) - for rank, buf in rank_to_buffer.items() - } - super().__init__(rank_to_storage) # pyre-ignore[6] - - -def _ram_storage_gather( - storage: SingleProcessRamTensorStorage, dst_rank: int = 0 -) -> Optional[MultiProcessRamTensorStorage]: - storage.storage_impl.seek(0, os.SEEK_SET) - # TODO: overhead, pickling a bytes object, can just pass bytes in a tensor directly - # see detectron2/utils.comm.py - data_list = gather(storage.storage_impl.read(), dst=dst_rank) - if get_rank() != dst_rank: - return None - rank_to_buffer = {i: io.BytesIO(data_list[i]) for i in range(len(data_list))} - multiprocess_storage = MultiProcessRamTensorStorage(storage.data_schema, rank_to_buffer) - return multiprocess_storage - - -def _file_storage_gather( - storage: SingleProcessFileTensorStorage, - dst_rank: int = 0, - mode: str = "rb", -) -> Optional[MultiProcessFileTensorStorage]: - storage.storage_impl.close() - fpath_list = gather(storage.fpath, dst=dst_rank) - if get_rank() != dst_rank: - return None - rank_to_fpath = {i: fpath_list[i] for i in range(len(fpath_list))} - return MultiProcessFileTensorStorage(storage.data_schema, rank_to_fpath, mode) - - -def storage_gather( - storage: SingleProcessTensorStorage, dst_rank: int = 0 -) -> Optional[MultiProcessTensorStorage]: - if isinstance(storage, SingleProcessRamTensorStorage): - return _ram_storage_gather(storage, dst_rank) - elif isinstance(storage, SingleProcessFileTensorStorage): - return _file_storage_gather(storage, dst_rank) - raise Exception(f"Unsupported storage for gather operation: {storage}") diff --git 
a/detectron2/projects/DensePose/densepose/modeling/__init__.py b/detectron2/projects/DensePose/densepose/modeling/__init__.py deleted file mode 100644 index 5c5b48b1fc6100dd531f7b61467876e222e40bdd..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType -from .filter import DensePoseDataFilter -from .inference import densepose_inference -from .utils import initialize_module_params -from .build import ( - build_densepose_data_filter, - build_densepose_embedder, - build_densepose_head, - build_densepose_losses, - build_densepose_predictor, -) diff --git a/detectron2/projects/DensePose/densepose/modeling/build.py b/detectron2/projects/DensePose/densepose/modeling/build.py deleted file mode 100644 index 82e40d9284eeb9c90bf5e2ac13a95f587c76a595..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/build.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Optional -from torch import nn - -from detectron2.config import CfgNode - -from .cse.embedder import Embedder -from .filter import DensePoseDataFilter - - -def build_densepose_predictor(cfg: CfgNode, input_channels: int): - """ - Create an instance of DensePose predictor based on configuration options. - - Args: - cfg (CfgNode): configuration options - input_channels (int): input tensor size along the channel dimension - Return: - An instance of DensePose predictor - """ - from .predictors import DENSEPOSE_PREDICTOR_REGISTRY - - predictor_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME - return DENSEPOSE_PREDICTOR_REGISTRY.get(predictor_name)(cfg, input_channels) - - -def build_densepose_data_filter(cfg: CfgNode): - """ - Build DensePose data filter which selects data for training - - Args: - cfg (CfgNode): configuration options - - Return: - Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances) - An instance of DensePose filter, which takes feature tensors and proposals - as an input and returns filtered features and proposals - """ - dp_filter = DensePoseDataFilter(cfg) - return dp_filter - - -def build_densepose_head(cfg: CfgNode, input_channels: int): - """ - Build DensePose head based on configurations options - - Args: - cfg (CfgNode): configuration options - input_channels (int): input tensor size along the channel dimension - Return: - An instance of DensePose head - """ - from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY - - head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME - return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels) - - -def build_densepose_losses(cfg: CfgNode): - """ - Build DensePose loss based on configurations options - - Args: - cfg (CfgNode): configuration options - Return: - An instance of DensePose loss - """ - from .losses import DENSEPOSE_LOSS_REGISTRY - - loss_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME - return DENSEPOSE_LOSS_REGISTRY.get(loss_name)(cfg) - - -def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]: - """ - Build embedder used to embed mesh vertices into an embedding space. - Embedder contains sub-embedders, one for each mesh ID. 
- - Args: - cfg (cfgNode): configuration options - Return: - Embedding module - """ - if cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS: - return Embedder(cfg) - return None diff --git a/detectron2/projects/DensePose/densepose/modeling/confidence.py b/detectron2/projects/DensePose/densepose/modeling/confidence.py deleted file mode 100644 index 364e389078e78935da9e432bc04b5530d2d9963f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/confidence.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from dataclasses import dataclass -from enum import Enum - -from detectron2.config import CfgNode - - -class DensePoseUVConfidenceType(Enum): - """ - Statistical model type for confidence learning, possible values: - - "iid_iso": statistically independent identically distributed residuals - with anisotropic covariance - - "indep_aniso": statistically independent residuals with anisotropic - covariances - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 - """ - - # fmt: off - IID_ISO = "iid_iso" - INDEP_ANISO = "indep_aniso" - # fmt: on - - -@dataclass -class DensePoseUVConfidenceConfig: - """ - Configuration options for confidence on UV data - """ - - enabled: bool = False - # lower bound on UV confidences - epsilon: float = 0.01 - type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO - - -@dataclass -class DensePoseSegmConfidenceConfig: - """ - Configuration options for confidence on segmentation - """ - - enabled: bool = False - # lower bound on confidence values - epsilon: float = 0.01 - - -@dataclass -class DensePoseConfidenceModelConfig: - """ - Configuration options for confidence models - """ - - # confidence for U and V values - uv_confidence: DensePoseUVConfidenceConfig - # segmentation confidence - segm_confidence: DensePoseSegmConfidenceConfig - - @staticmethod - def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig": - return DensePoseConfidenceModelConfig( - uv_confidence=DensePoseUVConfidenceConfig( - enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED, - epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON, - type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE), - ), - segm_confidence=DensePoseSegmConfidenceConfig( - enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED, - epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON, - ), - ) diff --git a/detectron2/projects/DensePose/densepose/modeling/cse/__init__.py b/detectron2/projects/DensePose/densepose/modeling/cse/__init__.py deleted file mode 100644 index 80248c94c5cc23f1503a6338af225f63bc8cec42..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/cse/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -# pyre-unsafe - -from .vertex_direct_embedder import VertexDirectEmbedder -from .vertex_feature_embedder import VertexFeatureEmbedder -from .embedder import Embedder diff --git a/detectron2/projects/DensePose/densepose/modeling/cse/embedder.py b/detectron2/projects/DensePose/densepose/modeling/cse/embedder.py deleted file mode 100644 index 69082294acee57517b4b4ab8c11814b7c99e5232..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/cse/embedder.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import logging -import numpy as np -import pickle -from enum import Enum -from typing import Optional -import torch -from torch import nn - -from detectron2.config import CfgNode -from detectron2.utils.file_io import PathManager - -from .vertex_direct_embedder import VertexDirectEmbedder -from .vertex_feature_embedder import VertexFeatureEmbedder - - -class EmbedderType(Enum): - """ - Embedder type which defines how vertices are mapped into the embedding space: - - "vertex_direct": direct vertex embedding - - "vertex_feature": embedding vertex features - """ - - VERTEX_DIRECT = "vertex_direct" - VERTEX_FEATURE = "vertex_feature" - - -def create_embedder(embedder_spec: CfgNode, embedder_dim: int) -> nn.Module: - """ - Create an embedder based on the provided configuration - - Args: - embedder_spec (CfgNode): embedder configuration - embedder_dim (int): embedding space dimensionality - Return: - An embedder instance for the specified configuration - Raises ValueError, in case of unexpected embedder type - """ - embedder_type = EmbedderType(embedder_spec.TYPE) - if embedder_type == EmbedderType.VERTEX_DIRECT: - embedder = VertexDirectEmbedder( - num_vertices=embedder_spec.NUM_VERTICES, - embed_dim=embedder_dim, - ) - if embedder_spec.INIT_FILE != "": - embedder.load(embedder_spec.INIT_FILE) - elif embedder_type == EmbedderType.VERTEX_FEATURE: - embedder = VertexFeatureEmbedder( - num_vertices=embedder_spec.NUM_VERTICES, - feature_dim=embedder_spec.FEATURE_DIM, - embed_dim=embedder_dim, - train_features=embedder_spec.FEATURES_TRAINABLE, - ) - if embedder_spec.INIT_FILE != "": - embedder.load(embedder_spec.INIT_FILE) - else: - raise ValueError(f"Unexpected embedder type {embedder_type}") - - if not embedder_spec.IS_TRAINABLE: - embedder.requires_grad_(False) - - return embedder - - -class Embedder(nn.Module): - """ - Embedder module that serves as a container for embedders to use with different - meshes. Extends Module to automatically save / load state dict. - """ - - DEFAULT_MODEL_CHECKPOINT_PREFIX = "roi_heads.embedder." - - def __init__(self, cfg: CfgNode): - """ - Initialize mesh embedders. An embedder for mesh `i` is stored in a submodule - "embedder_{i}". 
- - Args: - cfg (CfgNode): configuration options - """ - super(Embedder, self).__init__() - self.mesh_names = set() - embedder_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE - logger = logging.getLogger(__name__) - for mesh_name, embedder_spec in cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.items(): - logger.info(f"Adding embedder embedder_{mesh_name} with spec {embedder_spec}") - self.add_module(f"embedder_{mesh_name}", create_embedder(embedder_spec, embedder_dim)) - self.mesh_names.add(mesh_name) - if cfg.MODEL.WEIGHTS != "": - self.load_from_model_checkpoint(cfg.MODEL.WEIGHTS) - - def load_from_model_checkpoint(self, fpath: str, prefix: Optional[str] = None): - if prefix is None: - prefix = Embedder.DEFAULT_MODEL_CHECKPOINT_PREFIX - state_dict = None - if fpath.endswith(".pkl"): - with PathManager.open(fpath, "rb") as hFile: - state_dict = pickle.load(hFile, encoding="latin1") - else: - with PathManager.open(fpath, "rb") as hFile: - state_dict = torch.load(hFile, map_location=torch.device("cpu")) - if state_dict is not None and "model" in state_dict: - state_dict_local = {} - for key in state_dict["model"]: - if key.startswith(prefix): - v_key = state_dict["model"][key] - if isinstance(v_key, np.ndarray): - v_key = torch.from_numpy(v_key) - state_dict_local[key[len(prefix) :]] = v_key - # non-strict loading to finetune on different meshes - self.load_state_dict(state_dict_local, strict=False) - - def forward(self, mesh_name: str) -> torch.Tensor: - """ - Produce vertex embeddings for the specific mesh; vertex embeddings are - a tensor of shape [N, D] where: - N = number of vertices - D = number of dimensions in the embedding space - Args: - mesh_name (str): name of a mesh for which to obtain vertex embeddings - Return: - Vertex embeddings, a tensor of shape [N, D] - """ - return getattr(self, f"embedder_{mesh_name}")() - - def has_embeddings(self, mesh_name: str) -> bool: - return hasattr(self, f"embedder_{mesh_name}") diff --git a/detectron2/projects/DensePose/densepose/modeling/cse/utils.py b/detectron2/projects/DensePose/densepose/modeling/cse/utils.py deleted file mode 100644 index bb83b1af580ef76d8eddb03980fa14fe97298965..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/cse/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import torch -from torch.nn import functional as F - - -def squared_euclidean_distance_matrix(pts1: torch.Tensor, pts2: torch.Tensor) -> torch.Tensor: - """ - Get squared Euclidean Distance Matrix - Computes pairwise squared Euclidean distances between points - - Args: - pts1: Tensor [M x D], M is the number of points, D is feature dimensionality - pts2: Tensor [N x D], N is the number of points, D is feature dimensionality - - Return: - Tensor [M, N]: matrix of squared Euclidean distances; at index (m, n) - it contains || pts1[m] - pts2[n] ||^2 - """ - edm = torch.mm(-2 * pts1, pts2.t()) - edm += (pts1 * pts1).sum(1, keepdim=True) + (pts2 * pts2).sum(1, keepdim=True).t() - return edm.contiguous() - - -def normalize_embeddings(embeddings: torch.Tensor, epsilon: float = 1e-6) -> torch.Tensor: - """ - Normalize N D-dimensional embedding vectors arranged in a tensor [N, D] - - Args: - embeddings (tensor [N, D]): N D-dimensional embedding vectors - epsilon (float): minimum value for a vector norm - Return: - Normalized embeddings (tensor [N, D]), such that L2 vector norms are all equal to 1. 
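# A small worked check, with hypothetical points, of the expansion used by
# squared_euclidean_distance_matrix above: ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2,
# which is what the -2 * pts1 @ pts2^T term plus the two squared-norm sums compute.
import torch
pts1 = torch.tensor([[0.0, 0.0], [1.0, 2.0]])
pts2 = torch.tensor([[3.0, 4.0]])
edm = squared_euclidean_distance_matrix(pts1, pts2)
# edm == [[25.], [8.]]: (3-0)^2 + (4-0)^2 = 25 and (3-1)^2 + (4-2)^2 = 8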
- """ - return embeddings / torch.clamp(embeddings.norm(p=None, dim=1, keepdim=True), min=epsilon) - - -def get_closest_vertices_mask_from_ES( - E: torch.Tensor, - S: torch.Tensor, - h: int, - w: int, - mesh_vertex_embeddings: torch.Tensor, - device: torch.device, -): - """ - Interpolate Embeddings and Segmentations to the size of a given bounding box, - and compute closest vertices and the segmentation mask - - Args: - E (tensor [1, D, H, W]): D-dimensional embedding vectors for every point of the - default-sized box - S (tensor [1, 2, H, W]): 2-dimensional segmentation mask for every point of the - default-sized box - h (int): height of the target bounding box - w (int): width of the target bounding box - mesh_vertex_embeddings (tensor [N, D]): vertex embeddings for a chosen mesh - N is the number of vertices in the mesh, D is feature dimensionality - device (torch.device): device to move the tensors to - Return: - Closest Vertices (tensor [h, w]), int, for every point of the resulting box - Segmentation mask (tensor [h, w]), boolean, for every point of the resulting box - """ - embedding_resized = F.interpolate(E, size=(h, w), mode="bilinear")[0].to(device) - coarse_segm_resized = F.interpolate(S, size=(h, w), mode="bilinear")[0].to(device) - mask = coarse_segm_resized.argmax(0) > 0 - closest_vertices = torch.zeros(mask.shape, dtype=torch.long, device=device) - all_embeddings = embedding_resized[:, mask].t() - size_chunk = 10_000 # Chunking to avoid possible OOM - edm = [] - if len(all_embeddings) == 0: - return closest_vertices, mask - for chunk in range((len(all_embeddings) - 1) // size_chunk + 1): - chunk_embeddings = all_embeddings[size_chunk * chunk : size_chunk * (chunk + 1)] - edm.append( - torch.argmin( - squared_euclidean_distance_matrix(chunk_embeddings, mesh_vertex_embeddings), dim=1 - ) - ) - closest_vertices[mask] = torch.cat(edm) - return closest_vertices, mask diff --git a/detectron2/projects/DensePose/densepose/modeling/cse/vertex_direct_embedder.py b/detectron2/projects/DensePose/densepose/modeling/cse/vertex_direct_embedder.py deleted file mode 100644 index 32d92e7786336da0ed9582793620c33a3853195e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/cse/vertex_direct_embedder.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import pickle -import torch -from torch import nn - -from detectron2.utils.file_io import PathManager - -from .utils import normalize_embeddings - - -class VertexDirectEmbedder(nn.Module): - """ - Class responsible for embedding vertices. 
Vertex embeddings take - the form of a tensor of size [N, D], where - N = number of vertices - D = number of dimensions in the embedding space - """ - - def __init__(self, num_vertices: int, embed_dim: int): - """ - Initialize embedder, set random embeddings - - Args: - num_vertices (int): number of vertices to embed - embed_dim (int): number of dimensions in the embedding space - """ - super(VertexDirectEmbedder, self).__init__() - self.embeddings = nn.Parameter(torch.Tensor(num_vertices, embed_dim)) - self.reset_parameters() - - @torch.no_grad() - def reset_parameters(self): - """ - Reset embeddings to random values - """ - self.embeddings.zero_() - - def forward(self) -> torch.Tensor: - """ - Produce vertex embeddings, a tensor of shape [N, D] where: - N = number of vertices - D = number of dimensions in the embedding space - - Return: - Full vertex embeddings, a tensor of shape [N, D] - """ - return normalize_embeddings(self.embeddings) - - @torch.no_grad() - def load(self, fpath: str): - """ - Load data from a file - - Args: - fpath (str): file path to load data from - """ - with PathManager.open(fpath, "rb") as hFile: - data = pickle.load(hFile) - for name in ["embeddings"]: - if name in data: - getattr(self, name).copy_( - torch.tensor(data[name]).float().to(device=getattr(self, name).device) - ) diff --git a/detectron2/projects/DensePose/densepose/modeling/cse/vertex_feature_embedder.py b/detectron2/projects/DensePose/densepose/modeling/cse/vertex_feature_embedder.py deleted file mode 100644 index cb495f88bc5a205e3639d797910c899d6344cca5..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/cse/vertex_feature_embedder.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import pickle -import torch -from torch import nn - -from detectron2.utils.file_io import PathManager - -from .utils import normalize_embeddings - - -class VertexFeatureEmbedder(nn.Module): - """ - Class responsible for embedding vertex features. 
Mapping from - feature space to the embedding space is a tensor of size [K, D], where - K = number of dimensions in the feature space - D = number of dimensions in the embedding space - Vertex features is a tensor of size [N, K], where - N = number of vertices - K = number of dimensions in the feature space - Vertex embeddings are computed as F * E = tensor of size [N, D] - """ - - def __init__( - self, num_vertices: int, feature_dim: int, embed_dim: int, train_features: bool = False - ): - """ - Initialize embedder, set random embeddings - - Args: - num_vertices (int): number of vertices to embed - feature_dim (int): number of dimensions in the feature space - embed_dim (int): number of dimensions in the embedding space - train_features (bool): determines whether vertex features should - be trained (default: False) - """ - super(VertexFeatureEmbedder, self).__init__() - if train_features: - self.features = nn.Parameter(torch.Tensor(num_vertices, feature_dim)) - else: - self.register_buffer("features", torch.Tensor(num_vertices, feature_dim)) - self.embeddings = nn.Parameter(torch.Tensor(feature_dim, embed_dim)) - self.reset_parameters() - - @torch.no_grad() - def reset_parameters(self): - self.features.zero_() - self.embeddings.zero_() - - def forward(self) -> torch.Tensor: - """ - Produce vertex embeddings, a tensor of shape [N, D] where: - N = number of vertices - D = number of dimensions in the embedding space - - Return: - Full vertex embeddings, a tensor of shape [N, D] - """ - return normalize_embeddings(torch.mm(self.features, self.embeddings)) - - @torch.no_grad() - def load(self, fpath: str): - """ - Load data from a file - - Args: - fpath (str): file path to load data from - """ - with PathManager.open(fpath, "rb") as hFile: - data = pickle.load(hFile) - for name in ["features", "embeddings"]: - if name in data: - getattr(self, name).copy_( - torch.tensor(data[name]).float().to(device=getattr(self, name).device) - ) diff --git a/detectron2/projects/DensePose/densepose/modeling/densepose_checkpoint.py b/detectron2/projects/DensePose/densepose/modeling/densepose_checkpoint.py deleted file mode 100644 index c85711e976efdf56f0c6494fd19636e7411be2b4..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/densepose_checkpoint.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from collections import OrderedDict - -from detectron2.checkpoint import DetectionCheckpointer - - -def _rename_HRNet_weights(weights): - # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are - # common to all HRNet pretrained weights, and should be enough to accurately identify them - if ( - len(weights["model"].keys()) == 1956 - and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716 - ): - hrnet_weights = OrderedDict() - for k in weights["model"].keys(): - hrnet_weights["backbone.bottom_up." 
+ str(k)] = weights["model"][k] - return {"model": hrnet_weights} - else: - return weights - - -class DensePoseCheckpointer(DetectionCheckpointer): - """ - Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights - """ - - def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): - super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables) - - def _load_file(self, filename: str) -> object: - """ - Adding hrnet support - """ - weights = super()._load_file(filename) - return _rename_HRNet_weights(weights) diff --git a/detectron2/projects/DensePose/densepose/modeling/filter.py b/detectron2/projects/DensePose/densepose/modeling/filter.py deleted file mode 100644 index 503321004e39c1bd96be3512a3811e33fed4d008..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/filter.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import List -import torch - -from detectron2.config import CfgNode -from detectron2.structures import Instances -from detectron2.structures.boxes import matched_pairwise_iou - - -class DensePoseDataFilter: - def __init__(self, cfg: CfgNode): - self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD - self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS - - @torch.no_grad() - def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]): - """ - Filters proposals with targets to keep only the ones relevant for - DensePose training - - Args: - features (list[Tensor]): input data as a list of features, - each feature is a tensor. Axis 0 represents the number of - images `N` in the input data; axes 1-3 are channels, - height, and width, which may vary between features - (e.g., if a feature pyramid is used). - proposals_with_targets (list[Instances]): length `N` list of - `Instances`. The i-th `Instances` contains instances - (proposals, GT) for the i-th input image, - Returns: - list[Tensor]: filtered features - list[Instances]: filtered proposals - """ - proposals_filtered = [] - # TODO: the commented out code was supposed to correctly deal with situations - # where no valid DensePose GT is available for certain images. The corresponding - # image features were sliced and proposals were filtered. This led to performance - # deterioration, both in terms of runtime and in terms of evaluation results. 
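A minimal sketch of the IoU-threshold step described in the docstring above, on two toy box pairs; the 0.7 threshold stands in for cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD and the coordinates are made up.

import torch
from detectron2.structures import Boxes
from detectron2.structures.boxes import matched_pairwise_iou

gt_boxes = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]]))
proposal_boxes = Boxes(torch.tensor([[1.0, 1.0, 10.0, 10.0], [20.0, 20.0, 25.0, 25.0]]))
iou = matched_pairwise_iou(gt_boxes, proposal_boxes)  # element-wise IoU of matched pairs: [0.81, 0.25]
keep = iou > 0.7                                      # only the first proposal survives the filter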
- # - # feature_mask = torch.ones( - # len(proposals_with_targets), - # dtype=torch.bool, - # device=features[0].device if len(features) > 0 else torch.device("cpu"), - # ) - for i, proposals_per_image in enumerate(proposals_with_targets): - if not proposals_per_image.has("gt_densepose") and ( - not proposals_per_image.has("gt_masks") or not self.keep_masks - ): - # feature_mask[i] = 0 - continue - gt_boxes = proposals_per_image.gt_boxes - est_boxes = proposals_per_image.proposal_boxes - # apply match threshold for densepose head - iou = matched_pairwise_iou(gt_boxes, est_boxes) - iou_select = iou > self.iou_threshold - proposals_per_image = proposals_per_image[iou_select] # pyre-ignore[6] - - N_gt_boxes = len(proposals_per_image.gt_boxes) - assert N_gt_boxes == len(proposals_per_image.proposal_boxes), ( - f"The number of GT boxes {N_gt_boxes} is different from the " - f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}" - ) - # filter out any target without suitable annotation - if self.keep_masks: - gt_masks = ( - proposals_per_image.gt_masks - if hasattr(proposals_per_image, "gt_masks") - else [None] * N_gt_boxes - ) - else: - gt_masks = [None] * N_gt_boxes - gt_densepose = ( - proposals_per_image.gt_densepose - if hasattr(proposals_per_image, "gt_densepose") - else [None] * N_gt_boxes - ) - assert len(gt_masks) == N_gt_boxes - assert len(gt_densepose) == N_gt_boxes - selected_indices = [ - i - for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks)) - if (dp_target is not None) or (mask_target is not None) - ] - # if not len(selected_indices): - # feature_mask[i] = 0 - # continue - if len(selected_indices) != N_gt_boxes: - proposals_per_image = proposals_per_image[selected_indices] # pyre-ignore[6] - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes) - proposals_filtered.append(proposals_per_image) - # features_filtered = [feature[feature_mask] for feature in features] - # return features_filtered, proposals_filtered - return features, proposals_filtered diff --git a/detectron2/projects/DensePose/densepose/modeling/hrfpn.py b/detectron2/projects/DensePose/densepose/modeling/hrfpn.py deleted file mode 100644 index a19c3261198798738130267cb4c35022ddf8a9e6..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/hrfpn.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -""" -MIT License -Copyright (c) 2019 Microsoft -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from detectron2.layers import ShapeSpec -from detectron2.modeling.backbone import BACKBONE_REGISTRY -from detectron2.modeling.backbone.backbone import Backbone - -from .hrnet import build_pose_hrnet_backbone - - -class HRFPN(Backbone): - """HRFPN (High Resolution Feature Pyramids) - Transforms outputs of HRNet backbone so they are suitable for the ROI_heads - arXiv: https://arxiv.org/abs/1904.04514 - Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py - Args: - bottom_up: (list) output of HRNet - in_features (list): names of the input features (output of HRNet) - in_channels (list): number of channels for each branch - out_channels (int): output channels of feature pyramids - n_out_features (int): number of output stages - pooling (str): pooling for generating feature pyramids (from {MAX, AVG}) - share_conv (bool): Have one conv per output, or share one with all the outputs - """ - - def __init__( - self, - bottom_up, - in_features, - n_out_features, - in_channels, - out_channels, - pooling="AVG", - share_conv=False, - ): - super(HRFPN, self).__init__() - assert isinstance(in_channels, list) - self.bottom_up = bottom_up - self.in_features = in_features - self.n_out_features = n_out_features - self.in_channels = in_channels - self.out_channels = out_channels - self.num_ins = len(in_channels) - self.share_conv = share_conv - - if self.share_conv: - self.fpn_conv = nn.Conv2d( - in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1 - ) - else: - self.fpn_conv = nn.ModuleList() - for _ in range(self.n_out_features): - self.fpn_conv.append( - nn.Conv2d( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - padding=1, - ) - ) - - # Custom change: Replaces a simple bilinear interpolation - self.interp_conv = nn.ModuleList() - for i in range(len(self.in_features)): - self.interp_conv.append( - nn.Sequential( - nn.ConvTranspose2d( - in_channels=in_channels[i], - out_channels=in_channels[i], - kernel_size=4, - stride=2**i, - padding=0, - output_padding=0, - bias=False, - ), - nn.BatchNorm2d(in_channels[i], momentum=0.1), - nn.ReLU(inplace=True), - ) - ) - - # Custom change: Replaces a couple (reduction conv + pooling) by one conv - self.reduction_pooling_conv = nn.ModuleList() - for i in range(self.n_out_features): - self.reduction_pooling_conv.append( - nn.Sequential( - nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i), - nn.BatchNorm2d(out_channels, momentum=0.1), - nn.ReLU(inplace=True), - ) - ) - - if pooling == "MAX": - self.pooling = F.max_pool2d - else: - self.pooling = F.avg_pool2d - - self._out_features = [] - self._out_feature_channels = {} - self._out_feature_strides = {} - - for i in range(self.n_out_features): - self._out_features.append("p%d" % (i + 1)) - self._out_feature_channels.update({self._out_features[-1]: self.out_channels}) - self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)}) - - # default init_weights for conv(msra) and norm in ConvModule - def init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, a=1) - nn.init.constant_(m.bias, 0) - - def forward(self, inputs): - bottom_up_features = self.bottom_up(inputs) - assert len(bottom_up_features) == len(self.in_features) - inputs = [bottom_up_features[f] for f in self.in_features] - - outs = [] - for i in range(len(inputs)): - 
outs.append(self.interp_conv[i](inputs[i])) - shape_2 = min(o.shape[2] for o in outs) - shape_3 = min(o.shape[3] for o in outs) - out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1) - outs = [] - for i in range(self.n_out_features): - outs.append(self.reduction_pooling_conv[i](out)) - for i in range(len(outs)): # Make shapes consistent - outs[-1 - i] = outs[-1 - i][ - :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i - ] - outputs = [] - for i in range(len(outs)): - if self.share_conv: - outputs.append(self.fpn_conv(outs[i])) - else: - outputs.append(self.fpn_conv[i](outs[i])) - - assert len(self._out_features) == len(outputs) - return dict(zip(self._out_features, outputs)) - - -@BACKBONE_REGISTRY.register() -def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN: - - in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS - in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)] - n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES) - out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS - hrnet = build_pose_hrnet_backbone(cfg, input_shape) - hrfpn = HRFPN( - hrnet, - in_features, - n_out_features, - in_channels, - out_channels, - pooling="AVG", - share_conv=False, - ) - - return hrfpn diff --git a/detectron2/projects/DensePose/densepose/modeling/hrnet.py b/detectron2/projects/DensePose/densepose/modeling/hrnet.py deleted file mode 100644 index f8e3cab545c7f999300676bb27fa0461abd143e2..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/hrnet.py +++ /dev/null @@ -1,476 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# ------------------------------------------------------------------------------ -# Copyright (c) Microsoft -# Licensed under the MIT License. 
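A schematic, plain-PyTorch sketch of the HRFPN idea described above: bring every HRNet branch to the finest resolution, concatenate, then build each pyramid level with a stride-2**i reduction conv. The channel counts and spatial size are illustrative, and bilinear upsampling stands in for the learned transposed convolutions used in the module itself.

import torch
import torch.nn as nn
import torch.nn.functional as F

branch_channels = [32, 64, 128, 256]                    # example HRNet branch widths
feats = [torch.randn(1, c, 56 // 2 ** i, 56 // 2 ** i) for i, c in enumerate(branch_channels)]
upsampled = [F.interpolate(f, size=(56, 56), mode="bilinear", align_corners=False) for f in feats]
merged = torch.cat(upsampled, dim=1)                    # [1, sum(branch_channels), 56, 56]
out_channels, n_levels = 256, 4
pyramid = [
    nn.Conv2d(sum(branch_channels), out_channels, kernel_size=2 ** i, stride=2 ** i)(merged)
    for i in range(n_levels)
]                                                        # p1..p4 with spatial sizes 56, 28, 14, 7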
-# Written by Bin Xiao (leoxiaobin@gmail.com) -# Modified by Bowen Cheng (bcheng9@illinois.edu) -# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa -# ------------------------------------------------------------------------------ - -# pyre-unsafe - -from __future__ import absolute_import, division, print_function -import logging -import torch.nn as nn - -from detectron2.layers import ShapeSpec -from detectron2.modeling.backbone import BACKBONE_REGISTRY -from detectron2.modeling.backbone.backbone import Backbone - -BN_MOMENTUM = 0.1 -logger = logging.getLogger(__name__) - -__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"] - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class HighResolutionModule(nn.Module): - """HighResolutionModule - Building block of the PoseHigherResolutionNet (see lower) - arXiv: https://arxiv.org/abs/1908.10357 - Args: - num_branches (int): number of branches of the modyle - blocks (str): type of block of the module - num_blocks (int): number of blocks of the module - num_inchannels (int): number of input channels of the module - num_channels (list): number of channels of each branch - multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet - """ - - def __init__( - self, - num_branches, - blocks, - num_blocks, - num_inchannels, - num_channels, - multi_scale_output=True, - ): - super(HighResolutionModule, self).__init__() - self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels) - - self.num_inchannels = num_inchannels - self.num_branches = 
num_branches - - self.multi_scale_output = multi_scale_output - - self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels) - self.fuse_layers = self._make_fuse_layers() - self.relu = nn.ReLU(True) - - def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels): - if num_branches != len(num_blocks): - error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks)) - logger.error(error_msg) - raise ValueError(error_msg) - - if num_branches != len(num_channels): - error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format( - num_branches, len(num_channels) - ) - logger.error(error_msg) - raise ValueError(error_msg) - - if num_branches != len(num_inchannels): - error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format( - num_branches, len(num_inchannels) - ) - logger.error(error_msg) - raise ValueError(error_msg) - - def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): - downsample = None - if ( - stride != 1 - or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion - ): - downsample = nn.Sequential( - nn.Conv2d( - self.num_inchannels[branch_index], - num_channels[branch_index] * block.expansion, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM), - ) - - layers = [] - layers.append( - block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample) - ) - self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion - for _ in range(1, num_blocks[branch_index]): - layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index])) - - return nn.Sequential(*layers) - - def _make_branches(self, num_branches, block, num_blocks, num_channels): - branches = [] - - for i in range(num_branches): - branches.append(self._make_one_branch(i, block, num_blocks, num_channels)) - - return nn.ModuleList(branches) - - def _make_fuse_layers(self): - if self.num_branches == 1: - return None - - num_branches = self.num_branches - num_inchannels = self.num_inchannels - fuse_layers = [] - for i in range(num_branches if self.multi_scale_output else 1): - fuse_layer = [] - for j in range(num_branches): - if j > i: - fuse_layer.append( - nn.Sequential( - nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False), - nn.BatchNorm2d(num_inchannels[i]), - nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"), - ) - ) - elif j == i: - fuse_layer.append(None) - else: - conv3x3s = [] - for k in range(i - j): - if k == i - j - 1: - num_outchannels_conv3x3 = num_inchannels[i] - conv3x3s.append( - nn.Sequential( - nn.Conv2d( - num_inchannels[j], - num_outchannels_conv3x3, - 3, - 2, - 1, - bias=False, - ), - nn.BatchNorm2d(num_outchannels_conv3x3), - ) - ) - else: - num_outchannels_conv3x3 = num_inchannels[j] - conv3x3s.append( - nn.Sequential( - nn.Conv2d( - num_inchannels[j], - num_outchannels_conv3x3, - 3, - 2, - 1, - bias=False, - ), - nn.BatchNorm2d(num_outchannels_conv3x3), - nn.ReLU(True), - ) - ) - fuse_layer.append(nn.Sequential(*conv3x3s)) - fuse_layers.append(nn.ModuleList(fuse_layer)) - - return nn.ModuleList(fuse_layers) - - def get_num_inchannels(self): - return self.num_inchannels - - def forward(self, x): - if self.num_branches == 1: - return [self.branches[0](x[0])] - - for i in range(self.num_branches): - x[i] = self.branches[i](x[i]) - - x_fuse = [] - - for i in range(len(self.fuse_layers)): - y = x[0] if i == 0 
else self.fuse_layers[i][0](x[0]) - for j in range(1, self.num_branches): - if i == j: - y = y + x[j] - else: - z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]] - y = y + z - x_fuse.append(self.relu(y)) - - return x_fuse - - -blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck} - - -class PoseHigherResolutionNet(Backbone): - """PoseHigherResolutionNet - Composed of several HighResolutionModule tied together with ConvNets - Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure - arXiv: https://arxiv.org/abs/1908.10357 - """ - - def __init__(self, cfg, **kwargs): - self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES - super(PoseHigherResolutionNet, self).__init__() - - # stem net - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) - self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.layer1 = self._make_layer(Bottleneck, 64, 4) - - self.stage2_cfg = cfg.MODEL.HRNET.STAGE2 - num_channels = self.stage2_cfg.NUM_CHANNELS - block = blocks_dict[self.stage2_cfg.BLOCK] - num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))] - self.transition1 = self._make_transition_layer([256], num_channels) - self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels) - - self.stage3_cfg = cfg.MODEL.HRNET.STAGE3 - num_channels = self.stage3_cfg.NUM_CHANNELS - block = blocks_dict[self.stage3_cfg.BLOCK] - num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))] - self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) - self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels) - - self.stage4_cfg = cfg.MODEL.HRNET.STAGE4 - num_channels = self.stage4_cfg.NUM_CHANNELS - block = blocks_dict[self.stage4_cfg.BLOCK] - num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))] - self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) - self.stage4, pre_stage_channels = self._make_stage( - self.stage4_cfg, num_channels, multi_scale_output=True - ) - - self._out_features = [] - self._out_feature_channels = {} - self._out_feature_strides = {} - - for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES): - self._out_features.append("p%d" % (i + 1)) - self._out_feature_channels.update( - {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]} - ) - self._out_feature_strides.update({self._out_features[-1]: 1}) - - def _get_deconv_cfg(self, deconv_kernel): - if deconv_kernel == 4: - padding = 1 - output_padding = 0 - elif deconv_kernel == 3: - padding = 1 - output_padding = 1 - elif deconv_kernel == 2: - padding = 0 - output_padding = 0 - - return deconv_kernel, padding, output_padding - - def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): - num_branches_cur = len(num_channels_cur_layer) - num_branches_pre = len(num_channels_pre_layer) - - transition_layers = [] - for i in range(num_branches_cur): - if i < num_branches_pre: - if num_channels_cur_layer[i] != num_channels_pre_layer[i]: - transition_layers.append( - nn.Sequential( - nn.Conv2d( - num_channels_pre_layer[i], - num_channels_cur_layer[i], - 3, - 1, - 1, - bias=False, - ), - nn.BatchNorm2d(num_channels_cur_layer[i]), - nn.ReLU(inplace=True), - ) - ) - else: - transition_layers.append(None) - else: - conv3x3s 
= [] - for j in range(i + 1 - num_branches_pre): - inchannels = num_channels_pre_layer[-1] - outchannels = ( - num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels - ) - conv3x3s.append( - nn.Sequential( - nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False), - nn.BatchNorm2d(outchannels), - nn.ReLU(inplace=True), - ) - ) - transition_layers.append(nn.Sequential(*conv3x3s)) - - return nn.ModuleList(transition_layers) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d( - self.inplanes, - planes * block.expansion, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True): - num_modules = layer_config["NUM_MODULES"] - num_branches = layer_config["NUM_BRANCHES"] - num_blocks = layer_config["NUM_BLOCKS"] - num_channels = layer_config["NUM_CHANNELS"] - block = blocks_dict[layer_config["BLOCK"]] - - modules = [] - for i in range(num_modules): - # multi_scale_output is only used last module - if not multi_scale_output and i == num_modules - 1: - reset_multi_scale_output = False - else: - reset_multi_scale_output = True - - modules.append( - HighResolutionModule( - num_branches, - block, - num_blocks, - num_inchannels, - num_channels, - reset_multi_scale_output, - ) - ) - num_inchannels = modules[-1].get_num_inchannels() - - return nn.Sequential(*modules), num_inchannels - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.conv2(x) - x = self.bn2(x) - x = self.relu(x) - x = self.layer1(x) - - x_list = [] - for i in range(self.stage2_cfg.NUM_BRANCHES): - if self.transition1[i] is not None: - x_list.append(self.transition1[i](x)) - else: - x_list.append(x) - y_list = self.stage2(x_list) - - x_list = [] - for i in range(self.stage3_cfg.NUM_BRANCHES): - if self.transition2[i] is not None: - x_list.append(self.transition2[i](y_list[-1])) - else: - x_list.append(y_list[i]) - y_list = self.stage3(x_list) - - x_list = [] - for i in range(self.stage4_cfg.NUM_BRANCHES): - if self.transition3[i] is not None: - x_list.append(self.transition3[i](y_list[-1])) - else: - x_list.append(y_list[i]) - y_list = self.stage4(x_list) - - assert len(self._out_features) == len(y_list) - return dict(zip(self._out_features, y_list)) # final_outputs - - -@BACKBONE_REGISTRY.register() -def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec): - model = PoseHigherResolutionNet(cfg) - return model diff --git a/detectron2/projects/DensePose/densepose/modeling/inference.py b/detectron2/projects/DensePose/densepose/modeling/inference.py deleted file mode 100644 index a797ff9b28e61827f5553045a6147ff3390d9fe3..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/inference.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe -from dataclasses import fields -from typing import Any, List -import torch - -from detectron2.structures import Instances - - -def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None: - """ - Splits DensePose predictor outputs into chunks, each chunk corresponds to - detections on one image. Predictor output chunks are stored in `pred_densepose` - attribute of the corresponding `Instances` object. - - Args: - densepose_predictor_output: a dataclass instance (can be of different types, - depending on predictor used for inference). Each field can be `None` - (if the corresponding output was not inferred) or a tensor of size - [N, ...], where N = N_1 + N_2 + .. + N_k is a total number of - detections on all images, N_1 is the number of detections on image 1, - N_2 is the number of detections on image 2, etc. - detections: a list of objects of type `Instance`, k-th object corresponds - to detections on k-th image. - """ - k = 0 - for detection_i in detections: - if densepose_predictor_output is None: - # don't add `pred_densepose` attribute - continue - n_i = detection_i.__len__() - - PredictorOutput = type(densepose_predictor_output) - output_i_dict = {} - # we assume here that `densepose_predictor_output` is a dataclass object - for field in fields(densepose_predictor_output): - field_value = getattr(densepose_predictor_output, field.name) - # slice tensors - if isinstance(field_value, torch.Tensor): - output_i_dict[field.name] = field_value[k : k + n_i] - # leave others as is - else: - output_i_dict[field.name] = field_value - detection_i.pred_densepose = PredictorOutput(**output_i_dict) - k += n_i diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/__init__.py b/detectron2/projects/DensePose/densepose/modeling/losses/__init__.py deleted file mode 100644 index b028a23924b030e0bac4d554b61ed34f3110a798..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .chart import DensePoseChartLoss -from .chart_with_confidences import DensePoseChartWithConfidenceLoss -from .cse import DensePoseCseLoss -from .registry import DENSEPOSE_LOSS_REGISTRY - - -__all__ = [ - "DensePoseChartLoss", - "DensePoseChartWithConfidenceLoss", - "DensePoseCseLoss", - "DENSEPOSE_LOSS_REGISTRY", -] diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/chart.py b/detectron2/projects/DensePose/densepose/modeling/losses/chart.py deleted file mode 100644 index 770648f3d3fddbfc553c18a3e7f5101396913593..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/chart.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any, List -import torch -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from .mask_or_segm import MaskOrSegmentationLoss -from .registry import DENSEPOSE_LOSS_REGISTRY -from .utils import ( - BilinearInterpolationHelper, - ChartBasedAnnotationsAccumulator, - LossDict, - extract_packed_annotations_from_matches, -) - - -@DENSEPOSE_LOSS_REGISTRY.register() -class DensePoseChartLoss: - """ - DensePose loss for chart-based training. A mesh is split into charts, - each chart is given a label (I) and parametrized by 2 coordinates referred to - as U and V. 
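A minimal sketch of how densepose_inference (defined above) splits a batch-level predictor output across per-image Instances; SimpleOutput and its single field are hypothetical stand-ins for a real predictor output dataclass, and the import path mirrors the deleted module's location.

from dataclasses import dataclass
import torch
from detectron2.structures import Instances
from densepose.modeling.inference import densepose_inference

@dataclass
class SimpleOutput:
    coarse_segm: torch.Tensor            # [N, D, S, S]

    def __len__(self):                   # Instances.set() calls len() on attached values
        return self.coarse_segm.shape[0]

det1 = Instances((480, 640))
det1.scores = torch.rand(3)              # 3 detections on image 1
det2 = Instances((480, 640))
det2.scores = torch.rand(1)              # 1 detection on image 2
outputs = SimpleOutput(coarse_segm=torch.rand(4, 2, 112, 112))  # N = 3 + 1 over the two images
densepose_inference(outputs, [det1, det2])
assert det1.pred_densepose.coarse_segm.shape[0] == 3
assert det2.pred_densepose.coarse_segm.shape[0] == 1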
Ground truth consists of a number of points annotated with - I, U and V values and coarse segmentation S defined for all pixels of the - object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`), - semantic segmentation annotations can be used as ground truth inputs as well. - - Estimated values are tensors: - * U coordinates, tensor of shape [N, C, S, S] - * V coordinates, tensor of shape [N, C, S, S] - * fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized - scores for each fine segmentation label at each location - * coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized - scores for each coarse segmentation label at each location - where N is the number of detections, C is the number of fine segmentation - labels, S is the estimate size ( = width = height) and D is the number of - coarse segmentation channels. - - The losses are: - * regression (smooth L1) loss for U and V coordinates - * cross entropy loss for fine (I) and coarse (S) segmentations - Each loss has an associated weight - """ - - def __init__(self, cfg: CfgNode): - """ - Initialize chart-based loss from configuration options - - Args: - cfg (CfgNode): configuration options - """ - # fmt: off - self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE - self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS - self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS - self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS - self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - # fmt: on - self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS - self.segm_loss = MaskOrSegmentationLoss(cfg) - - def __call__( - self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs - ) -> LossDict: - """ - Produce chart-based DensePose losses - - Args: - proposals_with_gt (list of Instances): detections with associated ground truth data - densepose_predictor_outputs: an object of a dataclass that contains predictor outputs - with estimated values; assumed to have the following attributes: - * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S] - * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S] - * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S] - * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S] - where N is the number of detections, C is the number of fine segmentation - labels, S is the estimate size ( = width = height) and D is the number of - coarse segmentation channels. - - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_U`: smooth L1 loss for U coordinate estimates - * `loss_densepose_V`: smooth L1 loss for V coordinate estimates - * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine - segmentation estimates given ground truth labels; - * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse - segmentation estimates given ground truth labels; - """ - # densepose outputs are computed for all images and all bounding boxes; - # i.e. 
if a batch has 4 images with (3, 1, 2, 1) proposals respectively, - # the outputs will have size(0) == 3+1+2+1 == 7 - - if not len(proposals_with_gt): - return self.produce_fake_densepose_losses(densepose_predictor_outputs) - - accumulator = ChartBasedAnnotationsAccumulator() - packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator) - - # NOTE: we need to keep the same computation graph on all the GPUs to - # perform reduction properly. Hence even if we have no data on one - # of the GPUs, we still need to generate the computation graph. - # Add fake (zero) loss in the form Tensor.sum() * 0 - if packed_annotations is None: - return self.produce_fake_densepose_losses(densepose_predictor_outputs) - - h, w = densepose_predictor_outputs.u.shape[2:] - interpolator = BilinearInterpolationHelper.from_matches( - packed_annotations, - (h, w), - ) - - j_valid_fg = interpolator.j_valid * ( # pyre-ignore[16] - packed_annotations.fine_segm_labels_gt > 0 - ) - # pyre-fixme[6]: For 1st param expected `Tensor` but got `int`. - if not torch.any(j_valid_fg): - return self.produce_fake_densepose_losses(densepose_predictor_outputs) - - losses_uv = self.produce_densepose_losses_uv( - proposals_with_gt, - densepose_predictor_outputs, - packed_annotations, - interpolator, - j_valid_fg, # pyre-ignore[6] - ) - - losses_segm = self.produce_densepose_losses_segm( - proposals_with_gt, - densepose_predictor_outputs, - packed_annotations, - interpolator, - j_valid_fg, # pyre-ignore[6] - ) - - return {**losses_uv, **losses_segm} - - def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict: - """ - Fake losses for fine segmentation and U/V coordinates. These are used when - no suitable ground truth data was found in a batch. The loss has a value 0 - and is primarily used to construct the computation graph, so that - `DistributedDataParallel` has similar graphs on all GPUs and can perform - reduction properly. - - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have the following attributes: - * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S] - * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S] - * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S] - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_U`: has value 0 - * `loss_densepose_V`: has value 0 - * `loss_densepose_I`: has value 0 - * `loss_densepose_S`: has value 0 - """ - losses_uv = self.produce_fake_densepose_losses_uv(densepose_predictor_outputs) - losses_segm = self.produce_fake_densepose_losses_segm(densepose_predictor_outputs) - return {**losses_uv, **losses_segm} - - def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict: - """ - Fake losses for U/V coordinates. These are used when no suitable ground - truth data was found in a batch. The loss has a value 0 - and is primarily used to construct the computation graph, so that - `DistributedDataParallel` has similar graphs on all GPUs and can perform - reduction properly. 
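A minimal sketch of the zero-valued "fake loss" trick described above: multiplying a sum of predictor outputs by zero gives a loss that contributes nothing numerically but still depends on the outputs, so every GPU builds the same autograd graph and DistributedDataParallel gradient reduction stays in sync even for batches without usable ground truth. The tensor shape is illustrative.

import torch

u_pred = torch.randn(2, 25, 112, 112, requires_grad=True)  # stand-in for a U-coordinate head output
fake_loss = u_pred.sum() * 0
fake_loss.backward()
assert u_pred.grad is not None
assert u_pred.grad.abs().sum() == 0    # gradients are zero, but the graph was still traversed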
- - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have the following attributes: - * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S] - * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S] - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_U`: has value 0 - * `loss_densepose_V`: has value 0 - """ - return { - "loss_densepose_U": densepose_predictor_outputs.u.sum() * 0, - "loss_densepose_V": densepose_predictor_outputs.v.sum() * 0, - } - - def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict: - """ - Fake losses for fine / coarse segmentation. These are used when - no suitable ground truth data was found in a batch. The loss has a value 0 - and is primarily used to construct the computation graph, so that - `DistributedDataParallel` has similar graphs on all GPUs and can perform - reduction properly. - - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have the following attributes: - * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S] - * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S] - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_I`: has value 0 - * `loss_densepose_S`: has value 0, added only if `segm_trained_by_masks` is False - """ - losses = { - "loss_densepose_I": densepose_predictor_outputs.fine_segm.sum() * 0, - "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs), - } - return losses - - def produce_densepose_losses_uv( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: Any, - interpolator: BilinearInterpolationHelper, - j_valid_fg: torch.Tensor, - ) -> LossDict: - """ - Compute losses for U/V coordinates: smooth L1 loss between - estimated coordinates and the ground truth. - - Args: - proposals_with_gt (list of Instances): detections with associated ground truth data - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have the following attributes: - * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S] - * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S] - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_U`: smooth L1 loss for U coordinate estimates - * `loss_densepose_V`: smooth L1 loss for V coordinate estimates - """ - u_gt = packed_annotations.u_gt[j_valid_fg] - u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg] - v_gt = packed_annotations.v_gt[j_valid_fg] - v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg] - return { - "loss_densepose_U": F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points, - "loss_densepose_V": F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points, - } - - def produce_densepose_losses_segm( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: Any, - interpolator: BilinearInterpolationHelper, - j_valid_fg: torch.Tensor, - ) -> LossDict: - """ - Losses for fine / coarse segmentation: cross-entropy - for segmentation unnormalized scores given ground truth labels at - annotated points for fine segmentation and dense mask annotations - for coarse segmentation. 
- - Args: - proposals_with_gt (list of Instances): detections with associated ground truth data - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have the following attributes: - * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S] - * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S] - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine - segmentation estimates given ground truth labels - * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse - segmentation estimates given ground truth labels; - may be included if coarse segmentation is only trained - using DensePose ground truth; if additional supervision through - instance segmentation data is performed (`segm_trained_by_masks` is True), - this loss is handled by `produce_mask_losses` instead - """ - fine_segm_gt = packed_annotations.fine_segm_labels_gt[ - interpolator.j_valid # pyre-ignore[16] - ] - fine_segm_est = interpolator.extract_at_points( - densepose_predictor_outputs.fine_segm, - slice_fine_segm=slice(None), - w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16] - w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16] - w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16] - w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16] - )[interpolator.j_valid, :] - return { - "loss_densepose_I": F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part, - "loss_densepose_S": self.segm_loss( - proposals_with_gt, densepose_predictor_outputs, packed_annotations - ) - * self.w_segm, - } diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/chart_with_confidences.py b/detectron2/projects/DensePose/densepose/modeling/losses/chart_with_confidences.py deleted file mode 100644 index d061488d7d5fb8fe0e220e7dfe3f03ea2eda7977..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/chart_with_confidences.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import math -from typing import Any, List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType -from .chart import DensePoseChartLoss -from .registry import DENSEPOSE_LOSS_REGISTRY -from .utils import BilinearInterpolationHelper, LossDict - - -@DENSEPOSE_LOSS_REGISTRY.register() -class DensePoseChartWithConfidenceLoss(DensePoseChartLoss): - """ """ - - def __init__(self, cfg: CfgNode): - super().__init__(cfg) - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss( - self.confidence_model_cfg.uv_confidence.epsilon - ) - elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss( - self.confidence_model_cfg.uv_confidence.epsilon - ) - - def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict: - """ - Overrides fake losses for fine segmentation and U/V coordinates to - include computation graphs for additional confidence parameters. 
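For reference, the two confidence-aware UV losses defined a little further below (IIDIsotropicGaussianUVLoss and IndepAnisotropicGaussianUVLoss) minimize the following negative log-likelihoods, restated here from their docstrings; $\delta_i = (u_i - u_i', v_i - v_i')$ is the UV residual, $\langle \delta_i, r_i \rangle$ denotes the scalar product, and $\sigma_i^2$ is obtained from a softplus plus a lower bound to avoid degenerate solutions:

$$L_{\mathrm{iso}} = \frac{1}{2}\sum_{i=1}^{n}\Big(\log(2\pi) + 2\log\sigma_i^2 + \frac{\lVert\delta_i\rVert^2}{\sigma_i^2}\Big) \qquad (\Sigma_i = \sigma_i^2 I)$$

$$L_{\mathrm{aniso}} = \frac{1}{2}\sum_{i=1}^{n}\Big(\log(2\pi) + \log\big(\sigma_i^2(\sigma_i^2 + \lVert r_i\rVert^2)\big) + \frac{\lVert\delta_i\rVert^2}{\sigma_i^2} - \frac{\langle\delta_i, r_i\rangle^2}{\sigma_i^2(\sigma_i^2 + \lVert r_i\rVert^2)}\Big) \qquad (\Sigma_i = \sigma_i^2 I + r_i r_i^{\top})$$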
- These are used when no suitable ground truth data was found in a batch. - The loss has a value 0 and is primarily used to construct the computation graph, - so that `DistributedDataParallel` has similar graphs on all GPUs and can - perform reduction properly. - - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have the following attributes: - * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S] - * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S] - * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S] - Return: - dict: str -> tensor: dict of losses with the following entries: - * `loss_densepose_U`: has value 0 - * `loss_densepose_V`: has value 0 - * `loss_densepose_I`: has value 0 - """ - conf_type = self.confidence_model_cfg.uv_confidence.type - if self.confidence_model_cfg.uv_confidence.enabled: - loss_uv = ( - densepose_predictor_outputs.u.sum() + densepose_predictor_outputs.v.sum() - ) * 0 - if conf_type == DensePoseUVConfidenceType.IID_ISO: - loss_uv += densepose_predictor_outputs.sigma_2.sum() * 0 - elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: - loss_uv += ( - densepose_predictor_outputs.sigma_2.sum() - + densepose_predictor_outputs.kappa_u.sum() - + densepose_predictor_outputs.kappa_v.sum() - ) * 0 - return {"loss_densepose_UV": loss_uv} - else: - return super().produce_fake_densepose_losses_uv(densepose_predictor_outputs) - - def produce_densepose_losses_uv( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: Any, - interpolator: BilinearInterpolationHelper, - j_valid_fg: torch.Tensor, - ) -> LossDict: - conf_type = self.confidence_model_cfg.uv_confidence.type - if self.confidence_model_cfg.uv_confidence.enabled: - u_gt = packed_annotations.u_gt[j_valid_fg] - u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg] - v_gt = packed_annotations.v_gt[j_valid_fg] - v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg] - sigma_2_est = interpolator.extract_at_points(densepose_predictor_outputs.sigma_2)[ - j_valid_fg - ] - if conf_type == DensePoseUVConfidenceType.IID_ISO: - return { - "loss_densepose_UV": ( - self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt) - * self.w_points - ) - } - elif conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]: - kappa_u_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_u)[ - j_valid_fg - ] - kappa_v_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_v)[ - j_valid_fg - ] - return { - "loss_densepose_UV": ( - self.uv_loss_with_confidences( - u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt - ) - * self.w_points - ) - } - return super().produce_densepose_losses_uv( - proposals_with_gt, - densepose_predictor_outputs, - packed_annotations, - interpolator, - j_valid_fg, - ) - - -class IIDIsotropicGaussianUVLoss(nn.Module): - """ - Loss for the case of iid residuals with isotropic covariance: - $Sigma_i = sigma_i^2 I$ - The loss (negative log likelihood) is then: - $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$, - where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates - difference between estimated and ground truth UV values - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. 
NIPS 2019 - """ - - def __init__(self, sigma_lower_bound: float): - super(IIDIsotropicGaussianUVLoss, self).__init__() - self.sigma_lower_bound = sigma_lower_bound - self.log2pi = math.log(2 * math.pi) - - def forward( - self, - u: torch.Tensor, - v: torch.Tensor, - sigma_u: torch.Tensor, - target_u: torch.Tensor, - target_v: torch.Tensor, - ): - # compute $\sigma_i^2$ - # use sigma_lower_bound to avoid degenerate solution for variance - # (sigma -> 0) - sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound - # compute \|delta_i\|^2 - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2 - # the total loss from the formula above: - loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2) - return loss.sum() - - -class IndepAnisotropicGaussianUVLoss(nn.Module): - """ - Loss for the case of independent residuals with anisotropic covariances: - $Sigma_i = sigma_i^2 I + r_i r_i^T$ - The loss (negative log likelihood) is then: - $1/2 sum_{i=1}^n (log(2 pi) - + log sigma_i^2 (sigma_i^2 + ||r_i||^2) - + ||delta_i||^2 / sigma_i^2 - - ^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$, - where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates - difference between estimated and ground truth UV values - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 - """ - - def __init__(self, sigma_lower_bound: float): - super(IndepAnisotropicGaussianUVLoss, self).__init__() - self.sigma_lower_bound = sigma_lower_bound - self.log2pi = math.log(2 * math.pi) - - def forward( - self, - u: torch.Tensor, - v: torch.Tensor, - sigma_u: torch.Tensor, - kappa_u_est: torch.Tensor, - kappa_v_est: torch.Tensor, - target_u: torch.Tensor, - target_v: torch.Tensor, - ): - # compute $\sigma_i^2$ - sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound - # compute \|r_i\|^2 - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - r_sqnorm2 = kappa_u_est**2 + kappa_v_est**2 - delta_u = u - target_u - delta_v = v - target_v - # compute \|delta_i\|^2 - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - delta_sqnorm = delta_u**2 + delta_v**2 - delta_u_r_u = delta_u * kappa_u_est - delta_v_r_v = delta_v * kappa_v_est - # compute the scalar product - delta_r = delta_u_r_u + delta_v_r_v - # compute squared scalar product ^2 - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - delta_r_sqnorm = delta_r**2 - denom2 = sigma2 * (sigma2 + r_sqnorm2) - loss = 0.5 * ( - self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2 - ) - return loss.sum() diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/cse.py b/detectron2/projects/DensePose/densepose/modeling/losses/cse.py deleted file mode 100644 index ffe219c5474392da8048bcf409257cbfce817236..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/cse.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -# pyre-unsafe - -from typing import Any, List -from torch import nn - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from .cycle_pix2shape import PixToShapeCycleLoss -from .cycle_shape2shape import ShapeToShapeCycleLoss -from .embed import EmbeddingLoss -from .embed_utils import CseAnnotationsAccumulator -from .mask_or_segm import MaskOrSegmentationLoss -from .registry import DENSEPOSE_LOSS_REGISTRY -from .soft_embed import SoftEmbeddingLoss -from .utils import BilinearInterpolationHelper, LossDict, extract_packed_annotations_from_matches - - -@DENSEPOSE_LOSS_REGISTRY.register() -class DensePoseCseLoss: - """ """ - - _EMBED_LOSS_REGISTRY = { - EmbeddingLoss.__name__: EmbeddingLoss, - SoftEmbeddingLoss.__name__: SoftEmbeddingLoss, - } - - def __init__(self, cfg: CfgNode): - """ - Initialize CSE loss from configuration options - - Args: - cfg (CfgNode): configuration options - """ - self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS - self.w_embed = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT - self.segm_loss = MaskOrSegmentationLoss(cfg) - self.embed_loss = DensePoseCseLoss.create_embed_loss(cfg) - self.do_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.ENABLED - if self.do_shape2shape: - self.w_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT - self.shape2shape_loss = ShapeToShapeCycleLoss(cfg) - self.do_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.ENABLED - if self.do_pix2shape: - self.w_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT - self.pix2shape_loss = PixToShapeCycleLoss(cfg) - - @classmethod - def create_embed_loss(cls, cfg: CfgNode): - # registry not used here, since embedding losses are currently local - # and are not used anywhere else - return cls._EMBED_LOSS_REGISTRY[cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME](cfg) - - def __call__( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - embedder: nn.Module, - ) -> LossDict: - if not len(proposals_with_gt): - return self.produce_fake_losses(densepose_predictor_outputs, embedder) - accumulator = CseAnnotationsAccumulator() - packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator) - if packed_annotations is None: - return self.produce_fake_losses(densepose_predictor_outputs, embedder) - h, w = densepose_predictor_outputs.embedding.shape[2:] - interpolator = BilinearInterpolationHelper.from_matches( - packed_annotations, - (h, w), - ) - meshid_to_embed_losses = self.embed_loss( - proposals_with_gt, - densepose_predictor_outputs, - packed_annotations, - interpolator, - embedder, - ) - embed_loss_dict = { - f"loss_densepose_E{meshid}": self.w_embed * meshid_to_embed_losses[meshid] - for meshid in meshid_to_embed_losses - } - all_loss_dict = { - "loss_densepose_S": self.w_segm - * self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations), - **embed_loss_dict, - } - if self.do_shape2shape: - all_loss_dict["loss_shape2shape"] = self.w_shape2shape * self.shape2shape_loss(embedder) - if self.do_pix2shape: - all_loss_dict["loss_pix2shape"] = self.w_pix2shape * self.pix2shape_loss( - proposals_with_gt, densepose_predictor_outputs, packed_annotations, embedder - ) - return all_loss_dict - - def produce_fake_losses( - self, densepose_predictor_outputs: Any, embedder: nn.Module - ) -> LossDict: - meshname_to_embed_losses = self.embed_loss.fake_values( - 
densepose_predictor_outputs, embedder=embedder - ) - embed_loss_dict = { - f"loss_densepose_E{mesh_name}": meshname_to_embed_losses[mesh_name] - for mesh_name in meshname_to_embed_losses - } - all_loss_dict = { - "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs), - **embed_loss_dict, - } - if self.do_shape2shape: - all_loss_dict["loss_shape2shape"] = self.shape2shape_loss.fake_value(embedder) - if self.do_pix2shape: - all_loss_dict["loss_pix2shape"] = self.pix2shape_loss.fake_value( - densepose_predictor_outputs, embedder - ) - return all_loss_dict diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/cycle_pix2shape.py b/detectron2/projects/DensePose/densepose/modeling/losses/cycle_pix2shape.py deleted file mode 100644 index 0fc4298ffad31709290c8c904443ba83a039c1b6..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/cycle_pix2shape.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from typing import Any, List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from densepose.data.meshes.catalog import MeshCatalog -from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix - -from .embed_utils import PackedCseAnnotations -from .mask import extract_data_for_mask_loss_from_matches - - -def _create_pixel_dist_matrix(grid_size: int) -> torch.Tensor: - rows = torch.arange(grid_size) - cols = torch.arange(grid_size) - # at index `i` contains [row, col], where - # row = i // grid_size - # col = i % grid_size - pix_coords = ( - torch.stack(torch.meshgrid(rows, cols), -1).reshape((grid_size * grid_size, 2)).float() - ) - return squared_euclidean_distance_matrix(pix_coords, pix_coords) - - -def _sample_fg_pixels_randperm(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor: - fg_mask_flattened = fg_mask.reshape((-1,)) - num_pixels = int(fg_mask_flattened.sum().item()) - fg_pixel_indices = fg_mask_flattened.nonzero(as_tuple=True)[0] - if (sample_size <= 0) or (num_pixels <= sample_size): - return fg_pixel_indices - sample_indices = torch.randperm(num_pixels, device=fg_mask.device)[:sample_size] - return fg_pixel_indices[sample_indices] - - -def _sample_fg_pixels_multinomial(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor: - fg_mask_flattened = fg_mask.reshape((-1,)) - num_pixels = int(fg_mask_flattened.sum().item()) - if (sample_size <= 0) or (num_pixels <= sample_size): - return fg_mask_flattened.nonzero(as_tuple=True)[0] - return fg_mask_flattened.float().multinomial(sample_size, replacement=False) - - -class PixToShapeCycleLoss(nn.Module): - """ - Cycle loss for pixel-vertex correspondence - """ - - def __init__(self, cfg: CfgNode): - super().__init__() - self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys()) - self.embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE - self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P - self.use_all_meshes_not_gt_only = ( - cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY - ) - self.num_pixels_to_sample = ( - cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE - ) - self.pix_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA - self.temperature_pix_to_vertex = ( - 
cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX - ) - self.temperature_vertex_to_pix = ( - cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL - ) - self.pixel_dists = _create_pixel_dist_matrix(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE) - - def forward( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: PackedCseAnnotations, - embedder: nn.Module, - ): - """ - Args: - proposals_with_gt (list of Instances): detections with associated - ground truth data; each item corresponds to instances detected - on 1 image; the number of items corresponds to the number of - images in a batch - densepose_predictor_outputs: an object of a dataclass that contains predictor - outputs with estimated values; assumed to have the following attributes: - * embedding - embedding estimates, tensor of shape [N, D, S, S], where - N = number of instances (= sum N_i, where N_i is the number of - instances on image i) - D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE) - S = output size (width and height) - packed_annotations (PackedCseAnnotations): contains various data useful - for loss computation, each data is packed into a single tensor - embedder (nn.Module): module that computes vertex embeddings for different meshes - """ - pix_embeds = densepose_predictor_outputs.embedding - if self.pixel_dists.device != pix_embeds.device: - # should normally be done only once - self.pixel_dists = self.pixel_dists.to(device=pix_embeds.device) - with torch.no_grad(): - mask_loss_data = extract_data_for_mask_loss_from_matches( - proposals_with_gt, densepose_predictor_outputs.coarse_segm - ) - # GT masks - tensor of shape [N, S, S] of int64 - masks_gt = mask_loss_data.masks_gt.long() # pyre-ignore[16] - assert len(pix_embeds) == len(masks_gt), ( - f"Number of instances with embeddings {len(pix_embeds)} != " - f"number of instances with GT masks {len(masks_gt)}" - ) - losses = [] - mesh_names = ( - self.shape_names - if self.use_all_meshes_not_gt_only - else [ - MeshCatalog.get_mesh_name(mesh_id.item()) - for mesh_id in packed_annotations.vertex_mesh_ids_gt.unique() - ] - ) - for pixel_embeddings, mask_gt in zip(pix_embeds, masks_gt): - # pixel_embeddings [D, S, S] - # mask_gt [S, S] - for mesh_name in mesh_names: - mesh_vertex_embeddings = embedder(mesh_name) - # pixel indices [M] - pixel_indices_flattened = _sample_fg_pixels_randperm( - mask_gt, self.num_pixels_to_sample - ) - # pixel distances [M, M] - pixel_dists = self.pixel_dists.to(pixel_embeddings.device)[ - torch.meshgrid(pixel_indices_flattened, pixel_indices_flattened) - ] - # pixel embeddings [M, D] - pixel_embeddings_sampled = normalize_embeddings( - pixel_embeddings.reshape((self.embed_size, -1))[:, pixel_indices_flattened].T - ) - # pixel-vertex similarity [M, K] - sim_matrix = pixel_embeddings_sampled.mm(mesh_vertex_embeddings.T) - c_pix_vertex = F.softmax(sim_matrix / self.temperature_pix_to_vertex, dim=1) - c_vertex_pix = F.softmax(sim_matrix.T / self.temperature_vertex_to_pix, dim=1) - c_cycle = c_pix_vertex.mm(c_vertex_pix) - loss_cycle = torch.norm(pixel_dists * c_cycle, p=self.norm_p) - losses.append(loss_cycle) - - if len(losses) == 0: - return pix_embeds.sum() * 0 - return torch.stack(losses, dim=0).mean() - - def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module): - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is not a - # function. 
- losses = [embedder(mesh_name).sum() * 0 for mesh_name in embedder.mesh_names] - losses.append(densepose_predictor_outputs.embedding.sum() * 0) - return torch.mean(torch.stack(losses)) diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/cycle_shape2shape.py b/detectron2/projects/DensePose/densepose/modeling/losses/cycle_shape2shape.py deleted file mode 100644 index 0eb7a8ee3acac990806f6c6a064a020c3d87519e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/cycle_shape2shape.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import random -from typing import Tuple -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode - -from densepose.structures.mesh import create_mesh - -from .utils import sample_random_indices - - -class ShapeToShapeCycleLoss(nn.Module): - """ - Cycle Loss for Shapes. - Inspired by: - "Mapping in a Cycle: Sinkhorn Regularized Unsupervised Learning for Point Cloud Shapes". - """ - - def __init__(self, cfg: CfgNode): - super().__init__() - self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys()) - self.all_shape_pairs = [ - (x, y) for i, x in enumerate(self.shape_names) for y in self.shape_names[i + 1 :] - ] - random.shuffle(self.all_shape_pairs) - self.cur_pos = 0 - self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P - self.temperature = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE - self.max_num_vertices = ( - cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES - ) - - def _sample_random_pair(self) -> Tuple[str, str]: - """ - Produce a random pair of different mesh names - - Return: - tuple(str, str): a pair of different mesh names - """ - if self.cur_pos >= len(self.all_shape_pairs): - random.shuffle(self.all_shape_pairs) - self.cur_pos = 0 - shape_pair = self.all_shape_pairs[self.cur_pos] - self.cur_pos += 1 - return shape_pair - - def forward(self, embedder: nn.Module): - """ - Do a forward pass with a random pair (src, dst) pair of shapes - Args: - embedder (nn.Module): module that computes vertex embeddings for different meshes - """ - src_mesh_name, dst_mesh_name = self._sample_random_pair() - return self._forward_one_pair(embedder, src_mesh_name, dst_mesh_name) - - def fake_value(self, embedder: nn.Module): - losses = [] - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is not a - # function. - for mesh_name in embedder.mesh_names: - losses.append(embedder(mesh_name).sum() * 0) - return torch.mean(torch.stack(losses)) - - def _get_embeddings_and_geodists_for_mesh( - self, embedder: nn.Module, mesh_name: str - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Produces embeddings and geodesic distance tensors for a given mesh. May subsample - the mesh, if it contains too many vertices (controlled by - SHAPE_CYCLE_LOSS_MAX_NUM_VERTICES parameter). 
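For orientation, the cycle-consistency term computed in `PixToShapeCycleLoss.forward` above boils down to the following self-contained sketch; the function name, tensor sizes and temperature values are illustrative only, not taken from the config:

import torch
import torch.nn.functional as F

def pix2shape_cycle_term(pix_embed, vert_embed, pixel_dists, t_pv=0.05, t_vp=0.05, p=2):
    # pix_embed:   [M, D] L2-normalized embeddings of sampled foreground pixels
    # vert_embed:  [K, D] L2-normalized embeddings of all mesh vertices
    # pixel_dists: [M, M] squared image-plane distances between the sampled pixels
    sim = pix_embed @ vert_embed.T                    # [M, K] pixel-vertex similarity
    c_pix_vert = F.softmax(sim / t_pv, dim=1)         # soft map: pixel -> vertex
    c_vert_pix = F.softmax(sim.T / t_vp, dim=1)       # soft map: vertex -> pixel
    c_cycle = c_pix_vert @ c_vert_pix                 # [M, M] pixel -> vertex -> pixel
    # pixels that cycle back far from where they started are penalized by their distance
    return torch.norm(pixel_dists * c_cycle, p=p)

# toy usage with random data
M, K, D = 8, 100, 16
pix = F.normalize(torch.randn(M, D), dim=1)
verts = F.normalize(torch.randn(K, D), dim=1)
coords = torch.rand(M, 2)
dists = ((coords[:, None, :] - coords[None, :, :]) ** 2).sum(-1)
print(pix2shape_cycle_term(pix, verts, dists))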
- Args: - embedder (nn.Module): module that computes embeddings for mesh vertices - mesh_name (str): mesh name - Return: - embeddings (torch.Tensor of size [N, D]): embeddings for selected mesh - vertices (N = number of selected vertices, D = embedding space dim) - geodists (torch.Tensor of size [N, N]): geodesic distances for the selected - mesh vertices (N = number of selected vertices) - """ - embeddings = embedder(mesh_name) - indices = sample_random_indices( - embeddings.shape[0], self.max_num_vertices, embeddings.device - ) - mesh = create_mesh(mesh_name, embeddings.device) - geodists = mesh.geodists - if indices is not None: - embeddings = embeddings[indices] - geodists = geodists[torch.meshgrid(indices, indices)] - return embeddings, geodists - - def _forward_one_pair( - self, embedder: nn.Module, mesh_name_1: str, mesh_name_2: str - ) -> torch.Tensor: - """ - Do a forward pass with a selected pair of meshes - Args: - embedder (nn.Module): module that computes vertex embeddings for different meshes - mesh_name_1 (str): first mesh name - mesh_name_2 (str): second mesh name - Return: - Tensor containing the loss value - """ - embeddings_1, geodists_1 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_1) - embeddings_2, geodists_2 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_2) - sim_matrix_12 = embeddings_1.mm(embeddings_2.T) - - c_12 = F.softmax(sim_matrix_12 / self.temperature, dim=1) - c_21 = F.softmax(sim_matrix_12.T / self.temperature, dim=1) - c_11 = c_12.mm(c_21) - c_22 = c_21.mm(c_12) - - loss_cycle_11 = torch.norm(geodists_1 * c_11, p=self.norm_p) - loss_cycle_22 = torch.norm(geodists_2 * c_22, p=self.norm_p) - - return loss_cycle_11 + loss_cycle_22 diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/embed.py b/detectron2/projects/DensePose/densepose/modeling/losses/embed.py deleted file mode 100644 index 20790073b8b96c064d162c122a8ff6624d1694da..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/embed.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from typing import Any, Dict, List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from densepose.data.meshes.catalog import MeshCatalog -from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix - -from .embed_utils import PackedCseAnnotations -from .utils import BilinearInterpolationHelper - - -class EmbeddingLoss: - """ - Computes losses for estimated embeddings given annotated vertices. - Instances in a minibatch that correspond to the same mesh are grouped - together. For each group, loss is computed as cross-entropy for - unnormalized scores given ground truth mesh vertex ids. - Scores are based on squared distances between estimated vertex embeddings - and mesh vertex embeddings. - """ - - def __init__(self, cfg: CfgNode): - """ - Initialize embedding loss from config - """ - self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA - - def __call__( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: PackedCseAnnotations, - interpolator: BilinearInterpolationHelper, - embedder: nn.Module, - ) -> Dict[int, torch.Tensor]: - """ - Produces losses for estimated embeddings given annotated vertices. 
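The shape-to-shape cycle term in `_forward_one_pair` above follows the same pattern, but cycles between two meshes and penalizes by geodesic distance. A minimal sketch, with illustrative names and a made-up temperature:

import torch
import torch.nn.functional as F

def shape2shape_cycle_term(emb1, geod1, emb2, geod2, temperature=0.05, p=2):
    # emb1: [N1, D], emb2: [N2, D] vertex embeddings of two (possibly subsampled) meshes
    # geod1: [N1, N1], geod2: [N2, N2] geodesic distances between the selected vertices
    sim_12 = emb1 @ emb2.T                            # [N1, N2]
    c_12 = F.softmax(sim_12 / temperature, dim=1)     # soft map: mesh 1 -> mesh 2
    c_21 = F.softmax(sim_12.T / temperature, dim=1)   # soft map: mesh 2 -> mesh 1
    c_11 = c_12 @ c_21                                # mesh 1 -> mesh 2 -> mesh 1, [N1, N1]
    c_22 = c_21 @ c_12                                # mesh 2 -> mesh 1 -> mesh 2, [N2, N2]
    # vertices that cycle back to geodesically distant vertices are penalized
    return torch.norm(geod1 * c_11, p=p) + torch.norm(geod2 * c_22, p=p)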
- Embeddings for all the vertices of a mesh are computed by the embedder. - Embeddings for observed pixels are estimated by a predictor. - Losses are computed as cross-entropy for squared distances between - observed vertex embeddings and all mesh vertex embeddings given - ground truth vertex IDs. - - Args: - proposals_with_gt (list of Instances): detections with associated - ground truth data; each item corresponds to instances detected - on 1 image; the number of items corresponds to the number of - images in a batch - densepose_predictor_outputs: an object of a dataclass that contains predictor - outputs with estimated values; assumed to have the following attributes: - * embedding - embedding estimates, tensor of shape [N, D, S, S], where - N = number of instances (= sum N_i, where N_i is the number of - instances on image i) - D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE) - S = output size (width and height) - packed_annotations (PackedCseAnnotations): contains various data useful - for loss computation, each data is packed into a single tensor - interpolator (BilinearInterpolationHelper): bilinear interpolation helper - embedder (nn.Module): module that computes vertex embeddings for different meshes - Return: - dict(int -> tensor): losses for different mesh IDs - """ - losses = {} - for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique(): - mesh_id = mesh_id_tensor.item() - mesh_name = MeshCatalog.get_mesh_name(mesh_id) - # valid points are those that fall into estimated bbox - # and correspond to the current mesh - j_valid = interpolator.j_valid * ( # pyre-ignore[16] - packed_annotations.vertex_mesh_ids_gt == mesh_id - ) - if not torch.any(j_valid): - continue - # extract estimated embeddings for valid points - # -> tensor [J, D] - vertex_embeddings_i = normalize_embeddings( - interpolator.extract_at_points( - densepose_predictor_outputs.embedding, - slice_fine_segm=slice(None), - w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16] - w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16] - w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16] - w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16] - )[j_valid, :] - ) - # extract vertex ids for valid points - # -> tensor [J] - vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid] - # embeddings for all mesh vertices - # -> tensor [K, D] - mesh_vertex_embeddings = embedder(mesh_name) - # unnormalized scores for valid points - # -> tensor [J, K] - scores = squared_euclidean_distance_matrix( - vertex_embeddings_i, mesh_vertex_embeddings - ) / (-self.embdist_gauss_sigma) - losses[mesh_name] = F.cross_entropy(scores, vertex_indices_i, ignore_index=-1) - - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is not a - # function. - for mesh_name in embedder.mesh_names: - if mesh_name not in losses: - losses[mesh_name] = self.fake_value( - densepose_predictor_outputs, embedder, mesh_name - ) - return losses - - def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module): - losses = {} - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is not a - # function. 
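The per-mesh scores used by `EmbeddingLoss.__call__` above are just negative squared embedding distances scaled by the Gaussian sigma, fed to a standard cross-entropy against the annotated vertex ids. A minimal sketch (the function name and the sigma value are illustrative):

import torch
import torch.nn.functional as F

def cse_embedding_term(pix_embed, vert_embed, gt_vertex_ids, gauss_sigma=0.1):
    # pix_embed:     [J, D] normalized embeddings interpolated at the annotated points
    # vert_embed:    [K, D] embeddings of all vertices of the GT mesh
    # gt_vertex_ids: [J]    annotated GT vertex index for each point
    sq_dists = ((pix_embed[:, None, :] - vert_embed[None, :, :]) ** 2).sum(-1)  # [J, K]
    scores = sq_dists / (-gauss_sigma)        # closer vertices get larger scores
    return F.cross_entropy(scores, gt_vertex_ids, ignore_index=-1)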
- for mesh_name in embedder.mesh_names: - losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name) - return losses - - def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str): - return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0 diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/embed_utils.py b/detectron2/projects/DensePose/densepose/modeling/losses/embed_utils.py deleted file mode 100644 index 92210f002c0c181c4893a9115e84aaaad512f8e3..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/embed_utils.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from dataclasses import dataclass -from typing import Any, Optional -import torch - -from detectron2.structures import BoxMode, Instances - -from .utils import AnnotationsAccumulator - - -@dataclass -class PackedCseAnnotations: - x_gt: torch.Tensor - y_gt: torch.Tensor - coarse_segm_gt: Optional[torch.Tensor] - vertex_mesh_ids_gt: torch.Tensor - vertex_ids_gt: torch.Tensor - bbox_xywh_gt: torch.Tensor - bbox_xywh_est: torch.Tensor - point_bbox_with_dp_indices: torch.Tensor - point_bbox_indices: torch.Tensor - bbox_indices: torch.Tensor - - -class CseAnnotationsAccumulator(AnnotationsAccumulator): - """ - Accumulates annotations by batches that correspond to objects detected on - individual images. Can pack them together into single tensors. - """ - - def __init__(self): - self.x_gt = [] - self.y_gt = [] - self.s_gt = [] - self.vertex_mesh_ids_gt = [] - self.vertex_ids_gt = [] - self.bbox_xywh_gt = [] - self.bbox_xywh_est = [] - self.point_bbox_with_dp_indices = [] - self.point_bbox_indices = [] - self.bbox_indices = [] - self.nxt_bbox_with_dp_index = 0 - self.nxt_bbox_index = 0 - - def accumulate(self, instances_one_image: Instances): - """ - Accumulate instances data for one image - - Args: - instances_one_image (Instances): instances data to accumulate - """ - boxes_xywh_est = BoxMode.convert( - instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - boxes_xywh_gt = BoxMode.convert( - instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - n_matches = len(boxes_xywh_gt) - assert n_matches == len( - boxes_xywh_est - ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes" - if not n_matches: - # no detection - GT matches - return - if ( - not hasattr(instances_one_image, "gt_densepose") - or instances_one_image.gt_densepose is None - ): - # no densepose GT for the detections, just increase the bbox index - self.nxt_bbox_index += n_matches - return - for box_xywh_est, box_xywh_gt, dp_gt in zip( - boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose - ): - if (dp_gt is not None) and (len(dp_gt.x) > 0): - # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`. - # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`. 
- self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt) - self.nxt_bbox_index += 1 - - def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any): - """ - Accumulate instances data for one image, given that the data is not empty - - Args: - box_xywh_gt (tensor): GT bounding box - box_xywh_est (tensor): estimated bounding box - dp_gt: GT densepose data with the following attributes: - - x: normalized X coordinates - - y: normalized Y coordinates - - segm: tensor of size [S, S] with coarse segmentation - - - """ - self.x_gt.append(dp_gt.x) - self.y_gt.append(dp_gt.y) - if hasattr(dp_gt, "segm"): - self.s_gt.append(dp_gt.segm.unsqueeze(0)) - self.vertex_ids_gt.append(dp_gt.vertex_ids) - self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id)) - self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4)) - self.bbox_xywh_est.append(box_xywh_est.view(-1, 4)) - self.point_bbox_with_dp_indices.append( - torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index) - ) - self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index)) - self.bbox_indices.append(self.nxt_bbox_index) - self.nxt_bbox_with_dp_index += 1 - - def pack(self) -> Optional[PackedCseAnnotations]: - """ - Pack data into tensors - """ - if not len(self.x_gt): - # TODO: - # returning proper empty annotations would require - # creating empty tensors of appropriate shape and - # type on an appropriate device; - # we return None so far to indicate empty annotations - return None - return PackedCseAnnotations( - x_gt=torch.cat(self.x_gt, 0), - y_gt=torch.cat(self.y_gt, 0), - vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0), - vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0), - # ignore segmentation annotations, if not all the instances contain those - coarse_segm_gt=( - torch.cat(self.s_gt, 0) if len(self.s_gt) == len(self.bbox_xywh_gt) else None - ), - bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0), - bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0), - point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0), - point_bbox_indices=torch.cat(self.point_bbox_indices, 0), - bbox_indices=torch.as_tensor( - self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device - ), - ) diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/mask.py b/detectron2/projects/DensePose/densepose/modeling/losses/mask.py deleted file mode 100644 index 2f8f75a425d288e1167eaf8cb48e4dc0f851ff45..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/mask.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from dataclasses import dataclass -from typing import Any, Iterable, List, Optional -import torch -from torch.nn import functional as F - -from detectron2.structures import Instances - - -@dataclass -class DataForMaskLoss: - """ - Contains mask GT and estimated data for proposals from multiple images: - """ - - # tensor of size (K, H, W) containing GT labels - masks_gt: Optional[torch.Tensor] = None - # tensor of size (K, C, H, W) containing estimated scores - masks_est: Optional[torch.Tensor] = None - - -def extract_data_for_mask_loss_from_matches( - proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor -) -> DataForMaskLoss: - """ - Extract data for mask loss from instances that contain matched GT and - estimated bounding boxes. 
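`CseAnnotationsAccumulator` above is driven one image at a time and then packed, the same pattern used by `extract_packed_annotations_from_matches` in utils.py further below. A short usage sketch, assuming `proposals_with_gt` is the per-image list of matched `Instances` handed to the losses:

accumulator = CseAnnotationsAccumulator()
for instances_one_image in proposals_with_gt:
    accumulator.accumulate(instances_one_image)
packed = accumulator.pack()  # PackedCseAnnotations, or None if the batch has no DensePose GT
if packed is not None:
    # flat tensors over all annotated points / instances in the batch
    print(packed.x_gt.shape, packed.vertex_ids_gt.shape, packed.bbox_xywh_gt.shape)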
- Args: - proposals_targets: Iterable[Instances] - matched GT and estimated results, each item in the iterable - corresponds to data in 1 image - estimated_segm: tensor(K, C, S, S) of float - raw unnormalized - segmentation scores, here S is the size to which GT masks are - to be resized - Return: - masks_est: tensor(K, C, S, S) of float - class scores - masks_gt: tensor(K, S, S) of int64 - labels - """ - data = DataForMaskLoss() - masks_gt = [] - offset = 0 - assert estimated_segm.shape[2] == estimated_segm.shape[3], ( - f"Expected estimated segmentation to have a square shape, " - f"but the actual shape is {estimated_segm.shape[2:]}" - ) - mask_size = estimated_segm.shape[2] - num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets) - num_estimated = estimated_segm.shape[0] - assert ( - num_proposals == num_estimated - ), "The number of proposals {} must be equal to the number of estimates {}".format( - num_proposals, num_estimated - ) - - for proposals_targets_per_image in proposals_targets: - n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0) - if not n_i: - continue - gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize( - proposals_targets_per_image.proposal_boxes.tensor, mask_size - ).to(device=estimated_segm.device) - masks_gt.append(gt_masks_per_image) - offset += n_i - if masks_gt: - data.masks_est = estimated_segm - data.masks_gt = torch.cat(masks_gt, dim=0) - return data - - -class MaskLoss: - """ - Mask loss as cross-entropy for raw unnormalized scores given ground truth labels. - Mask ground truth labels are defined for the whole image and not only the - bounding box of interest. They are stored as objects that are assumed to implement - the `crop_and_resize` interface (e.g. BitMasks, PolygonMasks). - """ - - def __call__( - self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any - ) -> torch.Tensor: - """ - Computes segmentation loss as cross-entropy for raw unnormalized - scores given ground truth labels. - - Args: - proposals_with_gt (list of Instances): detections with associated ground truth data - densepose_predictor_outputs: an object of a dataclass that contains predictor outputs - with estimated values; assumed to have the following attribute: - * coarse_segm (tensor of shape [N, D, S, S]): coarse segmentation estimates - as raw unnormalized scores - where N is the number of detections, S is the estimate size ( = width = height) - and D is the number of coarse segmentation channels. - Return: - Cross entropy for raw unnormalized scores for coarse segmentation given - ground truth labels from masks - """ - if not len(proposals_with_gt): - return self.fake_value(densepose_predictor_outputs) - # densepose outputs are computed for all images and all bounding boxes; - # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively, - # the outputs will have size(0) == 3+1+2+1 == 7 - with torch.no_grad(): - mask_loss_data = extract_data_for_mask_loss_from_matches( - proposals_with_gt, densepose_predictor_outputs.coarse_segm - ) - if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None): - return self.fake_value(densepose_predictor_outputs) - return F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long()) - - def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor: - """ - Fake segmentation loss used when no suitable ground truth data - was found in a batch. 
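`MaskLoss.__call__` above reduces to a spatial cross-entropy once the GT masks have been cropped to the matched proposal boxes and resized to the estimate resolution. A minimal sketch with illustrative shapes:

import torch
import torch.nn.functional as F

def mask_ce_term(coarse_segm_est, masks_gt):
    # coarse_segm_est: [K, C, S, S] raw scores predicted for the K matched proposals
    # masks_gt:        [K, S, S]    GT masks cropped to each proposal box and resized
    #                               to S x S (e.g. via gt_masks.crop_and_resize)
    return F.cross_entropy(coarse_segm_est, masks_gt.long())

est = torch.randn(4, 2, 28, 28)
gt = torch.rand(4, 28, 28) > 0.5
print(mask_ce_term(est, gt))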
The loss has a value 0 and is primarily used to - construct the computation graph, so that `DistributedDataParallel` - has similar graphs on all GPUs and can perform reduction properly. - - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have `coarse_segm` - attribute - Return: - Zero value loss with proper computation graph - """ - return densepose_predictor_outputs.coarse_segm.sum() * 0 diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/mask_or_segm.py b/detectron2/projects/DensePose/densepose/modeling/losses/mask_or_segm.py deleted file mode 100644 index 350a2ebf81b13839c3a16545984c05c1aa68f5bf..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/mask_or_segm.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from typing import Any, List -import torch - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from .mask import MaskLoss -from .segm import SegmentationLoss - - -class MaskOrSegmentationLoss: - """ - Mask or segmentation loss as cross-entropy for raw unnormalized scores - given ground truth labels. Ground truth labels are either defined by coarse - segmentation annotation, or by mask annotation, depending on the config - value MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS - """ - - def __init__(self, cfg: CfgNode): - """ - Initialize segmentation loss from configuration options - - Args: - cfg (CfgNode): configuration options - """ - self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS - if self.segm_trained_by_masks: - self.mask_loss = MaskLoss() - self.segm_loss = SegmentationLoss(cfg) - - def __call__( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: Any, - ) -> torch.Tensor: - """ - Compute segmentation loss as cross-entropy between aligned unnormalized - score estimates and ground truth; with ground truth given - either by masks, or by coarse segmentation annotations. - - Args: - proposals_with_gt (list of Instances): detections with associated ground truth data - densepose_predictor_outputs: an object of a dataclass that contains predictor outputs - with estimated values; assumed to have the following attributes: - * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S] - packed_annotations: packed annotations for efficient loss computation - Return: - tensor: loss value as cross-entropy for raw unnormalized scores - given ground truth labels - """ - if self.segm_trained_by_masks: - return self.mask_loss(proposals_with_gt, densepose_predictor_outputs) - return self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations) - - def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor: - """ - Fake segmentation loss used when no suitable ground truth data - was found in a batch. The loss has a value 0 and is primarily used to - construct the computation graph, so that `DistributedDataParallel` - has similar graphs on all GPUs and can perform reduction properly. 
- - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have `coarse_segm` - attribute - Return: - Zero value loss with proper computation graph - """ - return densepose_predictor_outputs.coarse_segm.sum() * 0 diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/registry.py b/detectron2/projects/DensePose/densepose/modeling/losses/registry.py deleted file mode 100644 index 3e8db8e82343abd352482e3d740a6922a1e12ac5..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/registry.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from detectron2.utils.registry import Registry - -DENSEPOSE_LOSS_REGISTRY = Registry("DENSEPOSE_LOSS") diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/segm.py b/detectron2/projects/DensePose/densepose/modeling/losses/segm.py deleted file mode 100644 index cd04d129c1d05ee0f3273bc7256a60cf7cbe64b9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/segm.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from typing import Any, List -import torch -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from .utils import resample_data - - -class SegmentationLoss: - """ - Segmentation loss as cross-entropy for raw unnormalized scores given ground truth - labels. Segmentation ground truth labels are defined for the bounding box of - interest at some fixed resolution [S, S], where - S = MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE. - """ - - def __init__(self, cfg: CfgNode): - """ - Initialize segmentation loss from configuration options - - Args: - cfg (CfgNode): configuration options - """ - self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE - self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - - def __call__( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: Any, - ) -> torch.Tensor: - """ - Compute segmentation loss as cross-entropy on aligned segmentation - ground truth and estimated scores. 
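All of the `fake_value` methods above rely on the same idiom: `output.sum() * 0` yields a zero-valued loss that is still attached to the computation graph, so every parameter receives a (zero) gradient and DistributedDataParallel sees identical graphs on all workers. A tiny illustration:

import torch

pred = torch.randn(2, 3, requires_grad=True)  # stands in for a predictor output
fake_loss = pred.sum() * 0                    # value is 0, but grad_fn is preserved
fake_loss.backward()
print(pred.grad)                              # zeros, not None: pred stays in the graph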
- - Args: - proposals_with_gt (list of Instances): detections with associated ground truth data - densepose_predictor_outputs: an object of a dataclass that contains predictor outputs - with estimated values; assumed to have the following attributes: - * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S] - packed_annotations: packed annotations for efficient loss computation; - the following attributes are used: - - coarse_segm_gt - - bbox_xywh_gt - - bbox_xywh_est - """ - if packed_annotations.coarse_segm_gt is None: - return self.fake_value(densepose_predictor_outputs) - coarse_segm_est = densepose_predictor_outputs.coarse_segm[packed_annotations.bbox_indices] - with torch.no_grad(): - coarse_segm_gt = resample_data( - packed_annotations.coarse_segm_gt.unsqueeze(1), - packed_annotations.bbox_xywh_gt, - packed_annotations.bbox_xywh_est, - self.heatmap_size, - self.heatmap_size, - mode="nearest", - padding_mode="zeros", - ).squeeze(1) - if self.n_segm_chan == 2: - coarse_segm_gt = coarse_segm_gt > 0 - return F.cross_entropy(coarse_segm_est, coarse_segm_gt.long()) - - def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor: - """ - Fake segmentation loss used when no suitable ground truth data - was found in a batch. The loss has a value 0 and is primarily used to - construct the computation graph, so that `DistributedDataParallel` - has similar graphs on all GPUs and can perform reduction properly. - - Args: - densepose_predictor_outputs: DensePose predictor outputs, an object - of a dataclass that is assumed to have `coarse_segm` - attribute - Return: - Zero value loss with proper computation graph - """ - return densepose_predictor_outputs.coarse_segm.sum() * 0 diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/soft_embed.py b/detectron2/projects/DensePose/densepose/modeling/losses/soft_embed.py deleted file mode 100644 index 1ace3dac30813ff6e6a7e5a818bbf3df1b2b015c..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/soft_embed.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from typing import Any, Dict, List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.structures import Instances - -from densepose.data.meshes.catalog import MeshCatalog -from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix -from densepose.structures.mesh import create_mesh - -from .embed_utils import PackedCseAnnotations -from .utils import BilinearInterpolationHelper - - -class SoftEmbeddingLoss: - """ - Computes losses for estimated embeddings given annotated vertices. - Instances in a minibatch that correspond to the same mesh are grouped - together. For each group, loss is computed as cross-entropy for - unnormalized scores given ground truth mesh vertex ids. 
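`SegmentationLoss.__call__` above resamples the GT coarse segmentation from the GT box frame into the estimated box frame at heatmap resolution (via `resample_data`, defined in utils.py further below), optionally collapses it to foreground/background, and applies cross-entropy. A minimal sketch of the final step, assuming the resampling has already been done:

import torch
import torch.nn.functional as F

def coarse_segm_term(coarse_segm_est, coarse_segm_gt, n_segm_chan=2):
    # coarse_segm_est: [N, C, S, S] scores for the N instances with DensePose GT
    # coarse_segm_gt:  [N, S, S]    GT labels already resampled into the estimated box frame
    if n_segm_chan == 2:
        coarse_segm_gt = coarse_segm_gt > 0  # collapse body parts to foreground/background
    return F.cross_entropy(coarse_segm_est, coarse_segm_gt.long())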
- Scores are based on: - 1) squared distances between estimated vertex embeddings - and mesh vertex embeddings; - 2) geodesic distances between vertices of a mesh - """ - - def __init__(self, cfg: CfgNode): - """ - Initialize embedding loss from config - """ - self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA - self.geodist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA - - def __call__( - self, - proposals_with_gt: List[Instances], - densepose_predictor_outputs: Any, - packed_annotations: PackedCseAnnotations, - interpolator: BilinearInterpolationHelper, - embedder: nn.Module, - ) -> Dict[int, torch.Tensor]: - """ - Produces losses for estimated embeddings given annotated vertices. - Embeddings for all the vertices of a mesh are computed by the embedder. - Embeddings for observed pixels are estimated by a predictor. - Losses are computed as cross-entropy for unnormalized scores given - ground truth vertex IDs. - 1) squared distances between estimated vertex embeddings - and mesh vertex embeddings; - 2) geodesic distances between vertices of a mesh - - Args: - proposals_with_gt (list of Instances): detections with associated - ground truth data; each item corresponds to instances detected - on 1 image; the number of items corresponds to the number of - images in a batch - densepose_predictor_outputs: an object of a dataclass that contains predictor - outputs with estimated values; assumed to have the following attributes: - * embedding - embedding estimates, tensor of shape [N, D, S, S], where - N = number of instances (= sum N_i, where N_i is the number of - instances on image i) - D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE) - S = output size (width and height) - packed_annotations (PackedCseAnnotations): contains various data useful - for loss computation, each data is packed into a single tensor - interpolator (BilinearInterpolationHelper): bilinear interpolation helper - embedder (nn.Module): module that computes vertex embeddings for different meshes - Return: - dict(int -> tensor): losses for different mesh IDs - """ - losses = {} - for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique(): - mesh_id = mesh_id_tensor.item() - mesh_name = MeshCatalog.get_mesh_name(mesh_id) - # valid points are those that fall into estimated bbox - # and correspond to the current mesh - j_valid = interpolator.j_valid * ( # pyre-ignore[16] - packed_annotations.vertex_mesh_ids_gt == mesh_id - ) - if not torch.any(j_valid): - continue - # extract estimated embeddings for valid points - # -> tensor [J, D] - vertex_embeddings_i = normalize_embeddings( - interpolator.extract_at_points( - densepose_predictor_outputs.embedding, - slice_fine_segm=slice(None), - w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16] - w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16] - w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16] - w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16] - )[j_valid, :] - ) - # extract vertex ids for valid points - # -> tensor [J] - vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid] - # embeddings for all mesh vertices - # -> tensor [K, D] - mesh_vertex_embeddings = embedder(mesh_name) - # softmax values of geodesic distances for GT mesh vertices - # -> tensor [J, K] - mesh = create_mesh(mesh_name, mesh_vertex_embeddings.device) - geodist_softmax_values = F.softmax( - mesh.geodists[vertex_indices_i] / (-self.geodist_gauss_sigma), dim=1 - ) - # 
logsoftmax values for valid points - # -> tensor [J, K] - embdist_logsoftmax_values = F.log_softmax( - squared_euclidean_distance_matrix(vertex_embeddings_i, mesh_vertex_embeddings) - / (-self.embdist_gauss_sigma), - dim=1, - ) - losses[mesh_name] = (-geodist_softmax_values * embdist_logsoftmax_values).sum(1).mean() - - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is not a - # function. - for mesh_name in embedder.mesh_names: - if mesh_name not in losses: - losses[mesh_name] = self.fake_value( - densepose_predictor_outputs, embedder, mesh_name - ) - return losses - - def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module): - losses = {} - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is not a - # function. - for mesh_name in embedder.mesh_names: - losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name) - return losses - - def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str): - return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0 diff --git a/detectron2/projects/DensePose/densepose/modeling/losses/utils.py b/detectron2/projects/DensePose/densepose/modeling/losses/utils.py deleted file mode 100644 index f4475820c591743b9a13d57f39ce172bcbb7d529..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/losses/utils.py +++ /dev/null @@ -1,446 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple -import torch -from torch.nn import functional as F - -from detectron2.structures import BoxMode, Instances - -from densepose import DensePoseDataRelative - -LossDict = Dict[str, torch.Tensor] - - -def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z): - """ - Computes utility values for linear interpolation at points v. - The points are given as normalized offsets in the source interval - (v0_src, v0_src + size_src), more precisely: - v = v0_src + v_norm * size_src / 256.0 - The computed utilities include lower points v_lo, upper points v_hi, - interpolation weights v_w and flags j_valid indicating whether the - points falls into the destination interval (v0_dst, v0_dst + size_dst). 
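`SoftEmbeddingLoss` above replaces the hard one-hot target of `EmbeddingLoss` with a distribution over vertices obtained by softmaxing negative geodesic distances, and matches it against the log-softmax of negative squared embedding distances. A minimal sketch (names and sigma values are illustrative):

import torch
import torch.nn.functional as F

def soft_cse_term(pix_embed, vert_embed, geodists, gt_vertex_ids,
                  embdist_sigma=0.1, geodist_sigma=0.1):
    # pix_embed:     [J, D] normalized embeddings at the annotated points
    # vert_embed:    [K, D] embeddings of all mesh vertices
    # geodists:      [K, K] geodesic distances between mesh vertices
    # gt_vertex_ids: [J]    annotated GT vertex index per point
    target = F.softmax(geodists[gt_vertex_ids] / (-geodist_sigma), dim=1)        # [J, K]
    sq_dists = ((pix_embed[:, None, :] - vert_embed[None, :, :]) ** 2).sum(-1)   # [J, K]
    log_pred = F.log_softmax(sq_dists / (-embdist_sigma), dim=1)                 # [J, K]
    return (-target * log_pred).sum(1).mean()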
- - Args: - v_norm (:obj: `torch.Tensor`): tensor of size N containing - normalized point offsets - v0_src (:obj: `torch.Tensor`): tensor of size N containing - left bounds of source intervals for normalized points - size_src (:obj: `torch.Tensor`): tensor of size N containing - source interval sizes for normalized points - v0_dst (:obj: `torch.Tensor`): tensor of size N containing - left bounds of destination intervals - size_dst (:obj: `torch.Tensor`): tensor of size N containing - destination interval sizes - size_z (int): interval size for data to be interpolated - - Returns: - v_lo (:obj: `torch.Tensor`): int tensor of size N containing - indices of lower values used for interpolation, all values are - integers from [0, size_z - 1] - v_hi (:obj: `torch.Tensor`): int tensor of size N containing - indices of upper values used for interpolation, all values are - integers from [0, size_z - 1] - v_w (:obj: `torch.Tensor`): float tensor of size N containing - interpolation weights - j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing - 0 for points outside the estimation interval - (v0_est, v0_est + size_est) and 1 otherwise - """ - v = v0_src + v_norm * size_src / 256.0 - j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst) - v_grid = (v - v0_dst) * size_z / size_dst - v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1) - v_hi = (v_lo + 1).clamp(max=size_z - 1) - v_grid = torch.min(v_hi.float(), v_grid) - v_w = v_grid - v_lo.float() - return v_lo, v_hi, v_w, j_valid - - -class BilinearInterpolationHelper: - """ - Args: - packed_annotations: object that contains packed annotations - j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing - 0 for points to be discarded and 1 for points to be selected - y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values - in z_est for each point - y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values - in z_est for each point - x_lo (:obj: `torch.Tensor`): int tensor of indices of left values - in z_est for each point - x_hi (:obj: `torch.Tensor`): int tensor of indices of right values - in z_est for each point - w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M; - contains upper-left value weight for each point - w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M; - contains upper-right value weight for each point - w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M; - contains lower-left value weight for each point - w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M; - contains lower-right value weight for each point - """ - - def __init__( - self, - packed_annotations: Any, - j_valid: torch.Tensor, - y_lo: torch.Tensor, - y_hi: torch.Tensor, - x_lo: torch.Tensor, - x_hi: torch.Tensor, - w_ylo_xlo: torch.Tensor, - w_ylo_xhi: torch.Tensor, - w_yhi_xlo: torch.Tensor, - w_yhi_xhi: torch.Tensor, - ): - for k, v in locals().items(): - if k != "self": - setattr(self, k, v) - - @staticmethod - def from_matches( - packed_annotations: Any, densepose_outputs_size_hw: Tuple[int, int] - ) -> "BilinearInterpolationHelper": - """ - Args: - packed_annotations: annotations packed into tensors, the following - attributes are required: - - bbox_xywh_gt - - bbox_xywh_est - - x_gt - - y_gt - - point_bbox_with_dp_indices - - point_bbox_indices - densepose_outputs_size_hw (tuple [int, int]): resolution of - DensePose predictor outputs (H, W) - Return: - An instance of `BilinearInterpolationHelper` used to perform - interpolation for the given annotation points and output resolution - """ - - 
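A worked example of what `_linear_interpolation_utilities` above computes for one axis may help; the numbers are made up. Annotated offsets are stored on a 0..256 scale (hence the division by 256.0), and the resulting fractional weight combines with its counterpart on the other axis into the four bilinear corner weights used by `BilinearInterpolationHelper.from_matches` just below:

import torch

# a point at normalized x offset 130 in a GT box starting at x0_gt = 50 with width 100,
# mapped into an estimated box (x0_est = 40, w_est = 120) on a heatmap of width 112
v_norm, v0_src, size_src = torch.tensor([130.0]), torch.tensor([50.0]), torch.tensor([100.0])
v0_dst, size_dst, size_z = torch.tensor([40.0]), torch.tensor([120.0]), 112

v = v0_src + v_norm * size_src / 256.0        # absolute coordinate, ~100.78
v_grid = (v - v0_dst) * size_z / size_dst     # position on the output grid, ~56.73
x_lo = v_grid.floor().long().clamp(0, size_z - 1)        # 56
x_hi = (x_lo + 1).clamp(max=size_z - 1)                  # 57
x_w = torch.min(x_hi.float(), v_grid) - x_lo.float()     # ~0.73, fractional part

# with the analogous y_w:
#   w_ylo_xlo = (1 - x_w) * (1 - y_w)    w_ylo_xhi = x_w * (1 - y_w)
#   w_yhi_xlo = (1 - x_w) * y_w          w_yhi_xhi = x_w * y_w
print(x_lo.item(), x_hi.item(), round(x_w.item(), 3))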
zh, zw = densepose_outputs_size_hw - x0_gt, y0_gt, w_gt, h_gt = packed_annotations.bbox_xywh_gt[ - packed_annotations.point_bbox_with_dp_indices - ].unbind(dim=1) - x0_est, y0_est, w_est, h_est = packed_annotations.bbox_xywh_est[ - packed_annotations.point_bbox_with_dp_indices - ].unbind(dim=1) - x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities( - packed_annotations.x_gt, x0_gt, w_gt, x0_est, w_est, zw - ) - y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities( - packed_annotations.y_gt, y0_gt, h_gt, y0_est, h_est, zh - ) - j_valid = jx_valid * jy_valid - - w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w) - w_ylo_xhi = x_w * (1.0 - y_w) - w_yhi_xlo = (1.0 - x_w) * y_w - w_yhi_xhi = x_w * y_w - - return BilinearInterpolationHelper( - packed_annotations, - j_valid, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, # pyre-ignore[6] - w_ylo_xhi, - # pyre-fixme[6]: Expected `Tensor` for 9th param but got `float`. - w_yhi_xlo, - w_yhi_xhi, - ) - - def extract_at_points( - self, - z_est, - slice_fine_segm=None, - w_ylo_xlo=None, - w_ylo_xhi=None, - w_yhi_xlo=None, - w_yhi_xhi=None, - ): - """ - Extract ground truth values z_gt for valid point indices and estimated - values z_est using bilinear interpolation over top-left (y_lo, x_lo), - top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right - (y_hi, x_hi) values in z_est with corresponding weights: - w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi. - Use slice_fine_segm to slice dim=1 in z_est - """ - slice_fine_segm = ( - self.packed_annotations.fine_segm_labels_gt - if slice_fine_segm is None - else slice_fine_segm - ) - w_ylo_xlo = self.w_ylo_xlo if w_ylo_xlo is None else w_ylo_xlo - w_ylo_xhi = self.w_ylo_xhi if w_ylo_xhi is None else w_ylo_xhi - w_yhi_xlo = self.w_yhi_xlo if w_yhi_xlo is None else w_yhi_xlo - w_yhi_xhi = self.w_yhi_xhi if w_yhi_xhi is None else w_yhi_xhi - - index_bbox = self.packed_annotations.point_bbox_indices - z_est_sampled = ( - z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_lo] * w_ylo_xlo - + z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_hi] * w_ylo_xhi - + z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_lo] * w_yhi_xlo - + z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_hi] * w_yhi_xhi - ) - return z_est_sampled - - -def resample_data( - z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode: str = "nearest", padding_mode: str = "zeros" -): - """ - Args: - z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be - resampled - bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing - source bounding boxes in format XYWH - bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing - destination bounding boxes in format XYWH - Return: - zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout) - with resampled values of z, where D is the discretization size - """ - n = bbox_xywh_src.size(0) - assert n == bbox_xywh_dst.size(0), ( - "The number of " - "source ROIs for resampling ({}) should be equal to the number " - "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0)) - ) - x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1) - x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1) - x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1 - y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1 - x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1 - y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1 - grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout - grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / 
hout - grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout) - grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout) - # pyre-fixme[16]: `float` has no attribute `__getitem__`. - dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout) - dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout) - x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout) - y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout) - grid_x = grid_w_expanded * dx_expanded + x0_expanded - grid_y = grid_h_expanded * dy_expanded + y0_expanded - grid = torch.stack((grid_x, grid_y), dim=3) - # resample Z from (N, C, H, W) into (N, C, Hout, Wout) - zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True) - return zresampled - - -class AnnotationsAccumulator(ABC): - """ - Abstract class for an accumulator for annotations that can produce - dense annotations packed into tensors. - """ - - @abstractmethod - def accumulate(self, instances_one_image: Instances): - """ - Accumulate instances data for one image - - Args: - instances_one_image (Instances): instances data to accumulate - """ - pass - - @abstractmethod - def pack(self) -> Any: - """ - Pack data into tensors - """ - pass - - -@dataclass -class PackedChartBasedAnnotations: - """ - Packed annotations for chart-based model training. The following attributes - are defined: - - fine_segm_labels_gt (tensor [K] of `int64`): GT fine segmentation point labels - - x_gt (tensor [K] of `float32`): GT normalized X point coordinates - - y_gt (tensor [K] of `float32`): GT normalized Y point coordinates - - u_gt (tensor [K] of `float32`): GT point U values - - v_gt (tensor [K] of `float32`): GT point V values - - coarse_segm_gt (tensor [N, S, S] of `float32`): GT segmentation for bounding boxes - - bbox_xywh_gt (tensor [N, 4] of `float32`): selected GT bounding boxes in - XYWH format - - bbox_xywh_est (tensor [N, 4] of `float32`): selected matching estimated - bounding boxes in XYWH format - - point_bbox_with_dp_indices (tensor [K] of `int64`): indices of bounding boxes - with DensePose annotations that correspond to the point data - - point_bbox_indices (tensor [K] of `int64`): indices of bounding boxes - (not necessarily the selected ones with DensePose data) that correspond - to the point data - - bbox_indices (tensor [N] of `int64`): global indices of selected bounding - boxes with DensePose annotations; these indices could be used to access - features that are computed for all bounding boxes, not only the ones with - DensePose annotations. - Here K is the total number of points and N is the total number of instances - with DensePose annotations. - """ - - fine_segm_labels_gt: torch.Tensor - x_gt: torch.Tensor - y_gt: torch.Tensor - u_gt: torch.Tensor - v_gt: torch.Tensor - coarse_segm_gt: Optional[torch.Tensor] - bbox_xywh_gt: torch.Tensor - bbox_xywh_est: torch.Tensor - point_bbox_with_dp_indices: torch.Tensor - point_bbox_indices: torch.Tensor - bbox_indices: torch.Tensor - - -class ChartBasedAnnotationsAccumulator(AnnotationsAccumulator): - """ - Accumulates annotations by batches that correspond to objects detected on - individual images. Can pack them together into single tensors. 
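`resample_data` above implements box-to-box resampling with `F.grid_sample`: the destination box is expressed in the source box's normalized [-1, 1] frame and a regular grid over it is sampled. A single-instance sketch of the same idea (`resample_data` itself handles a batch of boxes at once; the helper name here is illustrative):

import torch
import torch.nn.functional as F

def resample_box_to_box(z, box_src_xywh, box_dst_xywh, hout, wout, mode="nearest"):
    # z: [1, C, H, W] data defined over box_src; returns [1, C, hout, wout] values of z
    # sampled over box_dst; boxes are (x, y, w, h) tuples in the same absolute coordinates
    x0s, y0s, ws, hs = box_src_xywh
    x0d, y0d, wd, hd = box_dst_xywh
    # destination box corners in the source box's normalized [-1, 1] frame
    x0n, x1n = 2 * (x0d - x0s) / ws - 1, 2 * (x0d + wd - x0s) / ws - 1
    y0n, y1n = 2 * (y0d - y0s) / hs - 1, 2 * (y0d + hd - y0s) / hs - 1
    gw = torch.arange(wout, dtype=torch.float32) / wout
    gh = torch.arange(hout, dtype=torch.float32) / hout
    grid_x = (gw * (x1n - x0n) + x0n)[None, :].expand(hout, wout)
    grid_y = (gh * (y1n - y0n) + y0n)[:, None].expand(hout, wout)
    grid = torch.stack((grid_x, grid_y), dim=-1)[None]   # [1, hout, wout, 2], (x, y) order
    return F.grid_sample(z, grid, mode=mode, padding_mode="zeros", align_corners=True)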
- """ - - def __init__(self): - self.i_gt = [] - self.x_gt = [] - self.y_gt = [] - self.u_gt = [] - self.v_gt = [] - self.s_gt = [] - self.bbox_xywh_gt = [] - self.bbox_xywh_est = [] - self.point_bbox_with_dp_indices = [] - self.point_bbox_indices = [] - self.bbox_indices = [] - self.nxt_bbox_with_dp_index = 0 - self.nxt_bbox_index = 0 - - def accumulate(self, instances_one_image: Instances): - """ - Accumulate instances data for one image - - Args: - instances_one_image (Instances): instances data to accumulate - """ - boxes_xywh_est = BoxMode.convert( - instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - boxes_xywh_gt = BoxMode.convert( - instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - n_matches = len(boxes_xywh_gt) - assert n_matches == len( - boxes_xywh_est - ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes" - if not n_matches: - # no detection - GT matches - return - if ( - not hasattr(instances_one_image, "gt_densepose") - or instances_one_image.gt_densepose is None - ): - # no densepose GT for the detections, just increase the bbox index - self.nxt_bbox_index += n_matches - return - for box_xywh_est, box_xywh_gt, dp_gt in zip( - boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose - ): - if (dp_gt is not None) and (len(dp_gt.x) > 0): - # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`. - # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`. - self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt) - self.nxt_bbox_index += 1 - - def _do_accumulate( - self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: DensePoseDataRelative - ): - """ - Accumulate instances data for one image, given that the data is not empty - - Args: - box_xywh_gt (tensor): GT bounding box - box_xywh_est (tensor): estimated bounding box - dp_gt (DensePoseDataRelative): GT densepose data - """ - self.i_gt.append(dp_gt.i) - self.x_gt.append(dp_gt.x) - self.y_gt.append(dp_gt.y) - self.u_gt.append(dp_gt.u) - self.v_gt.append(dp_gt.v) - if hasattr(dp_gt, "segm"): - self.s_gt.append(dp_gt.segm.unsqueeze(0)) - self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4)) - self.bbox_xywh_est.append(box_xywh_est.view(-1, 4)) - self.point_bbox_with_dp_indices.append( - torch.full_like(dp_gt.i, self.nxt_bbox_with_dp_index) - ) - self.point_bbox_indices.append(torch.full_like(dp_gt.i, self.nxt_bbox_index)) - self.bbox_indices.append(self.nxt_bbox_index) - self.nxt_bbox_with_dp_index += 1 - - def pack(self) -> Optional[PackedChartBasedAnnotations]: - """ - Pack data into tensors - """ - if not len(self.i_gt): - # TODO: - # returning proper empty annotations would require - # creating empty tensors of appropriate shape and - # type on an appropriate device; - # we return None so far to indicate empty annotations - return None - return PackedChartBasedAnnotations( - fine_segm_labels_gt=torch.cat(self.i_gt, 0).long(), - x_gt=torch.cat(self.x_gt, 0), - y_gt=torch.cat(self.y_gt, 0), - u_gt=torch.cat(self.u_gt, 0), - v_gt=torch.cat(self.v_gt, 0), - # ignore segmentation annotations, if not all the instances contain those - coarse_segm_gt=( - torch.cat(self.s_gt, 0) if len(self.s_gt) == len(self.bbox_xywh_gt) else None - ), - bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0), - bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0), - point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0).long(), - point_bbox_indices=torch.cat(self.point_bbox_indices, 0).long(), - 
bbox_indices=torch.as_tensor( - self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device - ).long(), - ) - - -def extract_packed_annotations_from_matches( - proposals_with_targets: List[Instances], accumulator: AnnotationsAccumulator -) -> Any: - for proposals_targets_per_image in proposals_with_targets: - accumulator.accumulate(proposals_targets_per_image) - return accumulator.pack() - - -def sample_random_indices( - n_indices: int, n_samples: int, device: Optional[torch.device] = None -) -> Optional[torch.Tensor]: - """ - Samples `n_samples` random indices from range `[0..n_indices - 1]`. - If `n_indices` is smaller than `n_samples`, returns `None` meaning that all indices - are selected. - Args: - n_indices (int): total number of indices - n_samples (int): number of indices to sample - device (torch.device): the desired device of returned tensor - Return: - Tensor of selected vertex indices, or `None`, if all vertices are selected - """ - if (n_samples <= 0) or (n_indices <= n_samples): - return None - indices = torch.randperm(n_indices, device=device)[:n_samples] - return indices diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/__init__.py b/detectron2/projects/DensePose/densepose/modeling/predictors/__init__.py deleted file mode 100644 index c749ea264690d0b4c85abc520e7476bc4365175d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/predictors/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .chart import DensePoseChartPredictor -from .chart_confidence import DensePoseChartConfidencePredictorMixin -from .chart_with_confidence import DensePoseChartWithConfidencePredictor -from .cse import DensePoseEmbeddingPredictor -from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin -from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor -from .registry import DENSEPOSE_PREDICTOR_REGISTRY diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/chart.py b/detectron2/projects/DensePose/densepose/modeling/predictors/chart.py deleted file mode 100644 index 67fc401d70fe5e7d7baec3530d435955d4a23f7c..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/predictors/chart.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -import torch -from torch import nn - -from detectron2.config import CfgNode -from detectron2.layers import ConvTranspose2d, interpolate - -from ...structures import DensePoseChartPredictorOutput -from ..utils import initialize_module_params -from .registry import DENSEPOSE_PREDICTOR_REGISTRY - - -@DENSEPOSE_PREDICTOR_REGISTRY.register() -class DensePoseChartPredictor(nn.Module): - """ - Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input - and produces 4 tensors which represent DensePose results for predefined body parts - (patches / charts): - * coarse segmentation, a tensor of shape [N, K, Hout, Wout] - * fine segmentation, a tensor of shape [N, C, Hout, Wout] - * U coordinates, a tensor of shape [N, C, Hout, Wout] - * V coordinates, a tensor of shape [N, C, Hout, Wout] - where - - N is the number of instances - - K is the number of coarse segmentation channels ( - 2 = foreground / background, - 15 = one of 14 body parts / background) - - C is the number of fine segmentation channels ( - 24 fine body parts / background) - - Hout and Wout are height and width of predictions - """ - - def __init__(self, cfg: CfgNode, input_channels: int): - """ - Initialize predictor using configuration options - - Args: - cfg (CfgNode): configuration options - input_channels (int): input tensor size along the channel dimension - """ - super().__init__() - dim_in = input_channels - n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - # coarse segmentation - self.ann_index_lowres = ConvTranspose2d( - dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - # fine segmentation - self.index_uv_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - # U - self.u_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - # V - self.v_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE - initialize_module_params(self) - - def interp2d(self, tensor_nchw: torch.Tensor): - """ - Bilinear interpolation method to be used for upscaling - - Args: - tensor_nchw (tensor): tensor of shape (N, C, H, W) - Return: - tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed - by applying the scale factor to H and W - """ - return interpolate( - tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False - ) - - def forward(self, head_outputs: torch.Tensor): - """ - Perform forward step on DensePose head outputs - - Args: - head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W] - Return: - An instance of DensePoseChartPredictorOutput - """ - return DensePoseChartPredictorOutput( - coarse_segm=self.interp2d(self.ann_index_lowres(head_outputs)), - fine_segm=self.interp2d(self.index_uv_lowres(head_outputs)), - u=self.interp2d(self.u_lowres(head_outputs)), - v=self.interp2d(self.v_lowres(head_outputs)), - ) diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/chart_confidence.py b/detectron2/projects/DensePose/densepose/modeling/predictors/chart_confidence.py deleted file mode 100644 index d2220efa3a8c48e8f86bb4d1d11b3643c3cd6157..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/DensePose/densepose/modeling/predictors/chart_confidence.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any -import torch -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.layers import ConvTranspose2d - -from ...structures import decorate_predictor_output_class_with_confidences -from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType -from ..utils import initialize_module_params - - -class DensePoseChartConfidencePredictorMixin: - """ - Predictor contains the last layers of a DensePose model that take DensePose head - outputs as an input and produce model outputs. Confidence predictor mixin is used - to generate confidences for segmentation and UV tensors estimated by some - base predictor. Several assumptions need to hold for the base predictor: - 1) the `forward` method must return SIUV tuple as the first result ( - S = coarse segmentation, I = fine segmentation, U and V are intrinsic - chart coordinates) - 2) `interp2d` method must be defined to perform bilinear interpolation; - the same method is typically used for SIUV and confidences - Confidence predictor mixin provides confidence estimates, as described in: - N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences - from Noisy Labels, NeurIPS 2019 - A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020 - """ - - def __init__(self, cfg: CfgNode, input_channels: int): - """ - Initialize confidence predictor using configuration options. - - Args: - cfg (CfgNode): configuration options - input_channels (int): number of input channels - """ - # we rely on base predictor to call nn.Module.__init__ - super().__init__(cfg, input_channels) # pyre-ignore[19] - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - self._initialize_confidence_estimation_layers(cfg, input_channels) - self._registry = {} - initialize_module_params(self) # pyre-ignore[6] - - def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int): - """ - Initialize confidence estimation layers based on configuration options - - Args: - cfg (CfgNode): configuration options - dim_in (int): number of input channels - """ - dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - if self.confidence_model_cfg.uv_confidence.enabled: - if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - self.sigma_2_lowres = ConvTranspose2d( # pyre-ignore[16] - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - elif ( - self.confidence_model_cfg.uv_confidence.type - == DensePoseUVConfidenceType.INDEP_ANISO - ): - self.sigma_2_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.kappa_u_lowres = ConvTranspose2d( # pyre-ignore[16] - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.kappa_v_lowres = ConvTranspose2d( # pyre-ignore[16] - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - else: - raise ValueError( - f"Unknown confidence model type: " - f"{self.confidence_model_cfg.confidence_model_type}" - ) - if self.confidence_model_cfg.segm_confidence.enabled: - self.fine_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16] - dim_in, 1, kernel_size, 
stride=2, padding=int(kernel_size / 2 - 1) - ) - self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16] - dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - - def forward(self, head_outputs: torch.Tensor): - """ - Perform forward operation on head outputs used as inputs for the predictor. - Calls forward method from the base predictor and uses its outputs to compute - confidences. - - Args: - head_outputs (Tensor): head outputs used as predictor inputs - Return: - An instance of outputs with confidences, - see `decorate_predictor_output_class_with_confidences` - """ - # assuming base class returns SIUV estimates in its first result - base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16] - - # create output instance by extending base predictor outputs: - output = self._create_output_instance(base_predictor_outputs) - - if self.confidence_model_cfg.uv_confidence.enabled: - if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - # assuming base class defines interp2d method for bilinear interpolation - output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs)) # pyre-ignore[16] - elif ( - self.confidence_model_cfg.uv_confidence.type - == DensePoseUVConfidenceType.INDEP_ANISO - ): - # assuming base class defines interp2d method for bilinear interpolation - output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs)) - output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs)) # pyre-ignore[16] - output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs)) # pyre-ignore[16] - else: - raise ValueError( - f"Unknown confidence model type: " - f"{self.confidence_model_cfg.confidence_model_type}" - ) - if self.confidence_model_cfg.segm_confidence.enabled: - # base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes - # base predictor is assumed to define `interp2d` method for bilinear interpolation - output.fine_segm_confidence = ( - F.softplus( - self.interp2d(self.fine_segm_confidence_lowres(head_outputs)) # pyre-ignore[16] - ) - + self.confidence_model_cfg.segm_confidence.epsilon - ) - output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave( - output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1 - ) - output.coarse_segm_confidence = ( - F.softplus( - self.interp2d( - self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16] - ) - ) - + self.confidence_model_cfg.segm_confidence.epsilon - ) - output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave( - output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1 - ) - - return output - - def _create_output_instance(self, base_predictor_outputs: Any): - """ - Create an instance of predictor outputs by copying the outputs from the - base predictor and initializing confidence - - Args: - base_predictor_outputs: an instance of base predictor outputs - (the outputs type is assumed to be a dataclass) - Return: - An instance of outputs with confidences - """ - PredictorOutput = decorate_predictor_output_class_with_confidences( - type(base_predictor_outputs) # pyre-ignore[6] - ) - # base_predictor_outputs is assumed to be a dataclass - # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields - output = PredictorOutput( - **base_predictor_outputs.__dict__, - coarse_segm_confidence=None, - fine_segm_confidence=None, - sigma_1=None, - sigma_2=None, - kappa_u=None, - kappa_v=None, - ) - return 
output diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py b/detectron2/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py deleted file mode 100644 index 902032c77c65408e0268077f776bd957e80091a1..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor -from .registry import DENSEPOSE_PREDICTOR_REGISTRY - - -@DENSEPOSE_PREDICTOR_REGISTRY.register() -class DensePoseChartWithConfidencePredictor( - DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor -): - """ - Predictor that combines chart and chart confidence estimation - """ - - pass diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/cse.py b/detectron2/projects/DensePose/densepose/modeling/predictors/cse.py deleted file mode 100644 index 8494b7975bab1f64e704c4d7c6bdcca4a43ba817..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/predictors/cse.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -import torch -from torch import nn - -from detectron2.config import CfgNode -from detectron2.layers import ConvTranspose2d, interpolate - -from ...structures import DensePoseEmbeddingPredictorOutput -from ..utils import initialize_module_params -from .registry import DENSEPOSE_PREDICTOR_REGISTRY - - -@DENSEPOSE_PREDICTOR_REGISTRY.register() -class DensePoseEmbeddingPredictor(nn.Module): - """ - Last layers of a DensePose model that take DensePose head outputs as an input - and produce model outputs for continuous surface embeddings (CSE). 
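Aside on the predictor-with-confidence classes removed above: `DensePoseChartWithConfidencePredictor` is just the confidence mixin composed with the base chart predictor through multiple inheritance, so the mixin's `super().forward(...)` resolves to the base predictor via the MRO. Below is a minimal, self-contained sketch of that cooperative-inheritance pattern; the class and attribute names are toy stand-ins (not detectron2 APIs) and the layers are placeholders.

```python
import torch
from torch import nn
from torch.nn import functional as F


class BasePredictor(nn.Module):
    """Toy stand-in for DensePoseChartPredictor: produces the base outputs."""

    def __init__(self, cfg, input_channels):
        super().__init__()
        self.proj = nn.Conv2d(input_channels, 4, kernel_size=1)

    def forward(self, x):
        return {"fine_segm": self.proj(x)}


class ConfidenceMixin:
    """Toy stand-in for the confidence mixin.

    It relies on the base predictor (next in the MRO) to call
    nn.Module.__init__ and to produce the base outputs via super().forward().
    """

    def __init__(self, cfg, input_channels):
        super().__init__(cfg, input_channels)  # reaches BasePredictor.__init__
        self.conf = nn.Conv2d(input_channels, 1, kernel_size=1)

    def forward(self, x):
        out = super().forward(x)               # base predictor outputs
        out["confidence"] = F.softplus(self.conf(x))
        return out


class PredictorWithConfidence(ConfidenceMixin, BasePredictor):
    """Mixin listed first, base predictor second -- same ordering as above."""
    pass


if __name__ == "__main__":
    model = PredictorWithConfidence(cfg=None, input_channels=8)
    y = model(torch.randn(2, 8, 16, 16))
    print(sorted(y))  # ['confidence', 'fine_segm']
```

The ordering of the bases matters: with the mixin first, its `forward` wraps the base predictor's `forward`, which is exactly how the deleted `*WithConfidencePredictor` classes combine the two behaviours without any extra code.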
- """ - - def __init__(self, cfg: CfgNode, input_channels: int): - """ - Initialize predictor using configuration options - - Args: - cfg (CfgNode): configuration options - input_channels (int): input tensor size along the channel dimension - """ - super().__init__() - dim_in = input_channels - n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - # coarse segmentation - self.coarse_segm_lowres = ConvTranspose2d( - dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - # embedding - self.embed_lowres = ConvTranspose2d( - dim_in, embed_size, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE - initialize_module_params(self) - - def interp2d(self, tensor_nchw: torch.Tensor): - """ - Bilinear interpolation method to be used for upscaling - - Args: - tensor_nchw (tensor): tensor of shape (N, C, H, W) - Return: - tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed - by applying the scale factor to H and W - """ - return interpolate( - tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False - ) - - def forward(self, head_outputs): - """ - Perform forward step on DensePose head outputs - - Args: - head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W] - """ - embed_lowres = self.embed_lowres(head_outputs) - coarse_segm_lowres = self.coarse_segm_lowres(head_outputs) - embed = self.interp2d(embed_lowres) - coarse_segm = self.interp2d(coarse_segm_lowres) - return DensePoseEmbeddingPredictorOutput(embedding=embed, coarse_segm=coarse_segm) diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/cse_confidence.py b/detectron2/projects/DensePose/densepose/modeling/predictors/cse_confidence.py deleted file mode 100644 index 7d19b354fa14eb5f79e584c090f2bc0cb4d28c5f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/predictors/cse_confidence.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from typing import Any -import torch -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.layers import ConvTranspose2d - -from densepose.modeling.confidence import DensePoseConfidenceModelConfig -from densepose.modeling.utils import initialize_module_params -from densepose.structures import decorate_cse_predictor_output_class_with_confidences - - -class DensePoseEmbeddingConfidencePredictorMixin: - """ - Predictor contains the last layers of a DensePose model that take DensePose head - outputs as an input and produce model outputs. Confidence predictor mixin is used - to generate confidences for coarse segmentation estimated by some - base predictor. Several assumptions need to hold for the base predictor: - 1) the `forward` method must return CSE DensePose head outputs, - tensor of shape [N, D, H, W] - 2) `interp2d` method must be defined to perform bilinear interpolation; - the same method is typically used for masks and confidences - Confidence predictor mixin provides confidence estimates, as described in: - N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences - from Noisy Labels, NeurIPS 2019 - A. 
Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020 - """ - - def __init__(self, cfg: CfgNode, input_channels: int): - """ - Initialize confidence predictor using configuration options. - - Args: - cfg (CfgNode): configuration options - input_channels (int): number of input channels - """ - # we rely on base predictor to call nn.Module.__init__ - super().__init__(cfg, input_channels) # pyre-ignore[19] - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - self._initialize_confidence_estimation_layers(cfg, input_channels) - self._registry = {} - initialize_module_params(self) # pyre-ignore[6] - - def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int): - """ - Initialize confidence estimation layers based on configuration options - - Args: - cfg (CfgNode): configuration options - dim_in (int): number of input channels - """ - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - if self.confidence_model_cfg.segm_confidence.enabled: - self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16] - dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - - def forward(self, head_outputs: torch.Tensor): - """ - Perform forward operation on head outputs used as inputs for the predictor. - Calls forward method from the base predictor and uses its outputs to compute - confidences. - - Args: - head_outputs (Tensor): head outputs used as predictor inputs - Return: - An instance of outputs with confidences, - see `decorate_cse_predictor_output_class_with_confidences` - """ - # assuming base class returns SIUV estimates in its first result - base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16] - - # create output instance by extending base predictor outputs: - output = self._create_output_instance(base_predictor_outputs) - - if self.confidence_model_cfg.segm_confidence.enabled: - # base predictor outputs are assumed to have `coarse_segm` attribute - # base predictor is assumed to define `interp2d` method for bilinear interpolation - output.coarse_segm_confidence = ( - F.softplus( - self.interp2d( # pyre-ignore[16] - self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16] - ) - ) - + self.confidence_model_cfg.segm_confidence.epsilon - ) - output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave( - output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1 - ) - - return output - - def _create_output_instance(self, base_predictor_outputs: Any): - """ - Create an instance of predictor outputs by copying the outputs from the - base predictor and initializing confidence - - Args: - base_predictor_outputs: an instance of base predictor outputs - (the outputs type is assumed to be a dataclass) - Return: - An instance of outputs with confidences - """ - PredictorOutput = decorate_cse_predictor_output_class_with_confidences( - type(base_predictor_outputs) # pyre-ignore[6] - ) - # base_predictor_outputs is assumed to be a dataclass - # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields - output = PredictorOutput( - **base_predictor_outputs.__dict__, - coarse_segm_confidence=None, - ) - return output diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/cse_with_confidence.py b/detectron2/projects/DensePose/densepose/modeling/predictors/cse_with_confidence.py deleted file mode 100644 index 02389dbcbe734c89e6eb86757d877c9657fd12b1..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/DensePose/densepose/modeling/predictors/cse_with_confidence.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor -from .registry import DENSEPOSE_PREDICTOR_REGISTRY - - -@DENSEPOSE_PREDICTOR_REGISTRY.register() -class DensePoseEmbeddingWithConfidencePredictor( - DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor -): - """ - Predictor that combines CSE and CSE confidence estimation - """ - - pass diff --git a/detectron2/projects/DensePose/densepose/modeling/predictors/registry.py b/detectron2/projects/DensePose/densepose/modeling/predictors/registry.py deleted file mode 100644 index c883ba3538e8d8e5b11c68811fdf1990a2964a71..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/predictors/registry.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from detectron2.utils.registry import Registry - -DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR") diff --git a/detectron2/projects/DensePose/densepose/modeling/roi_heads/__init__.py b/detectron2/projects/DensePose/densepose/modeling/roi_heads/__init__.py deleted file mode 100644 index 0a055a65454517876107c621ba53e4742fa5eb54..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/roi_heads/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from .v1convx import DensePoseV1ConvXHead -from .deeplab import DensePoseDeepLabHead -from .registry import ROI_DENSEPOSE_HEAD_REGISTRY -from .roi_head import Decoder, DensePoseROIHeads diff --git a/detectron2/projects/DensePose/densepose/modeling/roi_heads/deeplab.py b/detectron2/projects/DensePose/densepose/modeling/roi_heads/deeplab.py deleted file mode 100644 index 6f42d20681a34b319c15967548839ffffa77c89a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/roi_heads/deeplab.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.layers import Conv2d - -from .registry import ROI_DENSEPOSE_HEAD_REGISTRY - - -@ROI_DENSEPOSE_HEAD_REGISTRY.register() -class DensePoseDeepLabHead(nn.Module): - """ - DensePose head using DeepLabV3 model from - "Rethinking Atrous Convolution for Semantic Image Segmentation" - . 
- """ - - def __init__(self, cfg: CfgNode, input_channels: int): - super(DensePoseDeepLabHead, self).__init__() - # fmt: off - hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL - norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM - self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS - self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON - # fmt: on - pad_size = kernel_size // 2 - n_channels = input_channels - - self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56 - self.add_module("ASPP", self.ASPP) - - if self.use_nonlocal: - self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True) - self.add_module("NLBlock", self.NLBlock) - # weight_init.c2_msra_fill(self.ASPP) - - for i in range(self.n_stacked_convs): - norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None - layer = Conv2d( - n_channels, - hidden_dim, - kernel_size, - stride=1, - padding=pad_size, - bias=not norm, - norm=norm_module, - ) - weight_init.c2_msra_fill(layer) - n_channels = hidden_dim - layer_name = self._get_layer_name(i) - self.add_module(layer_name, layer) - self.n_out_channels = hidden_dim - # initialize_module_params(self) - - def forward(self, features): - x0 = features - x = self.ASPP(x0) - if self.use_nonlocal: - x = self.NLBlock(x) - output = x - for i in range(self.n_stacked_convs): - layer_name = self._get_layer_name(i) - x = getattr(self, layer_name)(x) - x = F.relu(x) - output = x - return output - - def _get_layer_name(self, i: int): - layer_name = "body_conv_fcn{}".format(i + 1) - return layer_name - - -# Copied from -# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py -# See https://arxiv.org/pdf/1706.05587.pdf for details -class ASPPConv(nn.Sequential): - def __init__(self, in_channels, out_channels, dilation): - modules = [ - nn.Conv2d( - in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False - ), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ] - super(ASPPConv, self).__init__(*modules) - - -class ASPPPooling(nn.Sequential): - def __init__(self, in_channels, out_channels): - super(ASPPPooling, self).__init__( - nn.AdaptiveAvgPool2d(1), - nn.Conv2d(in_channels, out_channels, 1, bias=False), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ) - - def forward(self, x): - size = x.shape[-2:] - x = super(ASPPPooling, self).forward(x) - return F.interpolate(x, size=size, mode="bilinear", align_corners=False) - - -class ASPP(nn.Module): - def __init__(self, in_channels, atrous_rates, out_channels): - super(ASPP, self).__init__() - modules = [] - modules.append( - nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ) - ) - - rate1, rate2, rate3 = tuple(atrous_rates) - modules.append(ASPPConv(in_channels, out_channels, rate1)) - modules.append(ASPPConv(in_channels, out_channels, rate2)) - modules.append(ASPPConv(in_channels, out_channels, rate3)) - modules.append(ASPPPooling(in_channels, out_channels)) - - self.convs = nn.ModuleList(modules) - - self.project = nn.Sequential( - nn.Conv2d(5 * out_channels, out_channels, 1, bias=False), - # nn.BatchNorm2d(out_channels), - nn.ReLU(), - # nn.Dropout(0.5) - ) - - def forward(self, x): - res = [] - for conv in self.convs: - res.append(conv(x)) - res = torch.cat(res, dim=1) - return self.project(res) - - -# copied from -# 
https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py -# See https://arxiv.org/abs/1711.07971 for details -class _NonLocalBlockND(nn.Module): - def __init__( - self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True - ): - super(_NonLocalBlockND, self).__init__() - - assert dimension in [1, 2, 3] - - self.dimension = dimension - self.sub_sample = sub_sample - - self.in_channels = in_channels - self.inter_channels = inter_channels - - if self.inter_channels is None: - self.inter_channels = in_channels // 2 - if self.inter_channels == 0: - self.inter_channels = 1 - - if dimension == 3: - conv_nd = nn.Conv3d - max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) - bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d - elif dimension == 2: - conv_nd = nn.Conv2d - max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) - bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d - else: - conv_nd = nn.Conv1d - max_pool_layer = nn.MaxPool1d(kernel_size=2) - bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d - - self.g = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - - if bn_layer: - self.W = nn.Sequential( - conv_nd( - in_channels=self.inter_channels, - out_channels=self.in_channels, - kernel_size=1, - stride=1, - padding=0, - ), - bn(32, self.in_channels), - ) - nn.init.constant_(self.W[1].weight, 0) - nn.init.constant_(self.W[1].bias, 0) - else: - self.W = conv_nd( - in_channels=self.inter_channels, - out_channels=self.in_channels, - kernel_size=1, - stride=1, - padding=0, - ) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - self.theta = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - self.phi = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - - if sub_sample: - self.g = nn.Sequential(self.g, max_pool_layer) - self.phi = nn.Sequential(self.phi, max_pool_layer) - - def forward(self, x): - """ - :param x: (b, c, t, h, w) - :return: - """ - - batch_size = x.size(0) - - g_x = self.g(x).view(batch_size, self.inter_channels, -1) - g_x = g_x.permute(0, 2, 1) - - theta_x = self.theta(x).view(batch_size, self.inter_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - phi_x = self.phi(x).view(batch_size, self.inter_channels, -1) - f = torch.matmul(theta_x, phi_x) - f_div_C = F.softmax(f, dim=-1) - - y = torch.matmul(f_div_C, g_x) - y = y.permute(0, 2, 1).contiguous() - y = y.view(batch_size, self.inter_channels, *x.size()[2:]) - W_y = self.W(y) - z = W_y + x - - return z - - -class NONLocalBlock2D(_NonLocalBlockND): - def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True): - super(NONLocalBlock2D, self).__init__( - in_channels, - inter_channels=inter_channels, - dimension=2, - sub_sample=sub_sample, - bn_layer=bn_layer, - ) diff --git a/detectron2/projects/DensePose/densepose/modeling/roi_heads/registry.py b/detectron2/projects/DensePose/densepose/modeling/roi_heads/registry.py deleted file mode 100644 index 89514279ffba6a65fc499e03bc0177ed8039482f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/roi_heads/registry.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -from detectron2.utils.registry import Registry - -ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD") diff --git a/detectron2/projects/DensePose/densepose/modeling/roi_heads/roi_head.py b/detectron2/projects/DensePose/densepose/modeling/roi_heads/roi_head.py deleted file mode 100644 index d274f2f412d512d2c7e8b5478246ff0b5e0aa1f9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/roi_heads/roi_head.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import numpy as np -from typing import Dict, List, Optional -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn as nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec, get_norm -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.poolers import ROIPooler -from detectron2.modeling.roi_heads import select_foreground_proposals -from detectron2.structures import ImageList, Instances - -from .. import ( - build_densepose_data_filter, - build_densepose_embedder, - build_densepose_head, - build_densepose_losses, - build_densepose_predictor, - densepose_inference, -) - - -class Decoder(nn.Module): - """ - A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper - (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from - all levels of the FPN into single output. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features): - super(Decoder, self).__init__() - - # fmt: off - self.in_features = in_features - feature_strides = {k: v.stride for k, v in input_shape.items()} - feature_channels = {k: v.channels for k, v in input_shape.items()} - num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES - conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS - self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE - norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM - # fmt: on - - self.scale_heads = [] - for in_feature in self.in_features: - head_ops = [] - head_length = max( - 1, - # pyre-fixme[6]: For 1st argument expected `Union[bytes, complex, - # float, int, generic, str]` but got `Optional[int]`. - int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)), - ) - for k in range(head_length): - conv = Conv2d( - feature_channels[in_feature] if k == 0 else conv_dims, - conv_dims, - kernel_size=3, - stride=1, - padding=1, - bias=not norm, - norm=get_norm(norm, conv_dims), - activation=F.relu, - ) - weight_init.c2_msra_fill(conv) - head_ops.append(conv) - if feature_strides[in_feature] != self.common_stride: - head_ops.append( - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) - ) - self.scale_heads.append(nn.Sequential(*head_ops)) - self.add_module(in_feature, self.scale_heads[-1]) - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - weight_init.c2_msra_fill(self.predictor) - - def forward(self, features: List[torch.Tensor]): - for i, _ in enumerate(self.in_features): - if i == 0: - x = self.scale_heads[i](features[i]) - else: - x = x + self.scale_heads[i](features[i]) - x = self.predictor(x) - return x - - -@ROI_HEADS_REGISTRY.register() -class DensePoseROIHeads(StandardROIHeads): - """ - A Standard ROIHeads which contains an addition of DensePose head. 
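For the `Decoder` removed above, the number of conv (+2x upsample) stages per FPN level is `head_length = max(1, log2(feature_stride) - log2(common_stride))`. A tiny arithmetic sketch of that formula, using made-up feature strides and an assumed common stride of 4 (these are illustrative values, not read from any config):

```python
import math

# Hypothetical FPN feature strides and decoder common stride (assumptions).
feature_strides = {"p2": 4, "p3": 8, "p4": 16, "p5": 32}
common_stride = 4

for name, stride in feature_strides.items():
    # Same formula as in Decoder.__init__: at least one conv stage, plus one
    # 2x bilinear upsample per factor-of-two gap between stride and common_stride.
    head_length = max(1, int(math.log2(stride) - math.log2(common_stride)))
    print(f"{name}: stride {stride:2d} -> {head_length} conv stage(s)")
# p2: stride  4 -> 1 conv stage(s)
# p3: stride  8 -> 1 conv stage(s)
# p4: stride 16 -> 2 conv stage(s)
# p5: stride 32 -> 3 conv stage(s)
```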
- """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape) - self._init_densepose_head(cfg, input_shape) - - def _init_densepose_head(self, cfg, input_shape): - # fmt: off - self.densepose_on = cfg.MODEL.DENSEPOSE_ON - if not self.densepose_on: - return - self.densepose_data_filter = build_densepose_data_filter(cfg) - dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION - dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO - dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE - self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON - # fmt: on - if self.use_decoder: - dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,) - else: - dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) - in_channels = [input_shape[f].channels for f in self.in_features][0] - - if self.use_decoder: - self.decoder = Decoder(cfg, input_shape, self.in_features) - - self.densepose_pooler = ROIPooler( - output_size=dp_pooler_resolution, - scales=dp_pooler_scales, - sampling_ratio=dp_pooler_sampling_ratio, - pooler_type=dp_pooler_type, - ) - self.densepose_head = build_densepose_head(cfg, in_channels) - self.densepose_predictor = build_densepose_predictor( - cfg, self.densepose_head.n_out_channels - ) - self.densepose_losses = build_densepose_losses(cfg) - self.embedder = build_densepose_embedder(cfg) - - def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]): - """ - Forward logic of the densepose prediction branch. - - Args: - features (dict[str, Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - instances (list[Instances]): length `N` list of `Instances`. The i-th - `Instances` contains instances for the i-th input image, - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "densepose" and return it. - """ - if not self.densepose_on: - return {} if self.training else instances - - features_list = [features[f] for f in self.in_features] - if self.training: - proposals, _ = select_foreground_proposals(instances, self.num_classes) - features_list, proposals = self.densepose_data_filter(features_list, proposals) - if len(proposals) > 0: - proposal_boxes = [x.proposal_boxes for x in proposals] - - if self.use_decoder: - # pyre-fixme[29]: `Union[Module, Tensor]` is not a function. - features_list = [self.decoder(features_list)] - - features_dp = self.densepose_pooler(features_list, proposal_boxes) - densepose_head_outputs = self.densepose_head(features_dp) - densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs) - densepose_loss_dict = self.densepose_losses( - proposals, densepose_predictor_outputs, embedder=self.embedder - ) - return densepose_loss_dict - else: - pred_boxes = [x.pred_boxes for x in instances] - - if self.use_decoder: - # pyre-fixme[29]: `Union[Module, Tensor]` is not a function. 
- features_list = [self.decoder(features_list)] - - features_dp = self.densepose_pooler(features_list, pred_boxes) - if len(features_dp) > 0: - densepose_head_outputs = self.densepose_head(features_dp) - densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs) - else: - densepose_predictor_outputs = None - - densepose_inference(densepose_predictor_outputs, instances) - return instances - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ): - instances, losses = super().forward(images, features, proposals, targets) - del targets, images - - if self.training: - losses.update(self._forward_densepose(features, instances)) - return instances, losses - - def forward_with_given_boxes( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ): - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - This is useful for downstream tasks where a box is known, but need to obtain - other attributes (outputs of other heads). - Test-time augmentation also uses this. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - instances (list[Instances]): - the same `Instances` objects, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - - instances = super().forward_with_given_boxes(features, instances) - instances = self._forward_densepose(features, instances) - return instances diff --git a/detectron2/projects/DensePose/densepose/modeling/roi_heads/v1convx.py b/detectron2/projects/DensePose/densepose/modeling/roi_heads/v1convx.py deleted file mode 100644 index d81c375c5a488af4cb9ab41676d5e6688f740e61..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/roi_heads/v1convx.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.layers import Conv2d - -from ..utils import initialize_module_params -from .registry import ROI_DENSEPOSE_HEAD_REGISTRY - - -@ROI_DENSEPOSE_HEAD_REGISTRY.register() -class DensePoseV1ConvXHead(nn.Module): - """ - Fully convolutional DensePose head. 
- """ - - def __init__(self, cfg: CfgNode, input_channels: int): - """ - Initialize DensePose fully convolutional head - - Args: - cfg (CfgNode): configuration options - input_channels (int): number of input channels - """ - super(DensePoseV1ConvXHead, self).__init__() - # fmt: off - hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL - self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS - # fmt: on - pad_size = kernel_size // 2 - n_channels = input_channels - for i in range(self.n_stacked_convs): - layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size) - layer_name = self._get_layer_name(i) - self.add_module(layer_name, layer) - n_channels = hidden_dim - self.n_out_channels = n_channels - initialize_module_params(self) - - def forward(self, features: torch.Tensor): - """ - Apply DensePose fully convolutional head to the input features - - Args: - features (tensor): input features - Result: - A tensor of DensePose head outputs - """ - x = features - output = x - for i in range(self.n_stacked_convs): - layer_name = self._get_layer_name(i) - x = getattr(self, layer_name)(x) - x = F.relu(x) - output = x - return output - - def _get_layer_name(self, i: int): - layer_name = "body_conv_fcn{}".format(i + 1) - return layer_name diff --git a/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py b/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py deleted file mode 100644 index 2e4cfa038f2fe3072a2520978ff4408df9bca5b3..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import copy -import numpy as np -import torch -from fvcore.transforms import HFlipTransform, TransformList -from torch.nn import functional as F - -from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens -from detectron2.modeling.postprocessing import detector_postprocess -from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA - -from ..converters import HFlipConverter - - -class DensePoseDatasetMapperTTA(DatasetMapperTTA): - def __init__(self, cfg): - super().__init__(cfg=cfg) - self.angles = cfg.TEST.AUG.ROTATION_ANGLES - - def __call__(self, dataset_dict): - ret = super().__call__(dataset_dict=dataset_dict) - numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy() - for angle in self.angles: - rotate = RandomRotation(angle=angle, expand=True) - new_numpy_image, tfms = apply_transform_gens([rotate], np.copy(numpy_image)) - torch_image = torch.from_numpy(np.ascontiguousarray(new_numpy_image.transpose(2, 0, 1))) - dic = copy.deepcopy(dataset_dict) - # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op) that is - # added at the beginning of each TransformList. That's '.transforms[0]'. - dic["transforms"] = TransformList( - [ret[-1]["transforms"].transforms[0]] + tfms.transforms - ) - dic["image"] = torch_image - ret.append(dic) - return ret - - -class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA): - def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1): - """ - Args: - cfg (CfgNode): - model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. 
- transform_data (DensePoseTransformData): contains symmetry label - transforms used for horizontal flip - tta_mapper (callable): takes a dataset dict and returns a list of - augmented versions of the dataset dict. Defaults to - `DatasetMapperTTA(cfg)`. - batch_size (int): batch the augmented images into this batch size for inference. - """ - self._transform_data = transform_data.to(model.device) - super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size) - - # the implementation follows closely the one from detectron2/modeling - def _inference_one_image(self, input): - """ - Args: - input (dict): one dataset dict with "image" field being a CHW tensor - - Returns: - dict: one output dict - """ - orig_shape = (input["height"], input["width"]) - # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP - input["image"] = input["image"].to(torch.uint8) - augmented_inputs, tfms = self._get_augmented_inputs(input) - # Detect boxes from all augmented versions - with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]): - # temporarily disable roi heads - all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms) - merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape) - - if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON: - # Use the detected boxes to obtain new fields - augmented_instances = self._rescale_detected_boxes( - augmented_inputs, merged_instances, tfms - ) - # run forward on the detected boxes - outputs = self._batch_inference(augmented_inputs, augmented_instances) - # Delete now useless variables to avoid being out of memory - del augmented_inputs, augmented_instances - # average the predictions - if self.cfg.MODEL.MASK_ON: - merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms) - if self.cfg.MODEL.DENSEPOSE_ON: - merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms) - # postprocess - merged_instances = detector_postprocess(merged_instances, *orig_shape) - return {"instances": merged_instances} - else: - return {"instances": merged_instances} - - def _get_augmented_boxes(self, augmented_inputs, tfms): - # Heavily based on detectron2/modeling/test_time_augmentation.py - # Only difference is that RotationTransform is excluded from bbox computation - # 1: forward with all augmented images - outputs = self._batch_inference(augmented_inputs) - # 2: union the results - all_boxes = [] - all_scores = [] - all_classes = [] - for output, tfm in zip(outputs, tfms): - # Need to inverse the transforms on boxes, to obtain results on original image - if not any(isinstance(t, RotationTransform) for t in tfm.transforms): - # Some transforms can't compute bbox correctly - pred_boxes = output.pred_boxes.tensor - original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy()) - all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device)) - all_scores.extend(output.scores) - all_classes.extend(output.pred_classes) - all_boxes = torch.cat(all_boxes, dim=0) - return all_boxes, all_scores, all_classes - - def _reduce_pred_densepose(self, outputs, tfms): - # Should apply inverse transforms on densepose preds. - # We assume only rotation, resize & flip are used. 
pred_masks is a scale-invariant - # representation, so we handle the other ones specially - for idx, (output, tfm) in enumerate(zip(outputs, tfms)): - for t in tfm.transforms: - for attr in ["coarse_segm", "fine_segm", "u", "v"]: - setattr( - output.pred_densepose, - attr, - _inverse_rotation( - getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t - ), - ) - if any(isinstance(t, HFlipTransform) for t in tfm.transforms): - output.pred_densepose = HFlipConverter.convert( - output.pred_densepose, self._transform_data - ) - self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx) - return outputs[0].pred_densepose - - # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1). - def _incremental_avg_dp(self, avg, new_el, idx): - for attr in ["coarse_segm", "fine_segm", "u", "v"]: - setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1)) - if idx: - # Deletion of the > 0 index intermediary values to prevent GPU OOM - setattr(new_el, attr, None) - return avg - - -def _inverse_rotation(densepose_attrs, boxes, transform): - # resample outputs to image size and rotate back the densepose preds - # on the rotated images to the space of the original image - if len(boxes) == 0 or not isinstance(transform, RotationTransform): - return densepose_attrs - boxes = boxes.int().cpu().numpy() - wh_boxes = boxes[:, 2:] - boxes[:, :2] # bboxes in the rotated space - inv_boxes = rotate_box_inverse(transform, boxes).astype(int) # bboxes in original image - wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2 # diff between new/old bboxes - rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float() - rotation_matrix[:, :, -1] = 0 - # To apply grid_sample for rotation, we need to have enough space to fit the original and - # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to - # crop the difference once the rotation is done - l_bds = np.maximum(0, -wh_diff) - for i in range(len(densepose_attrs)): - if min(wh_boxes[i]) <= 0: - continue - densepose_attr = densepose_attrs[[i]].clone() - # 1. Interpolate densepose attribute to size of the rotated bbox - densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear") - # 2. Pad the interpolated attribute so it has room for the original + rotated bbox - densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2))) - # 3. Compute rotation grid and transform - grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape) - densepose_attr = F.grid_sample(densepose_attr, grid) - # 4. 
Compute right bounds and crop the densepose_attr to the size of the original bbox - r_bds = densepose_attr.shape[2:][::-1] - l_bds[i] - densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]] - if min(densepose_attr.shape) > 0: - # Interpolate back to the original size of the densepose attribute - densepose_attr = F.interpolate( - densepose_attr, densepose_attrs.shape[-2:], mode="bilinear" - ) - # Adding a very small probability to the background class to fill padded zones - densepose_attr[:, 0] += 1e-10 - densepose_attrs[i] = densepose_attr - return densepose_attrs - - -def rotate_box_inverse(rot_tfm, rotated_box): - """ - rotated_box is a N * 4 array of [x0, y0, x1, y1] boxes - When a bbox is rotated, it gets bigger, because we need to surround the tilted bbox - So when a bbox is rotated then inverse-rotated, it is much bigger than the original - This function aims to invert the rotation on the box, but also resize it to its original size - """ - # 1. Compute the inverse rotation of the rotated bboxes (bigger than it ) - invrot_box = rot_tfm.inverse().apply_box(rotated_box) - h, w = rotated_box[:, 3] - rotated_box[:, 1], rotated_box[:, 2] - rotated_box[:, 0] - ih, iw = invrot_box[:, 3] - invrot_box[:, 1], invrot_box[:, 2] - invrot_box[:, 0] - assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted" - # 2. Inverse the corresponding computation in the rotation transform - # to get the original height/width of the rotated boxes - orig_h = (h * rot_tfm.abs_cos - w * rot_tfm.abs_sin) / (1 - 2 * rot_tfm.abs_sin**2) - orig_w = (w * rot_tfm.abs_cos - h * rot_tfm.abs_sin) / (1 - 2 * rot_tfm.abs_sin**2) - # 3. Resize the inverse-rotated bboxes to their original size - invrot_box[:, 0] += (iw - orig_w) / 2 - invrot_box[:, 1] += (ih - orig_h) / 2 - invrot_box[:, 2] -= (iw - orig_w) / 2 - invrot_box[:, 3] -= (ih - orig_h) / 2 - - return invrot_box diff --git a/detectron2/projects/DensePose/densepose/modeling/utils.py b/detectron2/projects/DensePose/densepose/modeling/utils.py deleted file mode 100644 index 64f53369b5ae3bc69f064c590e0837583ebc213e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/modeling/utils.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from torch import nn - - -def initialize_module_params(module: nn.Module) -> None: - for name, param in module.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") diff --git a/detectron2/projects/DensePose/densepose/structures/__init__.py b/detectron2/projects/DensePose/densepose/structures/__init__.py deleted file mode 100644 index 4ee84836219994a54bb1249c90a7d0d6f8b72e8b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
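The `_incremental_avg_dp` helper deleted above averages predictions over augmentations with the running-mean update u_(n+1) = u_n + (x_(n+1) - u_n) / (n + 1), which avoids holding all augmented outputs in memory at once. A quick numeric check that this recurrence reproduces the plain arithmetic mean, on bare tensors with no DensePose structures involved:

```python
import torch

# Pretend these are the same attribute taken from 5 augmented predictions.
xs = [torch.randn(3, 4) for _ in range(5)]

avg = xs[0].clone()
for idx, x in enumerate(xs[1:], start=1):
    # u_(n+1) = u_n + (x_(n+1) - u_n) / (n + 1)
    avg = avg + (x - avg) / (idx + 1)

# The running mean matches the batch mean up to floating-point error.
assert torch.allclose(avg, torch.stack(xs).mean(dim=0), atol=1e-6)
print("running mean matches batch mean")
```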
- -# pyre-unsafe - -from .chart import DensePoseChartPredictorOutput -from .chart_confidence import decorate_predictor_output_class_with_confidences -from .cse_confidence import decorate_cse_predictor_output_class_with_confidences -from .chart_result import ( - DensePoseChartResult, - DensePoseChartResultWithConfidences, - quantize_densepose_chart_result, - compress_quantized_densepose_chart_result, - decompress_compressed_densepose_chart_result, -) -from .cse import DensePoseEmbeddingPredictorOutput -from .data_relative import DensePoseDataRelative -from .list import DensePoseList -from .mesh import Mesh, create_mesh -from .transform_data import DensePoseTransformData, normalized_coords_transform diff --git a/detectron2/projects/DensePose/densepose/structures/chart.py b/detectron2/projects/DensePose/densepose/structures/chart.py deleted file mode 100644 index 4f8640ef3dc9ca7e66e1a639e2e23211300dbbac..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/chart.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from dataclasses import dataclass -from typing import Union -import torch - - -@dataclass -class DensePoseChartPredictorOutput: - """ - Predictor output that contains segmentation and inner coordinates predictions for predefined - body parts: - * coarse segmentation, a tensor of shape [N, K, Hout, Wout] - * fine segmentation, a tensor of shape [N, C, Hout, Wout] - * U coordinates, a tensor of shape [N, C, Hout, Wout] - * V coordinates, a tensor of shape [N, C, Hout, Wout] - where - - N is the number of instances - - K is the number of coarse segmentation channels ( - 2 = foreground / background, - 15 = one of 14 body parts / background) - - C is the number of fine segmentation channels ( - 24 fine body parts / background) - - Hout and Wout are height and width of predictions - """ - - coarse_segm: torch.Tensor - fine_segm: torch.Tensor - u: torch.Tensor - v: torch.Tensor - - def __len__(self): - """ - Number of instances (N) in the output - """ - return self.coarse_segm.size(0) - - def __getitem__( - self, item: Union[int, slice, torch.BoolTensor] - ) -> "DensePoseChartPredictorOutput": - """ - Get outputs for the selected instance(s) - - Args: - item (int or slice or tensor): selected items - """ - if isinstance(item, int): - return DensePoseChartPredictorOutput( - coarse_segm=self.coarse_segm[item].unsqueeze(0), - fine_segm=self.fine_segm[item].unsqueeze(0), - u=self.u[item].unsqueeze(0), - v=self.v[item].unsqueeze(0), - ) - else: - return DensePoseChartPredictorOutput( - coarse_segm=self.coarse_segm[item], - fine_segm=self.fine_segm[item], - u=self.u[item], - v=self.v[item], - ) - - def to(self, device: torch.device): - """ - Transfers all tensors to the given device - """ - coarse_segm = self.coarse_segm.to(device) - fine_segm = self.fine_segm.to(device) - u = self.u.to(device) - v = self.v.to(device) - return DensePoseChartPredictorOutput(coarse_segm=coarse_segm, fine_segm=fine_segm, u=u, v=v) diff --git a/detectron2/projects/DensePose/densepose/structures/chart_confidence.py b/detectron2/projects/DensePose/densepose/structures/chart_confidence.py deleted file mode 100644 index faec3a0f161939591a8424058871d50198327b08..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/chart_confidence.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe - -from dataclasses import make_dataclass -from functools import lru_cache -from typing import Any, Optional -import torch - - -@lru_cache(maxsize=None) -def decorate_predictor_output_class_with_confidences(BasePredictorOutput: type) -> type: - """ - Create a new output class from an existing one by adding new attributes - related to confidence estimation: - - sigma_1 (tensor) - - sigma_2 (tensor) - - kappa_u (tensor) - - kappa_v (tensor) - - fine_segm_confidence (tensor) - - coarse_segm_confidence (tensor) - - Details on confidence estimation parameters can be found in: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 - A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020 - - The new class inherits the provided `BasePredictorOutput` class, - it's name is composed of the name of the provided class and - "WithConfidences" suffix. - - Args: - BasePredictorOutput (type): output type to which confidence data - is to be added, assumed to be a dataclass - Return: - New dataclass derived from the provided one that has attributes - for confidence estimation - """ - - PredictorOutput = make_dataclass( - BasePredictorOutput.__name__ + "WithConfidences", - fields=[ - ("sigma_1", Optional[torch.Tensor], None), - ("sigma_2", Optional[torch.Tensor], None), - ("kappa_u", Optional[torch.Tensor], None), - ("kappa_v", Optional[torch.Tensor], None), - ("fine_segm_confidence", Optional[torch.Tensor], None), - ("coarse_segm_confidence", Optional[torch.Tensor], None), - ], - bases=(BasePredictorOutput,), - ) - - # add possibility to index PredictorOutput - - def slice_if_not_none(data, item): - if data is None: - return None - if isinstance(item, int): - return data[item].unsqueeze(0) - return data[item] - - def PredictorOutput_getitem(self, item): - PredictorOutput = type(self) - base_predictor_output_sliced = super(PredictorOutput, self).__getitem__(item) - return PredictorOutput( - **base_predictor_output_sliced.__dict__, - coarse_segm_confidence=slice_if_not_none(self.coarse_segm_confidence, item), - fine_segm_confidence=slice_if_not_none(self.fine_segm_confidence, item), - sigma_1=slice_if_not_none(self.sigma_1, item), - sigma_2=slice_if_not_none(self.sigma_2, item), - kappa_u=slice_if_not_none(self.kappa_u, item), - kappa_v=slice_if_not_none(self.kappa_v, item), - ) - - PredictorOutput.__getitem__ = PredictorOutput_getitem - - def PredictorOutput_to(self, device: torch.device): - """ - Transfers all tensors to the given device - """ - PredictorOutput = type(self) - base_predictor_output_to = super(PredictorOutput, self).to(device) # pyre-ignore[16] - - def to_device_if_tensor(var: Any): - if isinstance(var, torch.Tensor): - return var.to(device) - return var - - return PredictorOutput( - **base_predictor_output_to.__dict__, - sigma_1=to_device_if_tensor(self.sigma_1), - sigma_2=to_device_if_tensor(self.sigma_2), - kappa_u=to_device_if_tensor(self.kappa_u), - kappa_v=to_device_if_tensor(self.kappa_v), - fine_segm_confidence=to_device_if_tensor(self.fine_segm_confidence), - coarse_segm_confidence=to_device_if_tensor(self.coarse_segm_confidence), - ) - - PredictorOutput.to = PredictorOutput_to - return PredictorOutput diff --git a/detectron2/projects/DensePose/densepose/structures/chart_result.py b/detectron2/projects/DensePose/densepose/structures/chart_result.py deleted file mode 100644 index 
3a9e56dee9fb81fd6a6596c524dcd9f2e471af19..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/chart_result.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from dataclasses import dataclass -from typing import Any, Optional, Tuple -import torch - - -@dataclass -class DensePoseChartResult: - """ - DensePose results for chart-based methods represented by labels and inner - coordinates (U, V) of individual charts. Each chart is a 2D manifold - that has an associated label and is parameterized by two coordinates U and V. - Both U and V take values in [0, 1]. - Thus the results are represented by two tensors: - - labels (tensor [H, W] of long): contains estimated label for each pixel of - the detection bounding box of size (H, W) - - uv (tensor [2, H, W] of float): contains estimated U and V coordinates - for each pixel of the detection bounding box of size (H, W) - """ - - labels: torch.Tensor - uv: torch.Tensor - - def to(self, device: torch.device): - """ - Transfers all tensors to the given device - """ - labels = self.labels.to(device) - uv = self.uv.to(device) - return DensePoseChartResult(labels=labels, uv=uv) - - -@dataclass -class DensePoseChartResultWithConfidences: - """ - We add confidence values to DensePoseChartResult - Thus the results are represented by two tensors: - - labels (tensor [H, W] of long): contains estimated label for each pixel of - the detection bounding box of size (H, W) - - uv (tensor [2, H, W] of float): contains estimated U and V coordinates - for each pixel of the detection bounding box of size (H, W) - Plus one [H, W] tensor of float for each confidence type - """ - - labels: torch.Tensor - uv: torch.Tensor - sigma_1: Optional[torch.Tensor] = None - sigma_2: Optional[torch.Tensor] = None - kappa_u: Optional[torch.Tensor] = None - kappa_v: Optional[torch.Tensor] = None - fine_segm_confidence: Optional[torch.Tensor] = None - coarse_segm_confidence: Optional[torch.Tensor] = None - - def to(self, device: torch.device): - """ - Transfers all tensors to the given device, except if their value is None - """ - - def to_device_if_tensor(var: Any): - if isinstance(var, torch.Tensor): - return var.to(device) - return var - - return DensePoseChartResultWithConfidences( - labels=self.labels.to(device), - uv=self.uv.to(device), - sigma_1=to_device_if_tensor(self.sigma_1), - sigma_2=to_device_if_tensor(self.sigma_2), - kappa_u=to_device_if_tensor(self.kappa_u), - kappa_v=to_device_if_tensor(self.kappa_v), - fine_segm_confidence=to_device_if_tensor(self.fine_segm_confidence), - coarse_segm_confidence=to_device_if_tensor(self.coarse_segm_confidence), - ) - - -@dataclass -class DensePoseChartResultQuantized: - """ - DensePose results for chart-based methods represented by labels and quantized - inner coordinates (U, V) of individual charts. Each chart is a 2D manifold - that has an associated label and is parameterized by two coordinates U and V. - Both U and V take values in [0, 1]. 
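The `decorate_*_with_confidences` helpers above extend an existing predictor-output dataclass at runtime with `dataclasses.make_dataclass` and then attach `__getitem__`/`to` methods to the generated class. A stripped-down sketch of that pattern follows; `ToyOutput` and the field names are hypothetical, and only slicing is shown (the real helpers also handle integer indexing and device transfer):

```python
from dataclasses import dataclass, make_dataclass
from functools import lru_cache
from typing import Optional

import torch


@dataclass
class ToyOutput:
    coarse_segm: torch.Tensor

    def __getitem__(self, item):
        # Toy version: slice only (ints would need an extra unsqueeze).
        return ToyOutput(coarse_segm=self.coarse_segm[item])


@lru_cache(maxsize=None)
def with_confidences(base: type) -> type:
    """Derive '<Base>WithConfidences' with an extra optional tensor field."""
    Derived = make_dataclass(
        base.__name__ + "WithConfidences",
        fields=[("coarse_segm_confidence", Optional[torch.Tensor], None)],
        bases=(base,),
    )

    def getitem(self, item):
        sliced = super(Derived, self).__getitem__(item)
        conf = (
            None
            if self.coarse_segm_confidence is None
            else self.coarse_segm_confidence[item]
        )
        # Reuse the base fields (no deep copy), add the new one.
        return Derived(**sliced.__dict__, coarse_segm_confidence=conf)

    Derived.__getitem__ = getitem
    return Derived


ToyOutputWithConfidences = with_confidences(ToyOutput)
out = ToyOutputWithConfidences(
    coarse_segm=torch.zeros(4, 2, 8, 8),
    coarse_segm_confidence=torch.ones(4, 1, 8, 8),
)
print(type(out).__name__, out[1:3].coarse_segm.shape)
# ToyOutputWithConfidences torch.Size([2, 2, 8, 8])
```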
- Quantized coordinates Uq and Vq have uint8 values which are obtained as: - Uq = U * 255 (hence 0 <= Uq <= 255) - Vq = V * 255 (hence 0 <= Vq <= 255) - Thus the results are represented by one tensor: - - labels_uv_uint8 (tensor [3, H, W] of uint8): contains estimated label - and quantized coordinates Uq and Vq for each pixel of the detection - bounding box of size (H, W) - """ - - labels_uv_uint8: torch.Tensor - - def to(self, device: torch.device): - """ - Transfers all tensors to the given device - """ - labels_uv_uint8 = self.labels_uv_uint8.to(device) - return DensePoseChartResultQuantized(labels_uv_uint8=labels_uv_uint8) - - -@dataclass -class DensePoseChartResultCompressed: - """ - DensePose results for chart-based methods represented by a PNG-encoded string. - The tensor of quantized DensePose results of size [3, H, W] is considered - as an image with 3 color channels. PNG compression is applied and the result - is stored as a Base64-encoded string. The following attributes are defined: - - shape_chw (tuple of 3 int): contains shape of the result tensor - (number of channels, height, width) - - labels_uv_str (str): contains Base64-encoded results tensor of size - [3, H, W] compressed with PNG compression methods - """ - - shape_chw: Tuple[int, int, int] - labels_uv_str: str - - -def quantize_densepose_chart_result(result: DensePoseChartResult) -> DensePoseChartResultQuantized: - """ - Applies quantization to DensePose chart-based result. - - Args: - result (DensePoseChartResult): DensePose chart-based result - Return: - Quantized DensePose chart-based result (DensePoseChartResultQuantized) - """ - h, w = result.labels.shape - labels_uv_uint8 = torch.zeros([3, h, w], dtype=torch.uint8, device=result.labels.device) - labels_uv_uint8[0] = result.labels - labels_uv_uint8[1:] = (result.uv * 255).clamp(0, 255).byte() - return DensePoseChartResultQuantized(labels_uv_uint8=labels_uv_uint8) - - -def compress_quantized_densepose_chart_result( - result: DensePoseChartResultQuantized, -) -> DensePoseChartResultCompressed: - """ - Compresses quantized DensePose chart-based result - - Args: - result (DensePoseChartResultQuantized): quantized DensePose chart-based result - Return: - Compressed DensePose chart-based result (DensePoseChartResultCompressed) - """ - import base64 - import numpy as np - from io import BytesIO - from PIL import Image - - labels_uv_uint8_np_chw = result.labels_uv_uint8.cpu().numpy() - labels_uv_uint8_np_hwc = np.moveaxis(labels_uv_uint8_np_chw, 0, -1) - im = Image.fromarray(labels_uv_uint8_np_hwc) - fstream = BytesIO() - im.save(fstream, format="png", optimize=True) - labels_uv_str = base64.encodebytes(fstream.getvalue()).decode() - shape_chw = labels_uv_uint8_np_chw.shape - return DensePoseChartResultCompressed(labels_uv_str=labels_uv_str, shape_chw=shape_chw) - - -def decompress_compressed_densepose_chart_result( - result: DensePoseChartResultCompressed, -) -> DensePoseChartResultQuantized: - """ - Decompresses DensePose chart-based result encoded into a base64 string - - Args: - result (DensePoseChartResultCompressed): compressed DensePose chart result - Return: - Quantized DensePose chart-based result (DensePoseChartResultQuantized) - """ - import base64 - import numpy as np - from io import BytesIO - from PIL import Image - - fstream = BytesIO(base64.decodebytes(result.labels_uv_str.encode())) - im = Image.open(fstream) - labels_uv_uint8_np_chw = np.moveaxis(np.array(im, dtype=np.uint8), -1, 0) - return DensePoseChartResultQuantized( - 
labels_uv_uint8=torch.from_numpy(labels_uv_uint8_np_chw.reshape(result.shape_chw)) - ) diff --git a/detectron2/projects/DensePose/densepose/structures/cse.py b/detectron2/projects/DensePose/densepose/structures/cse.py deleted file mode 100644 index 381f1384a8d4d42f81cda8ff1558002149bdea74..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/cse.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe - -from dataclasses import dataclass -from typing import Union -import torch - - -@dataclass -class DensePoseEmbeddingPredictorOutput: - """ - Predictor output that contains embedding and coarse segmentation data: - * embedding: float tensor of size [N, D, H, W], contains estimated embeddings - * coarse_segm: float tensor of size [N, K, H, W] - Here D = MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE - K = MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - """ - - embedding: torch.Tensor - coarse_segm: torch.Tensor - - def __len__(self): - """ - Number of instances (N) in the output - """ - return self.coarse_segm.size(0) - - def __getitem__( - self, item: Union[int, slice, torch.BoolTensor] - ) -> "DensePoseEmbeddingPredictorOutput": - """ - Get outputs for the selected instance(s) - - Args: - item (int or slice or tensor): selected items - """ - if isinstance(item, int): - return DensePoseEmbeddingPredictorOutput( - coarse_segm=self.coarse_segm[item].unsqueeze(0), - embedding=self.embedding[item].unsqueeze(0), - ) - else: - return DensePoseEmbeddingPredictorOutput( - coarse_segm=self.coarse_segm[item], embedding=self.embedding[item] - ) - - def to(self, device: torch.device): - """ - Transfers all tensors to the given device - """ - coarse_segm = self.coarse_segm.to(device) - embedding = self.embedding.to(device) - return DensePoseEmbeddingPredictorOutput(coarse_segm=coarse_segm, embedding=embedding) diff --git a/detectron2/projects/DensePose/densepose/structures/cse_confidence.py b/detectron2/projects/DensePose/densepose/structures/cse_confidence.py deleted file mode 100644 index 251a7e823e38931fb1b86b017417538af5350944..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/cse_confidence.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe - -from dataclasses import make_dataclass -from functools import lru_cache -from typing import Any, Optional -import torch - - -@lru_cache(maxsize=None) -def decorate_cse_predictor_output_class_with_confidences(BasePredictorOutput: type) -> type: - """ - Create a new output class from an existing one by adding new attributes - related to confidence estimation: - - coarse_segm_confidence (tensor) - - Details on confidence estimation parameters can be found in: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 - A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020 - - The new class inherits the provided `BasePredictorOutput` class, - it's name is composed of the name of the provided class and - "WithConfidences" suffix. 
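For context on the chart-result helpers above, a minimal round-trip sketch (not part of the removed files themselves): it assumes the densepose package from this tree is still importable and uses a made-up 4x4 detection; shapes follow the docstrings (labels [H, W], uv [2, H, W], quantized [3, H, W] uint8).

import torch
from densepose.structures.chart_result import (
    DensePoseChartResult,
    quantize_densepose_chart_result,
    compress_quantized_densepose_chart_result,
    decompress_compressed_densepose_chart_result,
)

# Synthetic result: per-pixel part labels and U/V coordinates in [0, 1]
result = DensePoseChartResult(labels=torch.randint(0, 25, (4, 4)), uv=torch.rand(2, 4, 4))
quantized = quantize_densepose_chart_result(result)                # [3, H, W] uint8, Uq = U * 255
compressed = compress_quantized_densepose_chart_result(quantized)  # PNG-compressed, base64 string
restored = decompress_compressed_densepose_chart_result(compressed)
assert torch.equal(restored.labels_uv_uint8, quantized.labels_uv_uint8)  # PNG is lossless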
- - Args: - BasePredictorOutput (type): output type to which confidence data - is to be added, assumed to be a dataclass - Return: - New dataclass derived from the provided one that has attributes - for confidence estimation - """ - - PredictorOutput = make_dataclass( - BasePredictorOutput.__name__ + "WithConfidences", - fields=[ - ("coarse_segm_confidence", Optional[torch.Tensor], None), - ], - bases=(BasePredictorOutput,), - ) - - # add possibility to index PredictorOutput - - def slice_if_not_none(data, item): - if data is None: - return None - if isinstance(item, int): - return data[item].unsqueeze(0) - return data[item] - - def PredictorOutput_getitem(self, item): - PredictorOutput = type(self) - base_predictor_output_sliced = super(PredictorOutput, self).__getitem__(item) - return PredictorOutput( - **base_predictor_output_sliced.__dict__, - coarse_segm_confidence=slice_if_not_none(self.coarse_segm_confidence, item), - ) - - PredictorOutput.__getitem__ = PredictorOutput_getitem - - def PredictorOutput_to(self, device: torch.device): - """ - Transfers all tensors to the given device - """ - PredictorOutput = type(self) - base_predictor_output_to = super(PredictorOutput, self).to(device) # pyre-ignore[16] - - def to_device_if_tensor(var: Any): - if isinstance(var, torch.Tensor): - return var.to(device) - return var - - return PredictorOutput( - **base_predictor_output_to.__dict__, - coarse_segm_confidence=to_device_if_tensor(self.coarse_segm_confidence), - ) - - PredictorOutput.to = PredictorOutput_to - return PredictorOutput diff --git a/detectron2/projects/DensePose/densepose/structures/data_relative.py b/detectron2/projects/DensePose/densepose/structures/data_relative.py deleted file mode 100644 index bcf27ef9bb69f5d9f74d6499e55408e8d4ec5803..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/data_relative.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import numpy as np -import torch -from torch.nn import functional as F - -from densepose.data.meshes.catalog import MeshCatalog -from densepose.structures.mesh import load_mesh_symmetry -from densepose.structures.transform_data import DensePoseTransformData - - -class DensePoseDataRelative: - """ - Dense pose relative annotations that can be applied to any bounding box: - x - normalized X coordinates [0, 255] of annotated points - y - normalized Y coordinates [0, 255] of annotated points - i - body part labels 0,...,24 for annotated points - u - body part U coordinates [0, 1] for annotated points - v - body part V coordinates [0, 1] for annotated points - segm - 256x256 segmentation mask with values 0,...,14 - To obtain absolute x and y data wrt some bounding box one needs to first - divide the data by 256, multiply by the respective bounding box size - and add bounding box offset: - x_img = x0 + x_norm * w / 256.0 - y_img = y0 + y_norm * h / 256.0 - Segmentation masks are typically sampled to get image-based masks. 
- """ - - # Key for normalized X coordinates in annotation dict - X_KEY = "dp_x" - # Key for normalized Y coordinates in annotation dict - Y_KEY = "dp_y" - # Key for U part coordinates in annotation dict (used in chart-based annotations) - U_KEY = "dp_U" - # Key for V part coordinates in annotation dict (used in chart-based annotations) - V_KEY = "dp_V" - # Key for I point labels in annotation dict (used in chart-based annotations) - I_KEY = "dp_I" - # Key for segmentation mask in annotation dict - S_KEY = "dp_masks" - # Key for vertex ids (used in continuous surface embeddings annotations) - VERTEX_IDS_KEY = "dp_vertex" - # Key for mesh id (used in continuous surface embeddings annotations) - MESH_NAME_KEY = "ref_model" - # Number of body parts in segmentation masks - N_BODY_PARTS = 14 - # Number of parts in point labels - N_PART_LABELS = 24 - MASK_SIZE = 256 - - def __init__(self, annotation, cleanup=False): - self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY]) - self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY]) - if ( - DensePoseDataRelative.I_KEY in annotation - and DensePoseDataRelative.U_KEY in annotation - and DensePoseDataRelative.V_KEY in annotation - ): - self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY]) - self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY]) - self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY]) - if ( - DensePoseDataRelative.VERTEX_IDS_KEY in annotation - and DensePoseDataRelative.MESH_NAME_KEY in annotation - ): - self.vertex_ids = torch.as_tensor( - annotation[DensePoseDataRelative.VERTEX_IDS_KEY], dtype=torch.long - ) - self.mesh_id = MeshCatalog.get_mesh_id(annotation[DensePoseDataRelative.MESH_NAME_KEY]) - if DensePoseDataRelative.S_KEY in annotation: - self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation) - self.device = torch.device("cpu") - if cleanup: - DensePoseDataRelative.cleanup_annotation(annotation) - - def to(self, device): - if self.device == device: - return self - new_data = DensePoseDataRelative.__new__(DensePoseDataRelative) - new_data.x = self.x.to(device) - new_data.y = self.y.to(device) - for attr in ["i", "u", "v", "vertex_ids", "segm"]: - if hasattr(self, attr): - setattr(new_data, attr, getattr(self, attr).to(device)) - if hasattr(self, "mesh_id"): - new_data.mesh_id = self.mesh_id - new_data.device = device - return new_data - - @staticmethod - def extract_segmentation_mask(annotation): - import pycocotools.mask as mask_utils - - # TODO: annotation instance is accepted if it contains either - # DensePose segmentation or instance segmentation. 
However, here we - # only rely on DensePose segmentation - poly_specs = annotation[DensePoseDataRelative.S_KEY] - if isinstance(poly_specs, torch.Tensor): - # data is already given as mask tensors, no need to decode - return poly_specs - segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32) - if isinstance(poly_specs, dict): - if poly_specs: - mask = mask_utils.decode(poly_specs) - segm[mask > 0] = 1 - else: - for i in range(len(poly_specs)): - poly_i = poly_specs[i] - if poly_i: - mask_i = mask_utils.decode(poly_i) - segm[mask_i > 0] = i + 1 - return segm - - @staticmethod - def validate_annotation(annotation): - for key in [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - ]: - if key not in annotation: - return False, "no {key} data in the annotation".format(key=key) - valid_for_iuv_setting = all( - key in annotation - for key in [ - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - ] - ) - valid_for_cse_setting = all( - key in annotation - for key in [ - DensePoseDataRelative.VERTEX_IDS_KEY, - DensePoseDataRelative.MESH_NAME_KEY, - ] - ) - if not valid_for_iuv_setting and not valid_for_cse_setting: - return ( - False, - "expected either {} (IUV setting) or {} (CSE setting) annotations".format( - ", ".join( - [ - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - ] - ), - ", ".join( - [ - DensePoseDataRelative.VERTEX_IDS_KEY, - DensePoseDataRelative.MESH_NAME_KEY, - ] - ), - ), - ) - return True, None - - @staticmethod - def cleanup_annotation(annotation): - for key in [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - DensePoseDataRelative.S_KEY, - DensePoseDataRelative.VERTEX_IDS_KEY, - DensePoseDataRelative.MESH_NAME_KEY, - ]: - if key in annotation: - del annotation[key] - - def apply_transform(self, transforms, densepose_transform_data): - self._transform_pts(transforms, densepose_transform_data) - if hasattr(self, "segm"): - self._transform_segm(transforms, densepose_transform_data) - - def _transform_pts(self, transforms, dp_transform_data): - import detectron2.data.transforms as T - - # NOTE: This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - if do_hflip: - self.x = self.MASK_SIZE - self.x - if hasattr(self, "i"): - self._flip_iuv_semantics(dp_transform_data) - if hasattr(self, "vertex_ids"): - self._flip_vertices() - - for t in transforms.transforms: - if isinstance(t, T.RotationTransform): - xy_scale = np.array((t.w, t.h)) / DensePoseDataRelative.MASK_SIZE - xy = t.apply_coords(np.stack((self.x, self.y), axis=1) * xy_scale) - self.x, self.y = torch.tensor(xy / xy_scale, dtype=self.x.dtype).T - - def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None: - i_old = self.i.clone() - uv_symmetries = dp_transform_data.uv_symmetries - pt_label_symmetries = dp_transform_data.point_label_symmetries - for i in range(self.N_PART_LABELS): - if i + 1 in i_old: - annot_indices_i = i_old == i + 1 - if pt_label_symmetries[i + 1] != i + 1: - self.i[annot_indices_i] = pt_label_symmetries[i + 1] - u_loc = (self.u[annot_indices_i] * 255).long() - v_loc = (self.v[annot_indices_i] * 255).long() - self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to( - device=self.u.device - ) - self.v[annot_indices_i] = 
uv_symmetries["V_transforms"][i][v_loc, u_loc].to( - device=self.v.device - ) - - def _flip_vertices(self): - mesh_info = MeshCatalog[MeshCatalog.get_mesh_name(self.mesh_id)] - mesh_symmetry = ( - load_mesh_symmetry(mesh_info.symmetry) if mesh_info.symmetry is not None else None - ) - self.vertex_ids = mesh_symmetry["vertex_transforms"][self.vertex_ids] - - def _transform_segm(self, transforms, dp_transform_data): - import detectron2.data.transforms as T - - # NOTE: This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - if do_hflip: - self.segm = torch.flip(self.segm, [1]) - self._flip_segm_semantics(dp_transform_data) - - for t in transforms.transforms: - if isinstance(t, T.RotationTransform): - self._transform_segm_rotation(t) - - def _flip_segm_semantics(self, dp_transform_data): - old_segm = self.segm.clone() - mask_label_symmetries = dp_transform_data.mask_label_symmetries - for i in range(self.N_BODY_PARTS): - if mask_label_symmetries[i + 1] != i + 1: - self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1] - - def _transform_segm_rotation(self, rotation): - self.segm = F.interpolate(self.segm[None, None, :], (rotation.h, rotation.w)).numpy() - self.segm = torch.tensor(rotation.apply_segmentation(self.segm[0, 0]))[None, None, :] - self.segm = F.interpolate(self.segm, [DensePoseDataRelative.MASK_SIZE] * 2)[0, 0] diff --git a/detectron2/projects/DensePose/densepose/structures/list.py b/detectron2/projects/DensePose/densepose/structures/list.py deleted file mode 100644 index e7dde3acd42ff33c103a50bcf6eebff21a59ce53..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/list.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
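To make the coordinate convention from the DensePoseDataRelative docstring above concrete, a short sketch with invented numbers (annotated dp_x/dp_y live in [0, 255] relative to a 256x256 reference box and are mapped into the absolute bounding box):

# Hypothetical absolute bounding box (XYWH) and one annotated point
x0, y0, w, h = 100.0, 50.0, 64.0, 128.0
x_norm, y_norm = 128.0, 64.0        # dp_x / dp_y values, each in [0, 255]
x_img = x0 + x_norm * w / 256.0     # 132.0
y_img = y0 + y_norm * h / 256.0     # 82.0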
- -# pyre-unsafe -import torch - -from densepose.structures.data_relative import DensePoseDataRelative - - -class DensePoseList: - - _TORCH_DEVICE_CPU = torch.device("cpu") - - def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU): - assert len(densepose_datas) == len( - boxes_xyxy_abs - ), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format( - len(densepose_datas), len(boxes_xyxy_abs) - ) - self.densepose_datas = [] - for densepose_data in densepose_datas: - assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, ( - "Attempt to initialize DensePoseList with DensePose datas " - "of type {}, expected DensePoseDataRelative".format(type(densepose_data)) - ) - densepose_data_ondevice = ( - densepose_data.to(device) if densepose_data is not None else None - ) - self.densepose_datas.append(densepose_data_ondevice) - self.boxes_xyxy_abs = boxes_xyxy_abs.to(device) - self.image_size_hw = image_size_hw - self.device = device - - def to(self, device): - if self.device == device: - return self - return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device) - - def __iter__(self): - return iter(self.densepose_datas) - - def __len__(self): - return len(self.densepose_datas) - - def __repr__(self): - s = self.__class__.__name__ + "(" - s += "num_instances={}, ".format(len(self.densepose_datas)) - s += "image_width={}, ".format(self.image_size_hw[1]) - s += "image_height={})".format(self.image_size_hw[0]) - return s - - def __getitem__(self, item): - if isinstance(item, int): - densepose_data_rel = self.densepose_datas[item] - return densepose_data_rel - elif isinstance(item, slice): - densepose_datas_rel = self.densepose_datas[item] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) - elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool): - densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) - else: - densepose_datas_rel = [self.densepose_datas[i] for i in item] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) diff --git a/detectron2/projects/DensePose/densepose/structures/mesh.py b/detectron2/projects/DensePose/densepose/structures/mesh.py deleted file mode 100644 index faaad9cb5650f5e6a1bef76c599d5fd370238e4c..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/mesh.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -# pyre-unsafe - -import pickle -from functools import lru_cache -from typing import Dict, Optional, Tuple -import torch - -from detectron2.utils.file_io import PathManager - -from densepose.data.meshes.catalog import MeshCatalog, MeshInfo - - -def _maybe_copy_to_device( - attribute: Optional[torch.Tensor], device: torch.device -) -> Optional[torch.Tensor]: - if attribute is None: - return None - return attribute.to(device) - - -class Mesh: - def __init__( - self, - vertices: Optional[torch.Tensor] = None, - faces: Optional[torch.Tensor] = None, - geodists: Optional[torch.Tensor] = None, - symmetry: Optional[Dict[str, torch.Tensor]] = None, - texcoords: Optional[torch.Tensor] = None, - mesh_info: Optional[MeshInfo] = None, - device: Optional[torch.device] = None, - ): - """ - Args: - vertices (tensor [N, 3] of float32): vertex coordinates in 3D - faces (tensor [M, 3] of long): triangular face represented as 3 - vertex indices - geodists (tensor [N, N] of float32): geodesic distances from - vertex `i` to vertex `j` (optional, default: None) - symmetry (dict: str -> tensor): various mesh symmetry data: - - "vertex_transforms": vertex mapping under horizontal flip, - tensor of size [N] of type long; vertex `i` is mapped to - vertex `tensor[i]` (optional, default: None) - texcoords (tensor [N, 2] of float32): texture coordinates, i.e. global - and normalized mesh UVs (optional, default: None) - mesh_info (MeshInfo type): necessary to load the attributes on-the-go, - can be used instead of passing all the variables one by one - device (torch.device): device of the Mesh. If not provided, will use - the device of the vertices - """ - self._vertices = vertices - self._faces = faces - self._geodists = geodists - self._symmetry = symmetry - self._texcoords = texcoords - self.mesh_info = mesh_info - self.device = device - - assert self._vertices is not None or self.mesh_info is not None - - all_fields = [self._vertices, self._faces, self._geodists, self._texcoords] - - if self.device is None: - for field in all_fields: - if field is not None: - self.device = field.device - break - if self.device is None and symmetry is not None: - for key in symmetry: - self.device = symmetry[key].device - break - self.device = torch.device("cpu") if self.device is None else self.device - - assert all([var.device == self.device for var in all_fields if var is not None]) - if symmetry: - assert all(symmetry[key].device == self.device for key in symmetry) - if texcoords and vertices: - assert len(vertices) == len(texcoords) - - def to(self, device: torch.device): - device_symmetry = self._symmetry - if device_symmetry: - device_symmetry = {key: value.to(device) for key, value in device_symmetry.items()} - return Mesh( - _maybe_copy_to_device(self._vertices, device), - _maybe_copy_to_device(self._faces, device), - _maybe_copy_to_device(self._geodists, device), - device_symmetry, - _maybe_copy_to_device(self._texcoords, device), - self.mesh_info, - device, - ) - - @property - def vertices(self): - if self._vertices is None and self.mesh_info is not None: - self._vertices = load_mesh_data(self.mesh_info.data, "vertices", self.device) - return self._vertices - - @property - def faces(self): - if self._faces is None and self.mesh_info is not None: - self._faces = load_mesh_data(self.mesh_info.data, "faces", self.device) - return self._faces - - @property - def geodists(self): - if self._geodists is None and self.mesh_info is not None: - self._geodists = load_mesh_auxiliary_data(self.mesh_info.geodists, 
self.device) - return self._geodists - - @property - def symmetry(self): - if self._symmetry is None and self.mesh_info is not None: - self._symmetry = load_mesh_symmetry(self.mesh_info.symmetry, self.device) - return self._symmetry - - @property - def texcoords(self): - if self._texcoords is None and self.mesh_info is not None: - self._texcoords = load_mesh_auxiliary_data(self.mesh_info.texcoords, self.device) - return self._texcoords - - def get_geodists(self): - if self.geodists is None: - self.geodists = self._compute_geodists() - return self.geodists - - def _compute_geodists(self): - # TODO: compute using Laplace-Beltrami - geodists = None - return geodists - - -def load_mesh_data( - mesh_fpath: str, field: str, device: Optional[torch.device] = None -) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]: - with PathManager.open(mesh_fpath, "rb") as hFile: - # pyre-fixme[7]: Expected `Tuple[Optional[Tensor], Optional[Tensor]]` but - # got `Tensor`. - return torch.as_tensor(pickle.load(hFile)[field], dtype=torch.float).to(device) - return None - - -def load_mesh_auxiliary_data( - fpath: str, device: Optional[torch.device] = None -) -> Optional[torch.Tensor]: - fpath_local = PathManager.get_local_path(fpath) - with PathManager.open(fpath_local, "rb") as hFile: - return torch.as_tensor(pickle.load(hFile), dtype=torch.float).to(device) - return None - - -@lru_cache() -def load_mesh_symmetry( - symmetry_fpath: str, device: Optional[torch.device] = None -) -> Optional[Dict[str, torch.Tensor]]: - with PathManager.open(symmetry_fpath, "rb") as hFile: - symmetry_loaded = pickle.load(hFile) - symmetry = { - "vertex_transforms": torch.as_tensor( - symmetry_loaded["vertex_transforms"], dtype=torch.long - ).to(device), - } - return symmetry - return None - - -@lru_cache() -def create_mesh(mesh_name: str, device: Optional[torch.device] = None) -> Mesh: - return Mesh(mesh_info=MeshCatalog[mesh_name], device=device) diff --git a/detectron2/projects/DensePose/densepose/structures/transform_data.py b/detectron2/projects/DensePose/densepose/structures/transform_data.py deleted file mode 100644 index c85ec88514205679d39808a794c00613a8c0f495..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/structures/transform_data.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from typing import BinaryIO, Dict, Union -import torch - - -def normalized_coords_transform(x0, y0, w, h): - """ - Coordinates transform that maps top left corner to (-1, -1) and bottom - right corner to (1, 1). 
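A minimal sketch of how the removed Mesh wrapper defers loading (not part of the diff): attributes are read from the MeshCatalog entry on first access. The mesh name "smpl_27554" is one registered elsewhere in this tree and is used purely as an example; the sketch assumes the densepose package and its mesh data files are reachable.

import torch
from densepose.structures.mesh import create_mesh

mesh = create_mesh("smpl_27554", torch.device("cpu"))  # memoized by lru_cache
vertices = mesh.vertices  # loaded lazily from mesh_info.data on first access
faces = mesh.faces        # likewise; geodists, symmetry and texcoords behave the same way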
Used for torch.grid_sample to initialize the - grid - """ - - def f(p): - return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1) - - return f - - -class DensePoseTransformData: - - # Horizontal symmetry label transforms used for horizontal flip - MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14] - # fmt: off - POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa - # fmt: on - - def __init__(self, uv_symmetries: Dict[str, torch.Tensor], device: torch.device): - self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES - self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES - self.uv_symmetries = uv_symmetries - self.device = torch.device("cpu") - - def to(self, device: torch.device, copy: bool = False) -> "DensePoseTransformData": - """ - Convert transform data to the specified device - - Args: - device (torch.device): device to convert the data to - copy (bool): flag that specifies whether to copy or to reference the data - in case the device is the same - Return: - An instance of `DensePoseTransformData` with data stored on the specified device - """ - if self.device == device and not copy: - return self - uv_symmetry_map = {} - for key in self.uv_symmetries: - uv_symmetry_map[key] = self.uv_symmetries[key].to(device=device, copy=copy) - return DensePoseTransformData(uv_symmetry_map, device) - - @staticmethod - def load(io: Union[str, BinaryIO]): - """ - Args: - io: (str or binary file-like object): input file to load data from - Returns: - An instance of `DensePoseTransformData` with transforms loaded from the file - """ - import scipy.io - - uv_symmetry_map = scipy.io.loadmat(io) - uv_symmetry_map_torch = {} - for key in ["U_transforms", "V_transforms"]: - uv_symmetry_map_torch[key] = [] - map_src = uv_symmetry_map[key] - map_dst = uv_symmetry_map_torch[key] - for i in range(map_src.shape[1]): - map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float)) - uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0) - transform_data = DensePoseTransformData(uv_symmetry_map_torch, device=torch.device("cpu")) - return transform_data diff --git a/detectron2/projects/DensePose/densepose/utils/__init__.py b/detectron2/projects/DensePose/densepose/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/projects/DensePose/densepose/utils/dbhelper.py b/detectron2/projects/DensePose/densepose/utils/dbhelper.py deleted file mode 100644 index ba380303a06f42674aa59f03690504f825b56ed7..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/utils/dbhelper.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from typing import Any, Dict, Optional, Tuple - - -class EntrySelector: - """ - Base class for entry selectors - """ - - @staticmethod - def from_string(spec: str) -> "EntrySelector": - if spec == "*": - return AllEntrySelector() - return FieldEntrySelector(spec) - - -class AllEntrySelector(EntrySelector): - """ - Selector that accepts all entries - """ - - SPECIFIER = "*" - - def __call__(self, entry): - return True - - -class FieldEntrySelector(EntrySelector): - """ - Selector that accepts only entries that match provided field - specifier(s). 
Only a limited set of specifiers is supported for now: - <specifiers>::=<specifier>[<comma><specifiers>] - <specifier>::=<field_name>[<type_delim><type>]<equal><value_or_range> - <field_name> is a valid identifier - <type> ::= "int" | "str" - <equal> ::= "=" - <comma> ::= "," - <type_delim> ::= ":" - <value_or_range> ::= <value> | <range> - <range> ::= <value><range_delim><value> - <range_delim> ::= "-" - <value> is a string without spaces and special symbols - (e.g. <comma>, <equal>, <type_delim>, <range_delim>) - """ - - _SPEC_DELIM = "," - _TYPE_DELIM = ":" - _RANGE_DELIM = "-" - _EQUAL = "=" - _ERROR_PREFIX = "Invalid field selector specifier" - - class _FieldEntryValuePredicate: - """ - Predicate that checks strict equality for the specified entry field - """ - - def __init__(self, name: str, typespec: Optional[str], value: str): - import builtins - - self.name = name - self.type = getattr(builtins, typespec) if typespec is not None else str - self.value = value - - def __call__(self, entry): - return entry[self.name] == self.type(self.value) - - class _FieldEntryRangePredicate: - """ - Predicate that checks whether an entry field falls into the specified range - """ - - def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str): - import builtins - - self.name = name - self.type = getattr(builtins, typespec) if typespec is not None else str - self.vmin = vmin - self.vmax = vmax - - def __call__(self, entry): - return (entry[self.name] >= self.type(self.vmin)) and ( - entry[self.name] <= self.type(self.vmax) - ) - - def __init__(self, spec: str): - self._predicates = self._parse_specifier_into_predicates(spec) - - def __call__(self, entry: Dict[str, Any]): - for predicate in self._predicates: - if not predicate(entry): - return False - return True - - def _parse_specifier_into_predicates(self, spec: str): - predicates = [] - specs = spec.split(self._SPEC_DELIM) - for subspec in specs: - eq_idx = subspec.find(self._EQUAL) - if eq_idx > 0: - field_name_with_type = subspec[:eq_idx] - field_name, field_type = self._parse_field_name_type(field_name_with_type) - field_value_or_range = subspec[eq_idx + 1 :] - if self._is_range_spec(field_value_or_range): - vmin, vmax = self._get_range_spec(field_value_or_range) - predicate = FieldEntrySelector._FieldEntryRangePredicate( - field_name, field_type, vmin, vmax - ) - else: - predicate = FieldEntrySelector._FieldEntryValuePredicate( - field_name, field_type, field_value_or_range - ) - predicates.append(predicate) - elif eq_idx == 0: - self._parse_error(f'"{subspec}", field name is empty!') - else: - self._parse_error(f'"{subspec}", should have format ' "<field_name>=<value_or_range>!") - return predicates - - def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]: - type_delim_idx = field_name_with_type.find(self._TYPE_DELIM) - if type_delim_idx > 0: - field_name = field_name_with_type[:type_delim_idx] - field_type = field_name_with_type[type_delim_idx + 1 :] - elif type_delim_idx == 0: - self._parse_error(f'"{field_name_with_type}", field name is empty!') - else: - field_name = field_name_with_type - field_type = None - # pyre-fixme[61]: `field_name` may not be initialized here. - # pyre-fixme[61]: `field_type` may not be initialized here. 
- return field_name, field_type - - def _is_range_spec(self, field_value_or_range): - delim_idx = field_value_or_range.find(self._RANGE_DELIM) - return delim_idx > 0 - - def _get_range_spec(self, field_value_or_range): - if self._is_range_spec(field_value_or_range): - delim_idx = field_value_or_range.find(self._RANGE_DELIM) - vmin = field_value_or_range[:delim_idx] - vmax = field_value_or_range[delim_idx + 1 :] - return vmin, vmax - else: - self._parse_error('"field_value_or_range", range of values expected!') - - def _parse_error(self, msg): - raise ValueError(f"{self._ERROR_PREFIX}: {msg}") diff --git a/detectron2/projects/DensePose/densepose/utils/logger.py b/detectron2/projects/DensePose/densepose/utils/logger.py deleted file mode 100644 index 7aad2c0895afff0514c59b10cc80d01e47d50918..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/utils/logger.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import logging - - -def verbosity_to_level(verbosity) -> int: - if verbosity is not None: - if verbosity == 0: - return logging.WARNING - elif verbosity == 1: - return logging.INFO - elif verbosity >= 2: - return logging.DEBUG - return logging.WARNING diff --git a/detectron2/projects/DensePose/densepose/utils/transform.py b/detectron2/projects/DensePose/densepose/utils/transform.py deleted file mode 100644 index 5f8a8ba038588bf8c014390f8b8feadfcdc40307..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/utils/transform.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -from detectron2.data import MetadataCatalog -from detectron2.utils.file_io import PathManager - -from densepose import DensePoseTransformData - - -def load_for_dataset(dataset_name): - path = MetadataCatalog.get(dataset_name).densepose_transform_src - densepose_transform_data_fpath = PathManager.get_local_path(path) - return DensePoseTransformData.load(densepose_transform_data_fpath) - - -def load_from_cfg(cfg): - return load_for_dataset(cfg.DATASETS.TEST[0]) diff --git a/detectron2/projects/DensePose/densepose/vis/__init__.py b/detectron2/projects/DensePose/densepose/vis/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/projects/DensePose/densepose/vis/base.py b/detectron2/projects/DensePose/densepose/vis/base.py deleted file mode 100644 index 6a7b07000c41f49386de5d7752c0d277b9da1979..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/base.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
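For reference, a small usage sketch of the removed EntrySelector helpers (not part of the diff); the spec string follows the field_name[:type]=value_or_range grammar from the FieldEntrySelector docstring, and the entry dicts below are invented.

from densepose.utils.dbhelper import EntrySelector

selector = EntrySelector.from_string("image_id:int=100-200,dataset=coco_2014_train")
selector({"image_id": 150, "dataset": "coco_2014_train"})  # True: both predicates match
selector({"image_id": 999, "dataset": "coco_2014_train"})  # False: 999 falls outside 100-200
EntrySelector.from_string("*")({"anything": 1})            # "*" selects every entry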
- -# pyre-unsafe -import logging -import numpy as np -import cv2 -import torch - -Image = np.ndarray -Boxes = torch.Tensor - - -class MatrixVisualizer: - """ - Base visualizer for matrix data - """ - - def __init__( - self, - inplace=True, - cmap=cv2.COLORMAP_PARULA, - val_scale=1.0, - alpha=0.7, - interp_method_matrix=cv2.INTER_LINEAR, - interp_method_mask=cv2.INTER_NEAREST, - ): - self.inplace = inplace - self.cmap = cmap - self.val_scale = val_scale - self.alpha = alpha - self.interp_method_matrix = interp_method_matrix - self.interp_method_mask = interp_method_mask - - def visualize(self, image_bgr, mask, matrix, bbox_xywh): - self._check_image(image_bgr) - self._check_mask_matrix(mask, matrix) - if self.inplace: - image_target_bgr = image_bgr - else: - image_target_bgr = image_bgr * 0 - x, y, w, h = [int(v) for v in bbox_xywh] - if w <= 0 or h <= 0: - return image_bgr - mask, matrix = self._resize(mask, matrix, w, h) - mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3]) - matrix_scaled = matrix.astype(np.float32) * self.val_scale - _EPSILON = 1e-6 - if np.any(matrix_scaled > 255 + _EPSILON): - logger = logging.getLogger(__name__) - logger.warning( - f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]" - ) - matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8) - matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap) - matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg] - image_target_bgr[y : y + h, x : x + w, :] = ( - image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha - ) - return image_target_bgr.astype(np.uint8) - - def _resize(self, mask, matrix, w, h): - if (w != mask.shape[1]) or (h != mask.shape[0]): - mask = cv2.resize(mask, (w, h), self.interp_method_mask) - if (w != matrix.shape[1]) or (h != matrix.shape[0]): - matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix) - return mask, matrix - - def _check_image(self, image_rgb): - assert len(image_rgb.shape) == 3 - assert image_rgb.shape[2] == 3 - assert image_rgb.dtype == np.uint8 - - def _check_mask_matrix(self, mask, matrix): - assert len(matrix.shape) == 2 - assert len(mask.shape) == 2 - assert mask.dtype == np.uint8 - - -class RectangleVisualizer: - - _COLOR_GREEN = (18, 127, 15) - - def __init__(self, color=_COLOR_GREEN, thickness=1): - self.color = color - self.thickness = thickness - - def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None): - x, y, w, h = bbox_xywh - color = color or self.color - thickness = thickness or self.thickness - cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness) - return image_bgr - - -class PointsVisualizer: - - _COLOR_GREEN = (18, 127, 15) - - def __init__(self, color_bgr=_COLOR_GREEN, r=5): - self.color_bgr = color_bgr - self.r = r - - def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None): - for j, pt_xy in enumerate(pts_xy): - x, y = pt_xy - color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr - r = rs[j] if rs is not None else self.r - cv2.circle(image_bgr, (x, y), r, color_bgr, -1) - return image_bgr - - -class TextVisualizer: - - _COLOR_GRAY = (218, 227, 218) - _COLOR_WHITE = (255, 255, 255) - - def __init__( - self, - font_face=cv2.FONT_HERSHEY_SIMPLEX, - font_color_bgr=_COLOR_GRAY, - font_scale=0.35, - font_line_type=cv2.LINE_AA, - font_line_thickness=1, - fill_color_bgr=_COLOR_WHITE, - fill_color_transparency=1.0, - frame_color_bgr=_COLOR_WHITE, - frame_color_transparency=1.0, - 
frame_thickness=1, - ): - self.font_face = font_face - self.font_color_bgr = font_color_bgr - self.font_scale = font_scale - self.font_line_type = font_line_type - self.font_line_thickness = font_line_thickness - self.fill_color_bgr = fill_color_bgr - self.fill_color_transparency = fill_color_transparency - self.frame_color_bgr = frame_color_bgr - self.frame_color_transparency = frame_color_transparency - self.frame_thickness = frame_thickness - - def visualize(self, image_bgr, txt, topleft_xy): - txt_w, txt_h = self.get_text_size_wh(txt) - topleft_xy = tuple(map(int, topleft_xy)) - x, y = topleft_xy - if self.frame_color_transparency < 1.0: - t = self.frame_thickness - image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = ( - image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] - * self.frame_color_transparency - + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency) - ).astype(float) - if self.fill_color_transparency < 1.0: - image_bgr[y : y + txt_h, x : x + txt_w, :] = ( - image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency - + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency) - ).astype(float) - cv2.putText( - image_bgr, - txt, - topleft_xy, - self.font_face, - self.font_scale, - self.font_color_bgr, - self.font_line_thickness, - self.font_line_type, - ) - return image_bgr - - def get_text_size_wh(self, txt): - ((txt_w, txt_h), _) = cv2.getTextSize( - txt, self.font_face, self.font_scale, self.font_line_thickness - ) - return txt_w, txt_h - - -class CompoundVisualizer: - def __init__(self, visualizers): - self.visualizers = visualizers - - def visualize(self, image_bgr, data): - assert len(data) == len( - self.visualizers - ), "The number of datas {} should match the number of visualizers" " {}".format( - len(data), len(self.visualizers) - ) - image = image_bgr - for i, visualizer in enumerate(self.visualizers): - image = visualizer.visualize(image, data[i]) - return image - - def __str__(self): - visualizer_str = ", ".join([str(v) for v in self.visualizers]) - return "Compound Visualizer [{}]".format(visualizer_str) diff --git a/detectron2/projects/DensePose/densepose/vis/bounding_box.py b/detectron2/projects/DensePose/densepose/vis/bounding_box.py deleted file mode 100644 index a88ba0ce74b8da539ea3a25c703a9795be8163a6..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/bounding_box.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
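A brief sketch (not from the diff) of the removed MatrixVisualizer: it colormaps a per-pixel matrix, masks out the background, and alpha-blends the result into the box region of a BGR image. The shapes and box below are made up; val_scale=255/24 mirrors how the segmentation visualizers scale part labels.

import numpy as np
from densepose.vis.base import MatrixVisualizer

visualizer = MatrixVisualizer(inplace=True, val_scale=255.0 / 24, alpha=0.7)
image_bgr = np.zeros((480, 640, 3), dtype=np.uint8)
matrix = np.random.randint(0, 25, (64, 64)).astype(np.uint8)  # e.g. part labels per pixel
mask = (matrix > 0).astype(np.uint8)                          # 1 on the person, 0 on background
image_bgr = visualizer.visualize(image_bgr, mask, matrix, [100, 80, 64, 64])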
- -# pyre-unsafe -from .base import RectangleVisualizer, TextVisualizer - - -class BoundingBoxVisualizer: - def __init__(self): - self.rectangle_visualizer = RectangleVisualizer() - - def visualize(self, image_bgr, boxes_xywh): - for bbox_xywh in boxes_xywh: - image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh) - return image_bgr - - -class ScoredBoundingBoxVisualizer: - def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None, **kwargs): - if bbox_visualizer_params is None: - bbox_visualizer_params = {} - if score_visualizer_params is None: - score_visualizer_params = {} - self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params) - self.visualizer_score = TextVisualizer(**score_visualizer_params) - - def visualize(self, image_bgr, scored_bboxes): - boxes_xywh, box_scores = scored_bboxes - assert len(boxes_xywh) == len( - box_scores - ), "Number of bounding boxes {} should be equal to the number of scores {}".format( - len(boxes_xywh), len(box_scores) - ) - for i, box_xywh in enumerate(boxes_xywh): - score_i = box_scores[i] - image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh) - score_txt = "{0:6.4f}".format(score_i) - topleft_xy = box_xywh[0], box_xywh[1] - image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy) - return image_bgr diff --git a/detectron2/projects/DensePose/densepose/vis/densepose_data_points.py b/detectron2/projects/DensePose/densepose/vis/densepose_data_points.py deleted file mode 100644 index de809f64ee09a50291999774d91443e3edd869ea..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/densepose_data_points.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import numpy as np -from typing import Iterable, Optional, Tuple -import cv2 - -from densepose.structures import DensePoseDataRelative - -from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer - - -class DensePoseDataCoarseSegmentationVisualizer: - """ - Visualizer for ground truth segmentation - """ - - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, - cmap=cmap, - val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS, - alpha=alpha, - ) - - def visualize( - self, - image_bgr: Image, - bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]], - ) -> Image: - if bbox_densepose_datas is None: - return image_bgr - for bbox_xywh, densepose_data in zip(*bbox_densepose_datas): - matrix = densepose_data.segm.numpy() - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[matrix > 0] = 1 - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy()) - return image_bgr - - -class DensePoseDataPointsVisualizer: - def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA, **kwargs): - self.points_visualizer = PointsVisualizer() - self.densepose_data_to_value_fn = densepose_data_to_value_fn - self.cmap = cmap - - def visualize( - self, - image_bgr: Image, - bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]], - ) -> Image: - if bbox_densepose_datas is None: - return image_bgr - for bbox_xywh, densepose_data in zip(*bbox_densepose_datas): - x0, y0, w, h = bbox_xywh.numpy() - x = densepose_data.x.numpy() * w / 255.0 + x0 - y = densepose_data.y.numpy() * h / 255.0 + y0 - pts_xy = zip(x, y) - if self.densepose_data_to_value_fn is None: - image_bgr 
= self.points_visualizer.visualize(image_bgr, pts_xy) - else: - v = self.densepose_data_to_value_fn(densepose_data) - img_colors_bgr = cv2.applyColorMap(v, self.cmap) - colors_bgr = [ - [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr - ] - image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr) - return image_bgr - - -def _densepose_data_u_for_cmap(densepose_data): - u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0 - return u.astype(np.uint8) - - -def _densepose_data_v_for_cmap(densepose_data): - v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0 - return v.astype(np.uint8) - - -def _densepose_data_i_for_cmap(densepose_data): - i = ( - np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS) - * 255.0 - / DensePoseDataRelative.N_PART_LABELS - ) - return i.astype(np.uint8) - - -class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer): - def __init__(self, **kwargs): - super(DensePoseDataPointsUVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_u_for_cmap, **kwargs - ) - - -class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer): - def __init__(self, **kwargs): - super(DensePoseDataPointsVVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_v_for_cmap, **kwargs - ) - - -class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer): - def __init__(self, **kwargs): - super(DensePoseDataPointsIVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_i_for_cmap, **kwargs - ) diff --git a/detectron2/projects/DensePose/densepose/vis/densepose_outputs_iuv.py b/detectron2/projects/DensePose/densepose/vis/densepose_outputs_iuv.py deleted file mode 100644 index 960ffba0d4146eda0a4dcd2220c724d944834b33..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/densepose_outputs_iuv.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
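To round out the visualizer removals, a small sketch (not from the diff) of ScoredBoundingBoxVisualizer, which pairs the rectangle and text visualizers defined above; the boxes and scores are fabricated and passed as plain Python sequences.

import numpy as np
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer

visualizer = ScoredBoundingBoxVisualizer()
image_bgr = np.zeros((480, 640, 3), dtype=np.uint8)
boxes_xywh = [[50.0, 60.0, 120.0, 200.0], [300.0, 40.0, 90.0, 180.0]]  # XYWH per detection
scores = [0.97, 0.42]
image_bgr = visualizer.visualize(image_bgr, (boxes_xywh, scores))  # draws each box with its score text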
- -# pyre-unsafe -import numpy as np -from typing import Optional, Tuple -import cv2 - -from densepose.structures import DensePoseDataRelative - -from ..structures import DensePoseChartPredictorOutput -from .base import Boxes, Image, MatrixVisualizer - - -class DensePoseOutputsVisualizer: - def __init__( - self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, to_visualize=None, **kwargs - ): - assert to_visualize in "IUV", "can only visualize IUV" - self.to_visualize = to_visualize - - if self.to_visualize == "I": - val_scale = 255.0 / DensePoseDataRelative.N_PART_LABELS - else: - val_scale = 1.0 - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha - ) - - def visualize( - self, - image_bgr: Image, - dp_output_with_bboxes: Tuple[Optional[DensePoseChartPredictorOutput], Optional[Boxes]], - ) -> Image: - densepose_output, bboxes_xywh = dp_output_with_bboxes - if densepose_output is None or bboxes_xywh is None: - return image_bgr - - assert isinstance( - densepose_output, DensePoseChartPredictorOutput - ), "DensePoseChartPredictorOutput expected, {} encountered".format(type(densepose_output)) - - S = densepose_output.coarse_segm - I = densepose_output.fine_segm # noqa - U = densepose_output.u - V = densepose_output.v - N = S.size(0) - assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - segmentation = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(segmentation.shape, dtype=np.uint8) - mask[segmentation > 0] = 1 - bbox_xywh = bboxes_xywh[n] - - if self.to_visualize == "I": - vis = segmentation - elif self.to_visualize in "UV": - U_or_Vn = {"U": U, "V": V}[self.to_visualize][n].cpu().numpy().astype(np.float32) - vis = np.zeros(segmentation.shape, dtype=np.float32) - for partId in range(U_or_Vn.shape[0]): - vis[segmentation == partId] = ( - U_or_Vn[partId][segmentation == partId].clip(0, 1) * 255 - ) - - # pyre-fixme[61]: `vis` may not be initialized here. 
- image_bgr = self.mask_visualizer.visualize(image_bgr, mask, vis, bbox_xywh) - - return image_bgr - - -class DensePoseOutputsUVisualizer(DensePoseOutputsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="U", **kwargs) - - -class DensePoseOutputsVVisualizer(DensePoseOutputsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="V", **kwargs) - - -class DensePoseOutputsFineSegmentationVisualizer(DensePoseOutputsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="I", **kwargs) diff --git a/detectron2/projects/DensePose/densepose/vis/densepose_outputs_vertex.py b/detectron2/projects/DensePose/densepose/vis/densepose_outputs_vertex.py deleted file mode 100644 index fe296fcf81e5711eea21049a1b4de17eb2541b3f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/densepose_outputs_vertex.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# pyre-unsafe -import json -import numpy as np -from functools import lru_cache -from typing import Dict, List, Optional, Tuple -import cv2 -import torch - -from detectron2.utils.file_io import PathManager - -from densepose.modeling import build_densepose_embedder -from densepose.modeling.cse.utils import get_closest_vertices_mask_from_ES - -from ..data.utils import get_class_to_mesh_name_mapping -from ..structures import DensePoseEmbeddingPredictorOutput -from ..structures.mesh import create_mesh -from .base import Boxes, Image, MatrixVisualizer -from .densepose_results_textures import get_texture_atlas - - -@lru_cache() -def get_xyz_vertex_embedding(mesh_name: str, device: torch.device): - if mesh_name == "smpl_27554": - embed_path = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/cse/mds_d=256.npy" - ) - embed_map, _ = np.load(embed_path, allow_pickle=True) - embed_map = torch.tensor(embed_map).float()[:, 0] - embed_map -= embed_map.min() - embed_map /= embed_map.max() - else: - mesh = create_mesh(mesh_name, device) - embed_map = mesh.vertices.sum(dim=1) - embed_map -= embed_map.min() - embed_map /= embed_map.max() - embed_map = embed_map**2 - return embed_map - - -class DensePoseOutputsVertexVisualizer: - def __init__( - self, - cfg, - inplace=True, - cmap=cv2.COLORMAP_JET, - alpha=0.7, - device="cuda", - default_class=0, - **kwargs, - ): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha - ) - self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg) - self.embedder = build_densepose_embedder(cfg) - self.device = torch.device(device) - self.default_class = default_class - - self.mesh_vertex_embeddings = { - mesh_name: self.embedder(mesh_name).to(self.device) - for mesh_name in self.class_to_mesh_name.values() - if self.embedder.has_embeddings(mesh_name) - } - - def visualize( - self, - image_bgr: Image, - outputs_boxes_xywh_classes: Tuple[ - Optional[DensePoseEmbeddingPredictorOutput], Optional[Boxes], Optional[List[int]] - ], - ) -> Image: - if outputs_boxes_xywh_classes[0] is None: - return image_bgr - - S, E, N, bboxes_xywh, pred_classes = self.extract_and_check_outputs_and_boxes( - outputs_boxes_xywh_classes - ) - - for n in range(N): - 
x, y, w, h = bboxes_xywh[n].int().tolist() - mesh_name = self.class_to_mesh_name[pred_classes[n]] - closest_vertices, mask = get_closest_vertices_mask_from_ES( - E[[n]], - S[[n]], - h, - w, - self.mesh_vertex_embeddings[mesh_name], - self.device, - ) - embed_map = get_xyz_vertex_embedding(mesh_name, self.device) - vis = (embed_map[closest_vertices].clip(0, 1) * 255.0).cpu().numpy() - mask_numpy = mask.cpu().numpy().astype(dtype=np.uint8) - image_bgr = self.mask_visualizer.visualize(image_bgr, mask_numpy, vis, [x, y, w, h]) - - return image_bgr - - def extract_and_check_outputs_and_boxes(self, outputs_boxes_xywh_classes): - - densepose_output, bboxes_xywh, pred_classes = outputs_boxes_xywh_classes - - if pred_classes is None: - pred_classes = [self.default_class] * len(bboxes_xywh) - - assert isinstance( - densepose_output, DensePoseEmbeddingPredictorOutput - ), "DensePoseEmbeddingPredictorOutput expected, {} encountered".format( - type(densepose_output) - ) - - S = densepose_output.coarse_segm - E = densepose_output.embedding - N = S.size(0) - assert N == E.size( - 0 - ), "CSE coarse_segm {} and embeddings {}" " should have equal first dim size".format( - S.size(), E.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - assert N == len(pred_classes), ( - "number of predicted classes {}" - " should be equal to first dim size of outputs {}".format(len(bboxes_xywh), N) - ) - - return S, E, N, bboxes_xywh, pred_classes - - -def get_texture_atlases(json_str: Optional[str]) -> Optional[Dict[str, Optional[np.ndarray]]]: - """ - json_str is a JSON string representing a mesh_name -> texture_atlas_path dictionary - """ - if json_str is None: - return None - - paths = json.loads(json_str) - return {mesh_name: get_texture_atlas(path) for mesh_name, path in paths.items()} - - -class DensePoseOutputsTextureVisualizer(DensePoseOutputsVertexVisualizer): - def __init__( - self, - cfg, - texture_atlases_dict, - device="cuda", - default_class=0, - **kwargs, - ): - self.embedder = build_densepose_embedder(cfg) - - self.texture_image_dict = {} - self.alpha_dict = {} - - for mesh_name in texture_atlases_dict.keys(): - if texture_atlases_dict[mesh_name].shape[-1] == 4: # Image with alpha channel - self.alpha_dict[mesh_name] = texture_atlases_dict[mesh_name][:, :, -1] / 255.0 - self.texture_image_dict[mesh_name] = texture_atlases_dict[mesh_name][:, :, :3] - else: - self.alpha_dict[mesh_name] = texture_atlases_dict[mesh_name].sum(axis=-1) > 0 - self.texture_image_dict[mesh_name] = texture_atlases_dict[mesh_name] - - self.device = torch.device(device) - self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg) - self.default_class = default_class - - self.mesh_vertex_embeddings = { - mesh_name: self.embedder(mesh_name).to(self.device) - for mesh_name in self.class_to_mesh_name.values() - } - - def visualize( - self, - image_bgr: Image, - outputs_boxes_xywh_classes: Tuple[ - Optional[DensePoseEmbeddingPredictorOutput], Optional[Boxes], Optional[List[int]] - ], - ) -> Image: - image_target_bgr = image_bgr.copy() - if outputs_boxes_xywh_classes[0] is None: - return image_target_bgr - - S, E, N, bboxes_xywh, pred_classes = self.extract_and_check_outputs_and_boxes( - outputs_boxes_xywh_classes - ) - - meshes = { - p: create_mesh(self.class_to_mesh_name[p], self.device) for p in np.unique(pred_classes) - } - - for n in range(N): - x, y, w, h = bboxes_xywh[n].int().cpu().numpy() - mesh_name = 
self.class_to_mesh_name[pred_classes[n]] - closest_vertices, mask = get_closest_vertices_mask_from_ES( - E[[n]], - S[[n]], - h, - w, - self.mesh_vertex_embeddings[mesh_name], - self.device, - ) - uv_array = meshes[pred_classes[n]].texcoords[closest_vertices].permute((2, 0, 1)) - uv_array = uv_array.cpu().numpy().clip(0, 1) - textured_image = self.generate_image_with_texture( - image_target_bgr[y : y + h, x : x + w], - uv_array, - mask.cpu().numpy(), - self.class_to_mesh_name[pred_classes[n]], - ) - if textured_image is None: - continue - image_target_bgr[y : y + h, x : x + w] = textured_image - - return image_target_bgr - - def generate_image_with_texture(self, bbox_image_bgr, uv_array, mask, mesh_name): - alpha = self.alpha_dict.get(mesh_name) - texture_image = self.texture_image_dict.get(mesh_name) - if alpha is None or texture_image is None: - return None - U, V = uv_array - x_index = (U * texture_image.shape[1]).astype(int) - y_index = (V * texture_image.shape[0]).astype(int) - local_texture = texture_image[y_index, x_index][mask] - local_alpha = np.expand_dims(alpha[y_index, x_index][mask], -1) - output_image = bbox_image_bgr.copy() - output_image[mask] = output_image[mask] * (1 - local_alpha) + local_texture * local_alpha - return output_image.astype(np.uint8) diff --git a/detectron2/projects/DensePose/densepose/vis/densepose_results.py b/detectron2/projects/DensePose/densepose/vis/densepose_results.py deleted file mode 100644 index d49a3828339b6ff03735924d3621396ca8f00e5c..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/densepose_results.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import logging -import numpy as np -from typing import List, Optional, Tuple -import cv2 -import torch - -from densepose.structures import DensePoseDataRelative - -from ..structures import DensePoseChartResult -from .base import Boxes, Image, MatrixVisualizer - - -class DensePoseResultsVisualizer: - def visualize( - self, - image_bgr: Image, - results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]], - ) -> Image: - densepose_result, boxes_xywh = results_and_boxes_xywh - if densepose_result is None or boxes_xywh is None: - return image_bgr - - boxes_xywh = boxes_xywh.cpu().numpy() - context = self.create_visualization_context(image_bgr) - for i, result in enumerate(densepose_result): - iuv_array = torch.cat( - (result.labels[None].type(torch.float32), result.uv * 255.0) - ).type(torch.uint8) - self.visualize_iuv_arr(context, iuv_array.cpu().numpy(), boxes_xywh[i]) - image_bgr = self.context_to_image_bgr(context) - return image_bgr - - def create_visualization_context(self, image_bgr: Image): - return image_bgr - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None: - pass - - def context_to_image_bgr(self, context): - return context - - def get_image_bgr_from_context(self, context): - return context - - -class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer): - def __init__( - self, - data_extractor, - segm_extractor, - inplace=True, - cmap=cv2.COLORMAP_PARULA, - alpha=0.7, - val_scale=1.0, - **kwargs, - ): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha - ) - self.data_extractor = data_extractor - self.segm_extractor = segm_extractor - - def context_to_image_bgr(self, context): - return context - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> 
None: - image_bgr = self.get_image_bgr_from_context(context) - matrix = self.data_extractor(iuv_arr) - segm = self.segm_extractor(iuv_arr) - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[segm > 0] = 1 - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) - - -def _extract_i_from_iuvarr(iuv_arr): - return iuv_arr[0, :, :] - - -def _extract_u_from_iuvarr(iuv_arr): - return iuv_arr[1, :, :] - - -def _extract_v_from_iuvarr(iuv_arr): - return iuv_arr[2, :, :] - - -class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer): - def __init__(self, levels=10, **kwargs): - self.levels = levels - self.plot_args = kwargs - - def create_visualization_context(self, image_bgr: Image): - import matplotlib.pyplot as plt - from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas - - context = {} - context["image_bgr"] = image_bgr - dpi = 100 - height_inches = float(image_bgr.shape[0]) / dpi - width_inches = float(image_bgr.shape[1]) / dpi - fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi) - plt.axes([0, 0, 1, 1]) - plt.axis("off") - context["fig"] = fig - canvas = FigureCanvas(fig) - context["canvas"] = canvas - extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0) - plt.imshow(image_bgr[:, :, ::-1], extent=extent) - return context - - def context_to_image_bgr(self, context): - fig = context["fig"] - w, h = map(int, fig.get_size_inches() * fig.get_dpi()) - canvas = context["canvas"] - canvas.draw() - image_1d = np.fromstring(canvas.tostring_rgb(), dtype="uint8") - image_rgb = image_1d.reshape(h, w, 3) - image_bgr = image_rgb[:, :, ::-1].copy() - return image_bgr - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None: - import matplotlib.pyplot as plt - - u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0 - v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0 - extent = ( - bbox_xywh[0], - bbox_xywh[0] + bbox_xywh[2], - bbox_xywh[1], - bbox_xywh[1] + bbox_xywh[3], - ) - plt.contour(u, self.levels, extent=extent, **self.plot_args) - plt.contour(v, self.levels, extent=extent, **self.plot_args) - - -class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer): - """ - Contour visualization using marching squares - """ - - def __init__(self, levels=10, **kwargs): - # TODO: colormap is hardcoded - cmap = cv2.COLORMAP_PARULA - if isinstance(levels, int): - self.levels = np.linspace(0, 1, levels) - else: - self.levels = levels - if "linewidths" in kwargs: - self.linewidths = kwargs["linewidths"] - else: - self.linewidths = [1] * len(self.levels) - self.plot_args = kwargs - img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap) - self.level_colors_bgr = [ - [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr - ] - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None: - image_bgr = self.get_image_bgr_from_context(context) - segm = _extract_i_from_iuvarr(iuv_arr) - u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0 - v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0 - self._contours(image_bgr, u, segm, bbox_xywh) - self._contours(image_bgr, v, segm, bbox_xywh) - - def _contours(self, image_bgr, arr, segm, bbox_xywh): - for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1): - mask = segm == part_idx - if not np.any(mask): - continue - arr_min = np.amin(arr[mask]) - arr_max = np.amax(arr[mask]) - I, J = np.nonzero(mask) - i0 = np.amin(I) - i1 = np.amax(I) + 1 - j0 = np.amin(J) 
- j1 = np.amax(J) + 1 - if (j1 == j0 + 1) or (i1 == i0 + 1): - continue - Nw = arr.shape[1] - 1 - Nh = arr.shape[0] - 1 - for level_idx, level in enumerate(self.levels): - if (level < arr_min) or (level > arr_max): - continue - vp = arr[i0:i1, j0:j1] >= level - bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8 - mp = mask[i0:i1, j0:j1] - bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8 - it = np.nditer(bin_codes, flags=["multi_index"]) - color_bgr = self.level_colors_bgr[level_idx] - linewidth = self.linewidths[level_idx] - while not it.finished: - if (it[0] != 0) and (it[0] != 15): - i, j = it.multi_index - if bin_mask_codes[i, j] != 0: - self._draw_line( - image_bgr, - arr, - mask, - level, - color_bgr, - linewidth, - it[0], - it.multi_index, - bbox_xywh, - Nw, - Nh, - (i0, j0), - ) - it.iternext() - - def _draw_line( - self, - image_bgr, - arr, - mask, - v, - color_bgr, - linewidth, - bin_code, - multi_idx, - bbox_xywh, - Nw, - Nh, - offset, - ): - lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset) - x0, y0, w, h = bbox_xywh - x1 = x0 + w - y1 = y0 + h - for line in lines: - x0r, y0r = line[0] - x1r, y1r = line[1] - pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0))) - pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0))) - cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth) - - def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset): - i0, j0 = offset - i, j = multi_idx - i += i0 - j += j0 - v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1] - x0i = float(j) / Nw - y0j = float(i) / Nh - He = 1.0 / Nh - We = 1.0 / Nw - if (bin_code == 1) or (bin_code == 14): - a = (v - v0) / (v1 - v0) - b = (v - v0) / (v3 - v0) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + b * We, y0j) - return [(pt1, pt2)] - elif (bin_code == 2) or (bin_code == 13): - a = (v - v0) / (v1 - v0) - b = (v - v1) / (v2 - v1) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + b * We, y0j + He) - return [(pt1, pt2)] - elif (bin_code == 3) or (bin_code == 12): - a = (v - v0) / (v3 - v0) - b = (v - v1) / (v2 - v1) - pt1 = (x0i + a * We, y0j) - pt2 = (x0i + b * We, y0j + He) - return [(pt1, pt2)] - elif (bin_code == 4) or (bin_code == 11): - a = (v - v1) / (v2 - v1) - b = (v - v3) / (v2 - v3) - pt1 = (x0i + a * We, y0j + He) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif (bin_code == 6) or (bin_code == 9): - a = (v - v0) / (v1 - v0) - b = (v - v3) / (v2 - v3) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif (bin_code == 7) or (bin_code == 8): - a = (v - v0) / (v3 - v0) - b = (v - v3) / (v2 - v3) - pt1 = (x0i + a * We, y0j) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif bin_code == 5: - a1 = (v - v0) / (v1 - v0) - b1 = (v - v1) / (v2 - v1) - pt11 = (x0i, y0j + a1 * He) - pt12 = (x0i + b1 * We, y0j + He) - a2 = (v - v0) / (v3 - v0) - b2 = (v - v3) / (v2 - v3) - pt21 = (x0i + a2 * We, y0j) - pt22 = (x0i + We, y0j + b2 * He) - return [(pt11, pt12), (pt21, pt22)] - elif bin_code == 10: - a1 = (v - v0) / (v3 - v0) - b1 = (v - v0) / (v1 - v0) - pt11 = (x0i + a1 * We, y0j) - pt12 = (x0i, y0j + b1 * He) - a2 = (v - v1) / (v2 - v1) - b2 = (v - v3) / (v2 - v3) - pt21 = (x0i + a2 * We, y0j + He) - pt22 = (x0i + We, y0j + b2 * He) - return [(pt11, pt12), (pt21, pt22)] - return [] - - -try: - import matplotlib - - matplotlib.use("Agg") - DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer -except ModuleNotFoundError: - 
logger = logging.getLogger(__name__) - logger.warning("Could not import matplotlib, using custom contour visualizer") - DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer - - -class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - super(DensePoseResultsFineSegmentationVisualizer, self).__init__( - _extract_i_from_iuvarr, - _extract_i_from_iuvarr, - inplace, - cmap, - alpha, - val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS, - **kwargs, - ) - - -class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - super(DensePoseResultsUVisualizer, self).__init__( - _extract_u_from_iuvarr, - _extract_i_from_iuvarr, - inplace, - cmap, - alpha, - val_scale=1.0, - **kwargs, - ) - - -class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs): - super(DensePoseResultsVVisualizer, self).__init__( - _extract_v_from_iuvarr, - _extract_i_from_iuvarr, - inplace, - cmap, - alpha, - val_scale=1.0, - **kwargs, - ) diff --git a/detectron2/projects/DensePose/densepose/vis/densepose_results_textures.py b/detectron2/projects/DensePose/densepose/vis/densepose_results_textures.py deleted file mode 100644 index aa33b861100b796f411f3aade1c03a68c279262e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/densepose_results_textures.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -# pyre-unsafe -import numpy as np -from typing import List, Optional, Tuple -import torch - -from detectron2.data.detection_utils import read_image - -from ..structures import DensePoseChartResult -from .base import Boxes, Image -from .densepose_results import DensePoseResultsVisualizer - - -def get_texture_atlas(path: Optional[str]) -> Optional[np.ndarray]: - if path is None: - return None - - # Reading images like that downsamples 16-bit images to 8-bit - # If 16-bit images are needed, we can replace that by cv2.imread with the - # cv2.IMREAD_UNCHANGED flag (with cv2 we also need it to keep alpha channels) - # The rest of the pipeline would need to be adapted to 16-bit images too - bgr_image = read_image(path) - rgb_image = np.copy(bgr_image) # Convert BGR -> RGB - rgb_image[:, :, :3] = rgb_image[:, :, 2::-1] # Works with alpha channel - return rgb_image - - -class DensePoseResultsVisualizerWithTexture(DensePoseResultsVisualizer): - """ - texture_atlas: An image, size 6N * 4N, with N * N squares for each of the 24 body parts. 
- It must follow the grid found at https://github.com/facebookresearch/DensePose/blob/master/DensePoseData/demo_data/texture_atlas_200.png # noqa - For each body part, U is proportional to the x coordinate, and (1 - V) to y - """ - - def __init__(self, texture_atlas, **kwargs): - self.texture_atlas = texture_atlas - self.body_part_size = texture_atlas.shape[0] // 6 - assert self.body_part_size == texture_atlas.shape[1] // 4 - - def visualize( - self, - image_bgr: Image, - results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]], - ) -> Image: - densepose_result, boxes_xywh = results_and_boxes_xywh - if densepose_result is None or boxes_xywh is None: - return image_bgr - - boxes_xywh = boxes_xywh.int().cpu().numpy() - texture_image, alpha = self.get_texture() - for i, result in enumerate(densepose_result): - iuv_array = torch.cat((result.labels[None], result.uv.clamp(0, 1))) - x, y, w, h = boxes_xywh[i] - bbox_image = image_bgr[y : y + h, x : x + w] - image_bgr[y : y + h, x : x + w] = self.generate_image_with_texture( - texture_image, alpha, bbox_image, iuv_array.cpu().numpy() - ) - return image_bgr - - def get_texture(self): - N = self.body_part_size - texture_image = np.zeros([24, N, N, self.texture_atlas.shape[-1]]) - for i in range(4): - for j in range(6): - texture_image[(6 * i + j), :, :, :] = self.texture_atlas[ - N * j : N * (j + 1), N * i : N * (i + 1), : - ] - - if texture_image.shape[-1] == 4: # Image with alpha channel - alpha = texture_image[:, :, :, -1] / 255.0 - texture_image = texture_image[:, :, :, :3] - else: - alpha = texture_image.sum(axis=-1) > 0 - - return texture_image, alpha - - def generate_image_with_texture(self, texture_image, alpha, bbox_image_bgr, iuv_array): - - I, U, V = iuv_array - generated_image_bgr = bbox_image_bgr.copy() - - for PartInd in range(1, 25): - x, y = np.where(I == PartInd) - x_index = (U[x, y] * (self.body_part_size - 1)).astype(int) - y_index = ((1 - V[x, y]) * (self.body_part_size - 1)).astype(int) - part_alpha = np.expand_dims(alpha[PartInd - 1, y_index, x_index], -1) - generated_image_bgr[I == PartInd] = ( - generated_image_bgr[I == PartInd] * (1 - part_alpha) - + texture_image[PartInd - 1, y_index, x_index] * part_alpha - ) - - return generated_image_bgr.astype(np.uint8) diff --git a/detectron2/projects/DensePose/densepose/vis/extractor.py b/detectron2/projects/DensePose/densepose/vis/extractor.py deleted file mode 100644 index cdc52a51955750a178521b8ed9442b31dd9f1ebb..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/densepose/vis/extractor.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -# pyre-unsafe -import logging -from typing import List, Optional, Sequence, Tuple -import torch - -from detectron2.layers.nms import batched_nms -from detectron2.structures.instances import Instances - -from densepose.converters import ToChartResultConverterWithConfidences -from densepose.structures import ( - DensePoseChartResultWithConfidences, - DensePoseEmbeddingPredictorOutput, -) -from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer -from densepose.vis.densepose_outputs_vertex import DensePoseOutputsVertexVisualizer -from densepose.vis.densepose_results import DensePoseResultsVisualizer - -from .base import CompoundVisualizer - -Scores = Sequence[float] -DensePoseChartResultsWithConfidences = List[DensePoseChartResultWithConfidences] - - -def extract_scores_from_instances(instances: Instances, select=None): - if instances.has("scores"): - return instances.scores if select is None else instances.scores[select] - return None - - -def extract_boxes_xywh_from_instances(instances: Instances, select=None): - if instances.has("pred_boxes"): - boxes_xywh = instances.pred_boxes.tensor.clone() - boxes_xywh[:, 2] -= boxes_xywh[:, 0] - boxes_xywh[:, 3] -= boxes_xywh[:, 1] - return boxes_xywh if select is None else boxes_xywh[select] - return None - - -def create_extractor(visualizer: object): - """ - Create an extractor for the provided visualizer - """ - if isinstance(visualizer, CompoundVisualizer): - extractors = [create_extractor(v) for v in visualizer.visualizers] - return CompoundExtractor(extractors) - elif isinstance(visualizer, DensePoseResultsVisualizer): - return DensePoseResultExtractor() - elif isinstance(visualizer, ScoredBoundingBoxVisualizer): - return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances]) - elif isinstance(visualizer, BoundingBoxVisualizer): - return extract_boxes_xywh_from_instances - elif isinstance(visualizer, DensePoseOutputsVertexVisualizer): - return DensePoseOutputsExtractor() - else: - logger = logging.getLogger(__name__) - logger.error(f"Could not create extractor for {visualizer}") - return None - - -class BoundingBoxExtractor: - """ - Extracts bounding boxes from instances - """ - - def __call__(self, instances: Instances): - boxes_xywh = extract_boxes_xywh_from_instances(instances) - return boxes_xywh - - -class ScoredBoundingBoxExtractor: - """ - Extracts bounding boxes from instances - """ - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if (scores is None) or (boxes_xywh is None): - return (boxes_xywh, scores) - if select is not None: - scores = scores[select] - boxes_xywh = boxes_xywh[select] - return (boxes_xywh, scores) - - -class DensePoseResultExtractor: - """ - Extracts DensePose chart result with confidences from instances - """ - - def __call__( - self, instances: Instances, select=None - ) -> Tuple[Optional[DensePoseChartResultsWithConfidences], Optional[torch.Tensor]]: - if instances.has("pred_densepose") and instances.has("pred_boxes"): - dpout = instances.pred_densepose - boxes_xyxy = instances.pred_boxes - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if select is not None: - dpout = dpout[select] - boxes_xyxy = boxes_xyxy[select] - converter = ToChartResultConverterWithConfidences() - results = [converter.convert(dpout[i], boxes_xyxy[[i]]) for i in range(len(dpout))] - return results, boxes_xywh - else: - return None, None - - 
-class DensePoseOutputsExtractor: - """ - Extracts DensePose result from instances - """ - - def __call__( - self, - instances: Instances, - select=None, - ) -> Tuple[ - Optional[DensePoseEmbeddingPredictorOutput], Optional[torch.Tensor], Optional[List[int]] - ]: - if not (instances.has("pred_densepose") and instances.has("pred_boxes")): - return None, None, None - - dpout = instances.pred_densepose - boxes_xyxy = instances.pred_boxes - boxes_xywh = extract_boxes_xywh_from_instances(instances) - - if instances.has("pred_classes"): - classes = instances.pred_classes.tolist() - else: - classes = None - - if select is not None: - dpout = dpout[select] - boxes_xyxy = boxes_xyxy[select] - if classes is not None: - classes = classes[select] - - return dpout, boxes_xywh, classes - - -class CompoundExtractor: - """ - Extracts data for CompoundVisualizer - """ - - def __init__(self, extractors): - self.extractors = extractors - - def __call__(self, instances: Instances, select=None): - datas = [] - for extractor in self.extractors: - data = extractor(instances, select) - datas.append(data) - return datas - - -class NmsFilteredExtractor: - """ - Extracts data in the format accepted by NmsFilteredVisualizer - """ - - def __init__(self, extractor, iou_threshold): - self.extractor = extractor - self.iou_threshold = iou_threshold - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if boxes_xywh is None: - return None - select_local_idx = batched_nms( - boxes_xywh, - scores, - torch.zeros(len(scores), dtype=torch.int32), - iou_threshold=self.iou_threshold, - ).squeeze() - select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device) - select_local[select_local_idx] = True - select = select_local if select is None else (select & select_local) - return self.extractor(instances, select=select) - - -class ScoreThresholdedExtractor: - """ - Extracts data in the format accepted by ScoreThresholdedVisualizer - """ - - def __init__(self, extractor, min_score): - self.extractor = extractor - self.min_score = min_score - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - if scores is None: - return None - select_local = scores > self.min_score - select = select_local if select is None else (select & select_local) - data = self.extractor(instances, select=select) - return data diff --git a/detectron2/projects/DensePose/dev/README.md b/detectron2/projects/DensePose/dev/README.md deleted file mode 100644 index e3a94b67ed4b4d0c2934f074802cd00f3660f9a9..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/dev/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -## Some scripts for developers to use, include: - -- `run_instant_tests.sh`: run training for a few iterations. -- `run_inference_tests.sh`: run inference on a small dataset. -- `../../dev/linter.sh`: lint the codebase before commit -- `../../dev/parse_results.sh`: parse results from log file. diff --git a/detectron2/projects/DensePose/dev/run_inference_tests.sh b/detectron2/projects/DensePose/dev/run_inference_tests.sh deleted file mode 100644 index 46556b80a3ee793bdf6a79f5de2ec88cac902189..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/dev/run_inference_tests.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. 
- -BIN="python train_net.py" -OUTPUT="inference_test_output" -NUM_GPUS=2 -IMS_PER_GPU=2 -IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU )) - -CFG_LIST=( "${@:1}" ) - -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN \ - --eval-only \ - --num-gpus $NUM_GPUS \ - --config-file "$cfg" \ - OUTPUT_DIR "$OUTPUT" \ - SOLVER.IMS_PER_BATCH $IMS_PER_BATCH - rm -rf $OUTPUT -done - diff --git a/detectron2/projects/DensePose/dev/run_instant_tests.sh b/detectron2/projects/DensePose/dev/run_instant_tests.sh deleted file mode 100644 index 23a9c67cefe3cfca790181c90b27f2471d8a7771..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/dev/run_instant_tests.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. - -BIN="python train_net.py" -OUTPUT="instant_test_output" -NUM_GPUS=2 -SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2)) - -CFG_LIST=( "${@:1}" ) -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ - SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \ - OUTPUT_DIR "$OUTPUT" - rm -rf "$OUTPUT" -done - diff --git a/detectron2/projects/DensePose/doc/BOOTSTRAPPING_PIPELINE.md b/detectron2/projects/DensePose/doc/BOOTSTRAPPING_PIPELINE.md deleted file mode 100644 index a1326862abe5479140269f5e6af50b68e7c2d0aa..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/BOOTSTRAPPING_PIPELINE.md +++ /dev/null @@ -1,197 +0,0 @@ -# Bootstrapping Pipeline - -Bootstrapping pipeline for DensePose was proposed in -[Sanakoyeu et al., 2020](https://arxiv.org/pdf/2003.00080.pdf) -to extend DensePose from humans to proximal animal classes -(chimpanzees). Currently, the pipeline is only implemented for -[chart-based models](DENSEPOSE_IUV.md). -Bootstrapping proceeds in two steps. - -## Master Model Training - -Master model is trained on data from source domain (humans) -and supporting domain (animals). Instances from the source domain -contain full DensePose annotations (`S`, `I`, `U` and `V`) and -instances from the supporting domain have segmentation annotations only. -To ensure segmentation quality in the target domain, only a subset of -supporting domain classes is included into the training. This is achieved -through category filters, e.g. 
-(see [configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml](../configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml)): - -``` - WHITELISTED_CATEGORIES: - "base_coco_2017_train": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe -``` -The acronym `Atop10P` in config file names indicates that categories are filtered to -only contain top 10 animals and person. - -The training is performed in a *class-agnostic* manner: all instances -are mapped into the same class (person), e.g. -(see [configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml](../configs/evolution/Base-RCNN-FPN-Atop10P_CA.yaml)): - -``` - CATEGORY_MAPS: - "base_coco_2017_train": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person -``` -The acronym `CA` in config file names indicates that the training is class-agnostic. - -## Student Model Training - -Student model is trained on data from source domain (humans), -supporting domain (animals) and target domain (chimpanzees). -Annotations in source and supporting domains are similar to the ones -used for the master model training. -Annotations in target domain are obtained by applying the master model -to images that contain instances from the target category and sampling -sparse annotations from dense results. This process is called *bootstrapping*. -Below we give details on how the bootstrapping pipeline is implemented. - -### Data Loaders - -The central components that enable bootstrapping are -[`InferenceBasedLoader`](../densepose/data/inference_based_loader.py) and -[`CombinedDataLoader`](../densepose/data/combined_loader.py). - -`InferenceBasedLoader` takes images from a data loader, applies a model -to the images, filters the model outputs based on the selected criteria and -samples the filtered outputs to produce annotations. - -`CombinedDataLoader` combines data obtained from the loaders based on specified -ratios. The standard data loader has the default ratio of 1.0, -ratios for bootstrap datasets are specified in the configuration file. -The higher the ratio the higher the probability to include samples from the -particular data loader into a batch. - -Here is an example of the bootstrapping configuration taken from -[`configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uniform.yaml`](../configs/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uniform.yaml): -``` -BOOTSTRAP_DATASETS: - - DATASET: "chimpnsee" - RATIO: 1.0 - IMAGE_LOADER: - TYPE: "video_keyframe" - SELECT: - STRATEGY: "random_k" - NUM_IMAGES: 4 - TRANSFORM: - TYPE: "resize" - MIN_SIZE: 800 - MAX_SIZE: 1333 - BATCH_SIZE: 8 - NUM_WORKERS: 1 - INFERENCE: - INPUT_BATCH_SIZE: 1 - OUTPUT_BATCH_SIZE: 1 - DATA_SAMPLER: - # supported types: - # densepose_uniform - # densepose_UV_confidence - # densepose_fine_segm_confidence - # densepose_coarse_segm_confidence - TYPE: "densepose_uniform" - COUNT_PER_CLASS: 8 - FILTER: - TYPE: "detection_score" - MIN_VALUE: 0.8 -BOOTSTRAP_MODEL: - WEIGHTS: https://dl.fbaipublicfiles.com/densepose/evolution/densepose_R_50_FPN_DL_WC1M_3x_Atop10P_CA/217578784/model_final_9fe1cc.pkl -``` - -The above example has one bootstrap dataset (`chimpnsee`). 
This dataset is registered as -a [VIDEO_LIST](../densepose/data/datasets/chimpnsee.py) dataset, which means that -it consists of a number of videos specified in a text file. For videos there can be -different strategies to sample individual images. Here we use `video_keyframe` strategy -which considers only keyframes; this ensures temporal offset between sampled images and -faster seek operations. We select at most 4 random keyframes in each video: - -``` -SELECT: - STRATEGY: "random_k" - NUM_IMAGES: 4 -``` - -The frames are then resized - -``` -TRANSFORM: - TYPE: "resize" - MIN_SIZE: 800 - MAX_SIZE: 1333 -``` - -and batched using the standard -[PyTorch DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader): - -``` -BATCH_SIZE: 8 -NUM_WORKERS: 1 -``` - -`InferenceBasedLoader` decomposes those batches into batches of size `INPUT_BATCH_SIZE` -and applies the master model specified by `BOOTSTRAP_MODEL`. Models outputs are filtered -by detection score: - -``` -FILTER: - TYPE: "detection_score" - MIN_VALUE: 0.8 -``` - -and sampled using the specified sampling strategy: - -``` -DATA_SAMPLER: - # supported types: - # densepose_uniform - # densepose_UV_confidence - # densepose_fine_segm_confidence - # densepose_coarse_segm_confidence - TYPE: "densepose_uniform" - COUNT_PER_CLASS: 8 -``` - -The current implementation supports -[uniform sampling](../densepose/data/samplers/densepose_uniform.py) and -[confidence-based sampling](../densepose/data/samplers/densepose_confidence_based.py) -to obtain sparse annotations from dense results. For confidence-based -sampling one needs to use the master model which produces confidence estimates. -The `WC1M` master model used in the example above produces all three types of confidence -estimates. - -Finally, sampled data is grouped into batches of size `OUTPUT_BATCH_SIZE`: - -``` -INFERENCE: - INPUT_BATCH_SIZE: 1 - OUTPUT_BATCH_SIZE: 1 -``` - -The proportion of data from annotated datasets and bootstrapped dataset can be tracked -in the logs, e.g.: - -``` -[... densepose.engine.trainer]: batch/ 1.8, batch/base_coco_2017_train 6.4, batch/densepose_coco_2014_train 3.85 -``` - -which means that over the last 20 iterations, on average for 1.8 bootstrapped data samples there were 6.4 samples from `base_coco_2017_train` and 3.85 samples from `densepose_coco_2014_train`. diff --git a/detectron2/projects/DensePose/doc/DENSEPOSE_CSE.md b/detectron2/projects/DensePose/doc/DENSEPOSE_CSE.md deleted file mode 100644 index d5761ef989bdfb441a2a61f4e508cc826f93d2d1..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/DENSEPOSE_CSE.md +++ /dev/null @@ -1,336 +0,0 @@ -# Continuous Surface Embeddings for Dense Pose Estimation for Humans and Animals - -## Overview - -
- -The pipeline uses [Faster R-CNN](https://arxiv.org/abs/1506.01497) -with [Feature Pyramid Network](https://arxiv.org/abs/1612.03144) meta architecture -outlined in Figure 1. For each detected object, the model predicts -its coarse segmentation `S` (2 channels: foreground / background) -and the embedding `E` (16 channels). At the same time, the embedder produces vertex -embeddings `Ê` for the corresponding mesh. Universal positional embeddings `E` -and vertex embeddings `Ê` are matched to derive for each pixel its continuous -surface embedding. - -
Figure 1. DensePose continuous surface embeddings architecture based on Faster R-CNN with Feature Pyramid Network (FPN).
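To make the matching step concrete, here is a minimal sketch (not the project's API; the tensor shapes and the function name are assumptions) of assigning each pixel the nearest mesh vertex in embedding space:

```python
import torch

def nearest_vertex_per_pixel(E: torch.Tensor, vertex_embeddings: torch.Tensor) -> torch.Tensor:
    # E: (D, H, W) pixel embeddings; vertex_embeddings: (V, D) mesh vertex embeddings.
    # Returns an (H, W) tensor with the index of the closest vertex for every pixel.
    D, H, W = E.shape
    pixels = E.permute(1, 2, 0).reshape(-1, D)       # (H*W, D)
    dists = torch.cdist(pixels, vertex_embeddings)   # (H*W, V) pairwise Euclidean distances
    return dists.argmin(dim=1).reshape(H, W)
```

In the trained models, the coarse segmentation `S` additionally restricts this assignment to foreground pixels.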
- -### Datasets - -For more details on datasets used for training and validation of -continuous surface embeddings models, -please refer to the [DensePose Datasets](DENSEPOSE_DATASETS.md) page. - -## Model Zoo and Baselines - -### Human CSE Models - -Continuous surface embeddings models for humans trained using the protocols from [Neverova et al, 2020](https://arxiv.org/abs/2011.12438). - -Models trained with hard assignment loss ℒ: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_s1x | s1x | 0.349 | 0.060 | 6.3 | 61.1 | 67.1 | 64.4 | 65.7 | 251155172 | model \| metrics |
| R_101_FPN_s1x | s1x | 0.461 | 0.071 | 7.4 | 62.3 | 67.2 | 64.7 | 65.8 | 251155500 | model \| metrics |
| R_50_FPN_DL_s1x | s1x | 0.399 | 0.061 | 7.0 | 60.8 | 67.8 | 65.5 | 66.4 | 251156349 | model \| metrics |
| R_101_FPN_DL_s1x | s1x | 0.504 | 0.074 | 8.3 | 61.5 | 68.0 | 65.6 | 66.6 | 251156606 | model \| metrics |
- -Models trained with soft assignment loss ℒσ: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_soft_s1x | s1x | 0.357 | 0.057 | 9.7 | 61.3 | 66.9 | 64.3 | 65.4 | 250533982 | model \| metrics |
| R_101_FPN_soft_s1x | s1x | 0.464 | 0.071 | 10.5 | 62.1 | 67.3 | 64.5 | 66.0 | 250712522 | model \| metrics |
| R_50_FPN_DL_soft_s1x | s1x | 0.427 | 0.062 | 11.3 | 60.8 | 68.0 | 66.1 | 66.7 | 250713703 | model \| metrics |
| R_101_FPN_DL_soft_s1x | s1x | 0.483 | 0.071 | 12.2 | 61.5 | 68.2 | 66.2 | 67.1 | 250713061 | model \| metrics |
- -### Animal CSE Models - -Models obtained by finetuning human CSE models on animals data from `ds1_train` -(see the [DensePose LVIS](DENSEPOSE_DATASETS.md#continuous-surface-embeddings-annotations-3) -section for more details on the datasets) with soft assignment loss ℒσ: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_soft_chimps_finetune_4k | 4K | 0.569 | 0.051 | 4.7 | 62.0 | 59.0 | 32.2 | 39.6 | 253146869 | model \| metrics |
| R_50_FPN_soft_animals_finetune_4k | 4K | 0.381 | 0.061 | 7.3 | 44.9 | 55.5 | 21.3 | 28.8 | 253145793 | model \| metrics |
| R_50_FPN_soft_animals_CA_finetune_4k | 4K | 0.412 | 0.059 | 7.1 | 53.4 | 59.5 | 25.4 | 33.4 | 253498611 | model \| metrics |
- -Acronyms: - -`CA`: class agnostic training, where all annotated instances are mapped into a single category - - -Models obtained by finetuning human CSE models on animals data from `ds2_train` dataset -with soft assignment loss ℒσ and, for some schedules, cycle losses. -Please refer to [DensePose LVIS](DENSEPOSE_DATASETS.md#continuous-surface-embeddings-annotations-3) -section for details on the dataset and to [Neverova et al, 2021]() for details on cycle losses. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | GErr | GPS | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_soft_animals_I0_finetune_16k | 16k | 0.386 | 0.058 | 8.4 | 54.2 | 67.0 | 29.0 | 38.6 | 13.2 | 85.4 | 270727112 | model \| metrics |
| R_50_FPN_soft_animals_I0_finetune_m2m_16k | 16k | 0.508 | 0.056 | 12.2 | 54.1 | 67.3 | 28.6 | 38.4 | 12.5 | 87.6 | 270982215 | model \| metrics |
| R_50_FPN_soft_animals_I0_finetune_i2m_16k | 16k | 0.483 | 0.056 | 9.7 | 54.0 | 66.6 | 28.9 | 38.3 | 11.0 | 88.9 | 270727461 | model \| metrics |
- -## References - -If you use DensePose methods based on continuous surface embeddings, please take the -references from the following BibTeX entries: - -Continuous surface embeddings: -``` -@InProceedings{Neverova2020ContinuousSurfaceEmbeddings, - title = {Continuous Surface Embeddings}, - author = {Neverova, Natalia and Novotny, David and Khalidov, Vasil and Szafraniec, Marc and Labatut, Patrick and Vedaldi, Andrea}, - journal = {Advances in Neural Information Processing Systems}, - year = {2020}, -} -``` - -Cycle Losses: -``` -@InProceedings{Neverova2021UniversalCanonicalMaps, - title = {Discovering Relationships between Object Categories via Universal Canonical Maps}, - author = {Neverova, Natalia and Sanakoyeu, Artsiom and Novotny, David and Labatut, Patrick and Vedaldi, Andrea}, - journal = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year = {2021}, -} -``` diff --git a/detectron2/projects/DensePose/doc/DENSEPOSE_DATASETS.md b/detectron2/projects/DensePose/doc/DENSEPOSE_DATASETS.md deleted file mode 100644 index 6943741e104310e7ec1837951e602e9c79061b10..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/DENSEPOSE_DATASETS.md +++ /dev/null @@ -1,513 +0,0 @@ -# DensePose Datasets - -We summarize the datasets used in various DensePose training -schedules and describe different available annotation types. - -## Table of Contents - -[General Information](#general-information) - -[DensePose COCO](#densepose-coco) - -[DensePose PoseTrack](#densepose-posetrack) - -[DensePose Chimps](#densepose-chimps) - -[DensePose LVIS](#densepose-lvis) - -## General Information - -DensePose annotations are typically stored in JSON files. Their -structure follows the [COCO Data Format](https://cocodataset.org/#format-data), -the basic data structure is outlined below: - -``` -{ - "info": info, - "images": [image], - "annotations": [annotation], - "licenses": [license], -} - -info{ - "year": int, - "version": str, - "description": str, - "contributor": str, - "url": str, - "date_created": datetime, -} - -image{ - "id": int, - "width": int, - "height": int, - "file_name": str, - "license": int, - "flickr_url": str, - "coco_url": str, - "date_captured": datetime, -} - -license{ - "id": int, "name": str, "url": str, -} -``` - -DensePose annotations can be of two types: -*chart-based annotations* or *continuous surface embeddings annotations*. -We give more details on each of the two annotation types below. - -### Chart-based Annotations - -These annotations assume a single 3D model which corresponds to -all the instances in a given dataset. -3D model is assumed to be split into *charts*. Each chart has its own -2D parametrization through inner coordinates `U` and `V`, typically -taking values in `[0, 1]`. - -Chart-based annotations consist of *point-based annotations* and -*segmentation annotations*. Point-based annotations specify, for a given -image point, which model part it belongs to and what are its coordinates -in the corresponding chart. Segmentation annotations specify regions -in an image that are occupied by a given part. In some cases, charts -associated with point annotations are more detailed than the ones -associated with segmentation annotations. In this case we distinguish -*fine segmentation* (associated with points) and *coarse segmentation* -(associated with masks). - -**Point-based annotations**: - -`dp_x` and `dp_y`: image coordinates of the annotated points along -the horizontal and vertical axes respectively. 
The coordinates are defined -with respect to the top-left corner of the annotated bounding box and are -normalized assuming the bounding box size to be `256x256`; - -`dp_I`: for each point specifies the index of the fine segmentation chart -it belongs to; - -`dp_U` and `dp_V`: point coordinates on the corresponding chart. -Each fine segmentation part has its own parametrization in terms of chart -coordinates. - -**Segmentation annotations**: - -`dp_masks`: RLE encoded dense masks (`dict` containing keys `counts` and `size`). -The masks are typically of size `256x256`, they define segmentation within the -bounding box. - -### Continuous Surface Embeddings Annotations - -Continuous surface embeddings annotations also consist of *point-based annotations* -and *segmentation annotations*. Point-based annotations establish correspondence -between image points and 3D model vertices. Segmentation annotations specify -foreground regions for a given instane. - -**Point-based annotations**: - -`dp_x` and `dp_y` specify image point coordinates the same way as for chart-based -annotations; - -`dp_vertex` gives indices of 3D model vertices, which the annotated image points -correspond to; - -`ref_model` specifies 3D model name. - -**Segmentation annotations**: - -Segmentations can either be given by `dp_masks` field or by `segmentation` field. - -`dp_masks`: RLE encoded dense masks (`dict` containing keys `counts` and `size`). -The masks are typically of size `256x256`, they define segmentation within the -bounding box. - -`segmentation`: polygon-based masks stored as a 2D list -`[[x1 y1 x2 y2...],[x1 y1 ...],...]` of polygon vertex coordinates in a given -image. - -## DensePose COCO - -
Figure 1. Annotation examples from the DensePose COCO dataset.
- -DensePose COCO dataset contains about 50K annotated persons on images from the -[COCO dataset](https://cocodataset.org/#home) -The images are available for download from the -[COCO Dataset download page](https://cocodataset.org/#download): -[train2014](http://images.cocodataset.org/zips/train2014.zip), -[val2014](http://images.cocodataset.org/zips/val2014.zip). -The details on available annotations and their download links are given below. - -### Chart-based Annotations - -Chart-based DensePose COCO annotations are available for the instances of category -`person` and correspond to the model shown in Figure 2. -They include `dp_x`, `dp_y`, `dp_I`, `dp_U` and `dp_V` fields for annotated points -(~100 points per annotated instance) and `dp_masks` field, which encodes -coarse segmentation into 14 parts in the following order: -`Torso`, `Right Hand`, `Left Hand`, `Left Foot`, `Right Foot`, -`Upper Leg Right`, `Upper Leg Left`, `Lower Leg Right`, `Lower Leg Left`, -`Upper Arm Left`, `Upper Arm Right`, `Lower Arm Left`, `Lower Arm Right`, -`Head`. - -
Figure 2. Human body charts (fine segmentation) and the associated 14 body parts depicted with rounded rectangles (coarse segmentation).
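For reference, the part order of `dp_masks` can be kept as a simple lookup table (the constant name below is illustrative, not part of the codebase):

```python
# Order of the 14 coarse-segmentation parts encoded in `dp_masks`
# (the first entry corresponds to the first mask).
DP_MASKS_PART_NAMES = [
    "Torso", "Right Hand", "Left Hand", "Left Foot", "Right Foot",
    "Upper Leg Right", "Upper Leg Left", "Lower Leg Right", "Lower Leg Left",
    "Upper Arm Left", "Upper Arm Right", "Lower Arm Left", "Lower Arm Right",
    "Head",
]
```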
- -The dataset splits used in the training schedules are -`train2014`, `valminusminival2014` and `minival2014`. -`train2014` and `valminusminival2014` are used for training, -and `minival2014` is used for validation. -The table with annotation download links, which summarizes the number of annotated -instances and images for each of the dataset splits is given below: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | # inst | # images | file size | download |
| --- | --- | --- | --- | --- |
| densepose_train2014 | 39210 | 26437 | 526M | densepose_train2014.json |
| densepose_valminusminival2014 | 7297 | 5984 | 105M | densepose_valminusminival2014.json |
| densepose_minival2014 | 2243 | 1508 | 31M | densepose_minival2014.json |
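As a small illustration of the point-based fields described above (a sketch with a hypothetical helper name, not part of the dataset tooling), the normalized `dp_x`/`dp_y` coordinates can be mapped back to absolute image coordinates as follows:

```python
from typing import List, Sequence, Tuple

def densepose_points_to_image(
    dp_x: Sequence[float],
    dp_y: Sequence[float],
    bbox_xywh: Tuple[float, float, float, float],
) -> List[Tuple[float, float]]:
    # Points are annotated in the 256x256-normalized frame of the bounding box,
    # so they are rescaled by the box size and offset by the box corner.
    x0, y0, w, h = bbox_xywh
    return [(x0 + x * w / 256.0, y0 + y * h / 256.0) for x, y in zip(dp_x, dp_y)]
```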
- -### Continuous Surface Embeddings Annotations - -DensePose COCO continuous surface embeddings annotations are available for the instances -of category `person`. The annotations correspond to the 3D model shown in Figure 2, -and include `dp_x`, `dp_y` and `dp_vertex` and `ref_model` fields. -All chart-based annotations were also kept for convenience. - -As with chart-based annotations, the dataset splits used in the training schedules are -`train2014`, `valminusminival2014` and `minival2014`. -`train2014` and `valminusminival2014` are used for training, -and `minival2014` is used for validation. -The table with annotation download links, which summarizes the number of annotated -instances and images for each of the dataset splits is given below: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | # inst | # images | file size | download |
| --- | --- | --- | --- | --- |
| densepose_train2014_cse | 39210 | 26437 | 554M | densepose_train2014_cse.json |
| densepose_valminusminival2014_cse | 7297 | 5984 | 110M | densepose_valminusminival2014_cse.json |
| densepose_minival2014_cse | 2243 | 1508 | 32M | densepose_minival2014_cse.json |
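Schematically, a single CSE annotation therefore carries the following fields (all values below are made up for illustration):

```python
cse_annotation = {
    "image_id": 123,                       # hypothetical ids and values
    "category_id": 1,                      # person
    "bbox": [210.0, 45.0, 120.0, 300.0],   # XYWH box in absolute image coordinates
    "dp_x": [64.0, 128.5, 200.0],          # point coordinates, normalized to 256x256 within the box
    "dp_y": [30.0, 140.0, 220.5],
    "dp_vertex": [4032, 118, 6677],        # indices of the corresponding 3D model vertices
    "ref_model": "smpl_27554",             # name of the reference 3D model (illustrative)
}
```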
- -## DensePose PoseTrack - -
Figure 3. Annotation examples from the PoseTrack dataset.
- -DensePose PoseTrack dataset contains annotated image sequences. -To download the images for this dataset, please follow the instructions -from the [PoseTrack Download Page](https://posetrack.net/users/download.php). - -### Chart-based Annotations - -Chart-based DensePose PoseTrack annotations are available for the instances with category -`person` and correspond to the model shown in Figure 2. -They include `dp_x`, `dp_y`, `dp_I`, `dp_U` and `dp_V` fields for annotated points -(~100 points per annotated instance) and `dp_masks` field, which encodes -coarse segmentation into the same 14 parts as in DensePose COCO. - -The dataset splits used in the training schedules are -`posetrack_train2017` (train set) and `posetrack_val2017` (validation set). -The table with annotation download links, which summarizes the number of annotated -instances, instance tracks and images for the dataset splits is given below: - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | # inst | # images | # tracks | file size | download |
| --- | --- | --- | --- | --- | --- |
| densepose_posetrack_train2017 | 8274 | 1680 | 36 | 118M | densepose_posetrack_train2017.json |
| densepose_posetrack_val2017 | 4753 | 782 | 46 | 59M | densepose_posetrack_val2017.json |
- -## DensePose Chimps - -
Figure 4. Example images from the DensePose Chimps dataset.
The DensePose Chimps dataset contains annotated images of chimpanzees. To download the images for this dataset, please use the URL specified in the `image_url` field of the annotations.

### Chart-based Annotations

Chart-based DensePose Chimps annotations correspond to the human model shown in Figure 2; the instances are thus annotated as belonging to the `person` category. They include `dp_x`, `dp_y`, `dp_I`, `dp_U` and `dp_V` fields for annotated points (~3 points per annotated instance) and the `dp_masks` field, which encodes the foreground mask in RLE format.

Chart-based DensePose Chimps annotations are used for validation only. The table with the annotation download link, which summarizes the number of annotated instances and images, is given below:
| Name | # inst | # images | file size | download |
| --- | --- | --- | --- | --- |
| densepose_chimps | 930 | 654 | 6M | densepose_chimps_full_v2.json |
### Continuous Surface Embeddings Annotations

Continuous surface embeddings annotations for DensePose Chimps include `dp_x`, `dp_y` and `dp_vertex` point-based annotations (~3 points per annotated instance), a `dp_masks` field with the same contents as for chart-based annotations, and a `ref_model` field which refers to a chimpanzee 3D model, `chimp_5029`.

The dataset is split into training and validation subsets. The table with annotation download links, which summarizes the number of annotated instances and images for each of the dataset splits, is given below:
| Name | # inst | # images | file size | download |
| --- | --- | --- | --- | --- |
| densepose_chimps_cse_train | 500 | 350 | 3M | densepose_chimps_cse_train.json |
| densepose_chimps_cse_val | 430 | 304 | 3M | densepose_chimps_cse_val.json |
- -## DensePose LVIS - -
Figure 5. Example images from the DensePose LVIS dataset.
The DensePose LVIS dataset contains segmentation and DensePose annotations for animals on images from the [LVIS dataset](https://www.lvisdataset.org/dataset). The images are available for download through the links: [train2017](http://images.cocodataset.org/zips/train2017.zip), [val2017](http://images.cocodataset.org/zips/val2017.zip).

### Continuous Surface Embeddings Annotations

Continuous surface embeddings (CSE) annotations for DensePose LVIS include `dp_x`, `dp_y` and `dp_vertex` point-based annotations (~3 points per annotated instance) and a `ref_model` field which refers to a 3D model that corresponds to the instance. Instances from 9 animal categories were annotated with CSE DensePose data: bear, cow, cat, dog, elephant, giraffe, horse, sheep and zebra.

Foreground masks are available from instance segmentation annotations (the `segmentation` field) in polygon format; they are stored as a 2D list `[[x1 y1 x2 y2...],[x1 y1 ...],...]`.

We used two datasets, each consisting of one training (`train`) and one validation (`val`) subset: the first one (`ds1`) was used in [Neverova et al, 2020](https://arxiv.org/abs/2011.12438), and the second one (`ds2`) was used in [Neverova et al, 2021]().

The summary of the available datasets is given below:
All DataSelected Animals
(9 categories)
File
Name# cat# img# segm# img# segm# dpsizedownload
ds1_train55641412398541419472518446Mdensepose_lvis_v1_ds1_train_v1.json
ds1_val2515713281571153710365Mdensepose_lvis_v1_ds1_val_v1.json
ds2_train12039938812701411374646964189321051Mdensepose_lvis_v1_ds2_train_v1.json
ds2_val92690915526909155360424Mdensepose_lvis_v1_ds2_val_v1.json
- -Legend: - -`#cat` - number of categories in the dataset for which annotations are available; - -`#img` - number of images with annotations in the dataset; - -`#segm` - number of segmentation annotations; - -`#dp` - number of DensePose annotations. - - -Important Notes: - -1. The reference models used for `ds1_train` and `ds1_val` are -`bear_4936`, `cow_5002`, `cat_5001`, `dog_5002`, `elephant_5002`, `giraffe_5002`, -`horse_5004`, `sheep_5004` and `zebra_5002`. The reference models used for -`ds2_train` and `ds2_val` are `bear_4936`, `cow_5002`, `cat_7466`, -`dog_7466`, `elephant_5002`, `giraffe_5002`, `horse_5004`, `sheep_5004` and `zebra_5002`. -So reference models for categories `cat` aind `dog` are different for `ds1` and `ds2`. - -2. Some annotations from `ds1_train` are reused in `ds2_train` (4538 DensePose annotations -and 21275 segmentation annotations). The ones for cat and dog categories were remapped -from `cat_5001` and `dog_5002` reference models used in `ds1` to `cat_7466` and `dog_7466` -used in `ds2`. - -3. All annotations from `ds1_val` are included into `ds2_val` after the remapping -procedure mentioned in note 2. - -4. Some annotations from `ds1_train` are part of `ds2_val` (646 DensePose annotations and -1225 segmentation annotations). Thus one should not train on `ds1_train` if evaluating on `ds2_val`. diff --git a/detectron2/projects/DensePose/doc/DENSEPOSE_IUV.md b/detectron2/projects/DensePose/doc/DENSEPOSE_IUV.md deleted file mode 100644 index de158e0eea0c287507b701376abc9307ce92c0f1..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/DENSEPOSE_IUV.md +++ /dev/null @@ -1,627 +0,0 @@ -# Chart-based Dense Pose Estimation for Humans and Animals - -## Overview - -The goal of chart-based DensePose methods is to establish dense correspondences -between image pixels and 3D object mesh by splitting the latter into charts and estimating -for each pixel the corresponding chart index `I` and local chart coordinates `(U, V)`. - -
- -The charts used for human DensePose estimation are shown in Figure 1. -The human body is split into 24 parts, each part is parametrized by `U` and `V` -coordinates, each taking values in `[0, 1]`. - -
Figure 1. Partitioning and parametrization of human body surface.
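To make the `(I, U, V)` representation concrete, the following minimal sketch (shapes and the function name are assumptions, not the project's API) packs per-pixel part labels and chart coordinates into a single 3-channel array, the format used by the visualization tools in this repository:

```python
import numpy as np

def pack_iuv(I: np.ndarray, U: np.ndarray, V: np.ndarray) -> np.ndarray:
    # I: (H, W) part labels in 0..24 (0 = background);
    # U, V: (H, W) chart coordinates in [0, 1].
    # Returns a (3, H, W) uint8 array with U and V rescaled to 0..255.
    return np.stack([I, U * 255.0, V * 255.0]).astype(np.uint8)
```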
- -The pipeline uses [Faster R-CNN](https://arxiv.org/abs/1506.01497) -with [Feature Pyramid Network](https://arxiv.org/abs/1612.03144) meta architecture -outlined in Figure 2. For each detected object, the model predicts -its coarse segmentation `S` (2 or 15 channels: foreground / background or -background + 14 predefined body parts), fine segmentation `I` (25 channels: -background + 24 predefined body parts) and local chart coordinates `U` and `V`. - -
Figure 2. DensePose chart-based architecture based on Faster R-CNN with Feature Pyramid Network (FPN).
- -### Bootstrapping Chart-Based Models - -[Sanakoyeu et al., 2020](https://arxiv.org/pdf/2003.00080.pdf) introduced a pipeline -to transfer DensePose models trained on humans to proximal animal classes (chimpanzees), -which is summarized in Figure 3. The training proceeds in two stages: - -First, a *master* model is trained on data from source domain (humans with full -DensePose annotation `S`, `I`, `U` and `V`) -and supporting domain (animals with segmentation annotation only). -Only selected animal classes are chosen from the supporting -domain through *category filters* to guarantee the quality of target domain results. -The training is done in *class-agnostic manner*: all selected categories are mapped -to a single category (human). - -Second, a *student* model is trained on data from source and supporting domains, -as well as data from target domain obtained by applying the master model, selecting -high-confidence detections and sampling the results. - -
Figure 3. Domain adaptation: master model is trained on data from source and supporting domains to produce predictions in target domain; student model combines data from source and supporting domains, as well as sampled predictions from the master model on target domain, to improve target domain predictions quality.
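As an illustration of the sampling step (a sketch under assumptions, not the actual data sampler), high-confidence master-model detections can be turned into sparse pseudo-annotations by drawing a fixed number of foreground points per instance uniformly:

```python
import numpy as np

def sample_pseudo_points(scores, masks, min_score=0.8, points_per_instance=8, rng=None):
    # scores: per-instance detection scores; masks: list of (H, W) boolean
    # foreground masks predicted by the master model. Returns, for every
    # instance passing the score filter, a list of sampled (y, x) coordinates.
    if rng is None:
        rng = np.random.default_rng()
    sampled = []
    for score, mask in zip(scores, masks):
        if score < min_score:
            continue  # keep only high-confidence detections
        ys, xs = np.nonzero(mask)
        if ys.size == 0:
            continue
        idx = rng.choice(ys.size, size=min(points_per_instance, ys.size), replace=False)
        sampled.append(list(zip(ys[idx].tolist(), xs[idx].tolist())))
    return sampled
```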
- -Examples of pretrained master and student models are available in the [Model Zoo](#ModelZooBootstrap). -For more details on the bootstrapping pipeline, please see [Bootstrapping Pipeline](BOOTSTRAPPING_PIPELINE.md). - -### Datasets - -For more details on datasets used for chart-based model training and validation, -please refer to the [DensePose Datasets](DENSEPOSE_DATASETS.md) page. - -## Model Zoo and Baselines - -### Legacy Models - -Baselines trained using schedules from [GΓΌler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_s1x_legacy | s1x | 0.307 | 0.051 | 3.2 | 58.1 | 58.2 | 52.1 | 54.9 | 164832157 | model \| metrics |
| R_101_FPN_s1x_legacy | s1x | 0.390 | 0.063 | 4.3 | 59.5 | 59.3 | 53.2 | 56.0 | 164832182 | model \| metrics |
- -### Improved Baselines, Original Fully Convolutional Head - -These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_s1x | s1x | 0.359 | 0.066 | 4.5 | 61.2 | 67.2 | 63.7 | 65.3 | 165712039 | model \| metrics |
| R_101_FPN_s1x | s1x | 0.428 | 0.079 | 5.8 | 62.3 | 67.8 | 64.5 | 66.2 | 165712084 | model \| metrics |
- -### Improved Baselines, DeepLabV3 Head - -These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_DL_s1x | s1x | 0.392 | 0.070 | 6.7 | 61.1 | 68.3 | 65.6 | 66.7 | 165712097 | model \| metrics |
| R_101_FPN_DL_s1x | s1x | 0.478 | 0.083 | 7.0 | 62.3 | 68.7 | 66.3 | 67.6 | 165712116 | model \| metrics |
### Baselines with Confidence Estimation

These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_WC1_s1x | s1x | 0.353 | 0.064 | 4.6 | 60.5 | 67.0 | 64.2 | 65.4 | 173862049 | model \| metrics |
| R_50_FPN_WC2_s1x | s1x | 0.364 | 0.066 | 4.8 | 60.7 | 66.9 | 64.2 | 65.7 | 173861455 | model \| metrics |
| R_50_FPN_DL_WC1_s1x | s1x | 0.397 | 0.068 | 6.7 | 61.1 | 68.1 | 65.8 | 67.0 | 173067973 | model \| metrics |
| R_50_FPN_DL_WC2_s1x | s1x | 0.410 | 0.070 | 6.8 | 60.8 | 67.9 | 65.6 | 66.7 | 173859335 | model \| metrics |
| R_101_FPN_WC1_s1x | s1x | 0.435 | 0.076 | 5.7 | 62.5 | 67.6 | 64.9 | 66.3 | 171402969 | model \| metrics |
| R_101_FPN_WC2_s1x | s1x | 0.450 | 0.078 | 5.7 | 62.3 | 67.6 | 64.8 | 66.4 | 173860702 | model \| metrics |
| R_101_FPN_DL_WC1_s1x | s1x | 0.479 | 0.081 | 7.9 | 62.0 | 68.4 | 66.2 | 67.2 | 173858525 | model \| metrics |
| R_101_FPN_DL_WC2_s1x | s1x | 0.491 | 0.082 | 7.6 | 61.7 | 68.3 | 65.9 | 67.2 | 173294801 | model \| metrics |
Acronyms:

`WC1`: with confidence estimation model type 1 for `U` and `V`

`WC2`: with confidence estimation model type 2 for `U` and `V`

### Baselines with Mask Confidence Estimation

Models that perform estimation of confidence in regressed UV coordinates as well as confidences associated with coarse and fine segmentation; see [Sanakoyeu et al., 2020](https://arxiv.org/pdf/2003.00080.pdf) for details.
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_WC1M_s1x | s1x | 0.381 | 0.066 | 4.8 | 60.6 | 66.7 | 64.0 | 65.4 | 217144516 | model \| metrics |
| R_50_FPN_WC2M_s1x | s1x | 0.342 | 0.068 | 5.0 | 60.7 | 66.9 | 64.2 | 65.5 | 216245640 | model \| metrics |
| R_50_FPN_DL_WC1M_s1x | s1x | 0.371 | 0.068 | 6.0 | 60.7 | 68.0 | 65.2 | 66.7 | 216245703 | model \| metrics |
| R_50_FPN_DL_WC2M_s1x | s1x | 0.385 | 0.071 | 6.1 | 60.8 | 68.1 | 65.0 | 66.4 | 216245758 | model \| metrics |
| R_101_FPN_WC1M_s1x | s1x | 0.423 | 0.079 | 5.9 | 62.0 | 67.3 | 64.8 | 66.0 | 216453687 | model \| metrics |
| R_101_FPN_WC2M_s1x | s1x | 0.436 | 0.080 | 5.9 | 62.5 | 67.4 | 64.5 | 66.0 | 216245682 | model \| metrics |
| R_101_FPN_DL_WC1M_s1x | s1x | 0.453 | 0.079 | 6.8 | 62.0 | 68.1 | 66.4 | 67.1 | 216245771 | model \| metrics |
| R_101_FPN_DL_WC2M_s1x | s1x | 0.464 | 0.080 | 6.9 | 61.9 | 68.2 | 66.1 | 67.1 | 216245790 | model \| metrics |
Acronyms:

`WC1M`: with confidence estimation model type 1 for `U` and `V` and mask confidence estimation

`WC2M`: with confidence estimation model type 2 for `U` and `V` and mask confidence estimation

### Bootstrapping Baselines

Master and student models trained using the bootstrapping pipeline with chimpanzee as the target category; see [Sanakoyeu et al., 2020](https://arxiv.org/pdf/2003.00080.pdf) and [Bootstrapping Pipeline](BOOTSTRAPPING_PIPELINE.md) for details. Evaluation is performed on the [DensePose Chimps](DENSEPOSE_DATASETS.md#densepose-chimps) dataset.
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | segm AP | dp. APex GPS | dp. AP GPS | dp. AP GPSm | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R_50_FPN_DL_WC1M_3x_Atop10P_CA | 3x | 0.522 | 0.073 | 9.7 | 61.3 | 59.1 | 36.2 | 20.0 | 30.2 | 217578784 | model \| metrics |
| R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uniform | 3x | 1.939 | 0.072 | 10.1 | 60.9 | 58.5 | 37.2 | 21.5 | 31.0 | 256453729 | model \| metrics |
| R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_uv | 3x | 1.985 | 0.072 | 9.6 | 61.4 | 58.9 | 38.3 | 22.2 | 32.1 | 256452095 | model \| metrics |
| R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_finesegm | 3x | 2.047 | 0.072 | 10.3 | 60.9 | 58.5 | 36.7 | 20.7 | 30.7 | 256452819 | model \| metrics |
| R_50_FPN_DL_WC1M_3x_Atop10P_CA_B_coarsesegm | 3x | 1.830 | 0.070 | 9.6 | 61.3 | 59.2 | 37.9 | 21.5 | 31.6 | 256455697 | model \| metrics |
- -Acronyms: - -`WC1M`: with confidence estimation model type 1 for `U` and `V` and mask confidence estimation - -`Atop10P`: humans and animals from the 10 best suitable categories are used for training - -`CA`: class agnostic training, where all annotated instances are mapped into a single category - -`B_<...>`: schedule with bootstrapping with the specified results sampling strategy - -Note: - -The relaxed `dp. APex GPS` metric was used in -[Sanakoyeu et al., 2020](https://arxiv.org/pdf/2003.00080.pdf) to evaluate DensePose -results. This metric considers matches at thresholds 0.2, 0.3 and 0.4 additionally -to the standard ones used in the evaluation protocol. The minimum threshold is -controlled by `DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD` config option. - -### License - -All models available for download are licensed under the -[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/) - -## References - -If you use chart-based DensePose methods, please take the references from the following -BibTeX entries: - -DensePose bootstrapping pipeline: -``` -@InProceedings{Sanakoyeu2020TransferringDensePose, - title = {Transferring Dense Pose to Proximal Animal Classes}, - author = {Artsiom Sanakoyeu and Vasil Khalidov and Maureen S. McCarthy and Andrea Vedaldi and Natalia Neverova}, - journal = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year = {2020}, -} -``` - -DensePose with confidence estimation: -``` -@InProceedings{Neverova2019DensePoseConfidences, - title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels}, - author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea}, - journal = {Advances in Neural Information Processing Systems}, - year = {2019}, -} -``` - -Original DensePose: -``` -@InProceedings{Guler2018DensePose, - title={DensePose: Dense Human Pose Estimation In The Wild}, - author={R\{i}za Alp G\"uler, Natalia Neverova, Iasonas Kokkinos}, - journal={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year={2018} -} -``` diff --git a/detectron2/projects/DensePose/doc/GETTING_STARTED.md b/detectron2/projects/DensePose/doc/GETTING_STARTED.md deleted file mode 100644 index a5c86f3ab5e66dc3dee4f7836aa79bd5d41b68f2..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/GETTING_STARTED.md +++ /dev/null @@ -1,76 +0,0 @@ -# Getting Started with DensePose - -## Inference with Pre-trained Models - -1. Pick a model and its config file from [Model Zoo(IUV)](DENSEPOSE_IUV.md#ModelZoo), [Model Zoo(CSE)](DENSEPOSE_CSE.md#ModelZoo), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml) -2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save the to disk. For example, to use contour visualization for DensePose, one can run: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png -``` -Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool. - -## Training - -First, prepare the [dataset](http://densepose.org/#dataset) into the following structure under the directory you'll run training scripts: -
-datasets/coco/
-  annotations/
-    densepose_{train,minival,valminusminival}2014.json
-    densepose_minival2014_100.json   (optional, for testing only)
-  {train,val}2014/
-    # image files that are mentioned in the corresponding json
-
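Before launching training, it can be useful to verify that the layout above is actually in place. The following minimal Python sketch is not part of the repository; it simply checks for the files and directories from the listing above, assuming the default `datasets/coco` root relative to the directory training is launched from:
```python
import os

# Expected DensePose COCO layout (see the listing above).
# Adjust `root` if your datasets live elsewhere; this helper is purely illustrative.
root = "datasets/coco"
expected = [
    "annotations/densepose_train2014.json",
    "annotations/densepose_minival2014.json",
    "annotations/densepose_valminusminival2014.json",
    "train2014",
    "val2014",
]

missing = [p for p in expected if not os.path.exists(os.path.join(root, p))]
if missing:
    print("Missing dataset entries:", ", ".join(missing))
else:
    print("DensePose COCO layout looks complete.")
```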
- -To train a model one can use the [train_net.py](../train_net.py) script. -This script was used to train all DensePose models in [Model Zoo(IUV)](DENSEPOSE_IUV.md#ModelZoo), [Model Zoo(CSE)](DENSEPOSE_CSE.md#ModelZoo). -For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone -on 8 GPUs following the s1x schedule, one can run -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8 -``` -The configs are made for 8-GPU training. To train on 1 GPU, one can apply the -[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677): -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \ - SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -## Evaluation - -Model testing can be done in the same way as training, except for an additional flag `--eval-only` and -model location specification through `MODEL.WEIGHTS model.pth` in the command line -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \ - --eval-only MODEL.WEIGHTS model.pth -``` - -## Tools - -We provide tools which allow one to: - - easily view DensePose annotated data in a dataset; - - perform DensePose inference on a set of images; - - visualize DensePose model results; - -`query_db` is a tool to print or visualize DensePose data in a dataset. -Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool - -`apply_net` is a tool to print or visualize DensePose results. -Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool - - -## Installation as a package - -DensePose can also be installed as a Python package for integration with other software. - -The following dependencies are needed: -- Python >= 3.7 -- [PyTorch](https://pytorch.org/get-started/locally/#start-locally) >= 1.7 (to match [detectron2 requirements](https://detectron2.readthedocs.io/en/latest/tutorials/install.html#requirements)) -- [torchvision](https://pytorch.org/vision/stable/) version [compatible with your version of PyTorch](https://github.com/pytorch/vision#installation) - -DensePose can then be installed from this repository with: - -``` -pip install git+https://github.com/facebookresearch/detectron2@main#subdirectory=projects/DensePose -``` - -After installation, the package will be importable as `densepose`. 
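Once installed this way, the DensePose config extensions can be combined with the standard detectron2 inference utilities. The sketch below is a minimal illustration, not the project's official inference entry point (apply_net remains the supported tool): the config path and weights URL are the R_50_FPN_s1x ones used elsewhere in this documentation, and the `pred_densepose` field name is assumed from the dump format described in TOOL_APPLY_NET.md.
```python
import cv2
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from densepose import add_densepose_config

# Build a DensePose-aware config: add_densepose_config registers the extra
# DensePose options on top of the base detectron2 config.
cfg = get_cfg()
add_densepose_config(cfg)
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")
cfg.MODEL.WEIGHTS = (
    "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/"
    "165712039/model_final_162be9.pkl"
)
cfg.MODEL.DEVICE = "cpu"  # or "cuda" if a GPU is available

predictor = DefaultPredictor(cfg)
image = cv2.imread("image.jpg")  # BGR image, as expected by detectron2
outputs = predictor(image)

# Detected instances carry DensePose predictions alongside boxes and scores
# (field name assumed; see the dump format in TOOL_APPLY_NET.md).
instances = outputs["instances"]
print(instances.pred_boxes, instances.scores)
print(instances.pred_densepose)
```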
diff --git a/detectron2/projects/DensePose/doc/RELEASE_2020_04.md b/detectron2/projects/DensePose/doc/RELEASE_2020_04.md deleted file mode 100644 index 2fab6ae78e887c630ad94e71aa6e946115c61593..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/RELEASE_2020_04.md +++ /dev/null @@ -1,6 +0,0 @@ -# DensePose Confidence Estimation and Model Zoo Improvements - -* [DensePose models with confidence estimation](doc/DENSEPOSE_IUV.md#ModelZooConfidence) -* [Panoptic FPN and DeepLabV3 head implementation](doc/DENSEPOSE_IUV.md#ModelZooDeepLabV3) -* Test time augmentations for DensePose -* New evaluation metric (GPSm) that yields more reliable scores diff --git a/detectron2/projects/DensePose/doc/RELEASE_2021_03.md b/detectron2/projects/DensePose/doc/RELEASE_2021_03.md deleted file mode 100644 index eb908a67f7e48d1d3aba51f946c0ca884cfcfe79..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/RELEASE_2021_03.md +++ /dev/null @@ -1,45 +0,0 @@ -# DensePose CSE and DensePose Evolution - -* [DensePose Evolution pipeline](DENSEPOSE_IUV.md#ModelZooBootstrap), a framework to bootstrap - DensePose on unlabeled data - * [`InferenceBasedLoader`](../densepose/data/inference_based_loader.py) - with data samplers to use inference results from one model - to train another model (bootstrap); - * [`VideoKeyframeDataset`](../densepose/data/video/video_keyframe_dataset.py) - to efficiently load images from video keyframes; - * Category maps and filters to combine annotations from different categories - and train in a class-agnostic manner; - * [Pretrained models](DENSEPOSE_IUV.md#ModelZooBootstrap) for DensePose estimation on chimpanzees; - * DensePose head training from partial data (segmentation only); - * [DensePose models with mask confidence estimation](DENSEPOSE_IUV.md#ModelZooMaskConfidence); - * [DensePose Chimps]() dataset for IUV evaluation -* [DensePose Continuous Surface Embeddings](DENSEPOSE_CSE.md), a framework to extend DensePose - to various categories using 3D models - * [Hard embedding](../densepose/modeling/losses/embed.py) and - [soft embedding](../densepose/modeling/losses/soft_embed.py) - losses to train universal positional embeddings; - * [Embedder](../(densepose/modeling/cse/embedder.py) to handle - mesh vertex embeddings; - * [Storage](../densepose/evaluation/tensor_storage.py) for evaluation with high volumes of data; - * [Pretrained models](DENSEPOSE_CSE.md#ModelZoo) for DensePose CSE estimation on humans and animals; - * [DensePose Chimps](DENSEPOSE_DATASETS.md#densepose-chimps) and - [DensePose LVIS](DENSEPOSE_DATASETS.md#densepose-lvis) datasets for CSE finetuning and evaluation; - * [Vertex and texture mapping visualizers](../densepose/vis/densepose_outputs_vertex.py); -* Refactoring of all major components: losses, predictors, model outputs, model results, visualizers; - * Dedicated structures for [chart outputs](../densepose/structures/chart.py), - [chart outputs with confidences](../densepose/structures/chart_confidence.py), - [chart results](../densepose/structures/chart_result.py), - [CSE outputs](../densepose/structures/cse.py); - * Dedicated predictors for - [chart-based estimation](../densepose/modeling/predictors/chart.py), - [confidence estimation](../densepose/modeling/predictors/chart_confidence.py) - and [CSE estimation](../densepose/modeling/predictors/cse.py); - * Generic handling of various [conversions](../densepose/converters) (e.g. 
from outputs to results); - * Better organization of various [losses](../densepose/modeling/losses); - * Segregation of loss data accumulators for - [IUV setting](../densepose/modeling/losses/utils.py) - and [CSE setting](../densepose/modeling/losses/embed_utils.py); - * Splitting visualizers into separate modules; -* [HRNet](../densepose/modeling/hrnet.py) and [HRFPN](../densepose/modeling/hrfpn.py) backbones; -* [PoseTrack](DENSEPOSE_DATASETS.md#densepose-posetrack) dataset; -* [IUV texture visualizer](../densepose/vis/densepose_results_textures.py) diff --git a/detectron2/projects/DensePose/doc/RELEASE_2021_06.md b/detectron2/projects/DensePose/doc/RELEASE_2021_06.md deleted file mode 100644 index fb5ff4facdfaf5559d7be26c49852f4f6bc5495e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/RELEASE_2021_06.md +++ /dev/null @@ -1,12 +0,0 @@ -# DensePose CSE with Cycle Losses - -This release follows the paper [Neverova et al, 2021]() and -adds CSE datasets with more annotations, better CSE animal models -to the model zoo, losses to ensure cycle consistency for models and mesh -alignment evaluator. In particular: - -* [Pixel to shape](../densepose/modeling/losses/cycle_pix2shape.py) and [shape to shape](../densepose/modeling/losses/cycle_shape2shape.py) cycle consistency losses; -* Mesh alignment [evaluator](../densepose/evaluation/mesh_alignment_evaluator.py); -* Existing CSE datasets renamed to [ds1_train](https://dl.fbaipublicfiles.com/densepose/annotations/lvis/densepose_lvis_v1_ds1_train_v1.json) and [ds1_val](https://dl.fbaipublicfiles.com/densepose/annotations/lvis/densepose_lvis_v1_ds1_val_v1.json); -* New CSE datasets [ds2_train](https://dl.fbaipublicfiles.com/densepose/annotations/lvis/densepose_lvis_v1_ds2_train_v1.json) and [ds2_val](https://dl.fbaipublicfiles.com/densepose/annotations/lvis/densepose_lvis_v1_ds2_val_v1.json) added; -* Better CSE animal models trained with the 16k schedule added to the [model zoo](DENSEPOSE_CSE.md#animal-cse-models). diff --git a/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md b/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md deleted file mode 100644 index ca8e1ddafc7b1003ba98cce2826157ab995a2443..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md +++ /dev/null @@ -1,203 +0,0 @@ -# Apply Net - -`apply_net` is a tool to print or visualize DensePose results on a set of images. -It has two modes: `dump` to save DensePose model results to a pickle file -and `show` to visualize them on images. - -The `image.jpg` file that is used as an example in this doc can be found [here](http://images.cocodataset.org/train2017/000000117508.jpg) - -## Dump Mode - -The general command form is: -```bash -python apply_net.py dump [-h] [-v] [--output ] -``` - -There are three mandatory arguments: - - ``, configuration file for a given model; - - ``, model file with trained parameters - - ``, input image file name, pattern or folder - -One can additionally provide `--output` argument to define the output file name, -which defaults to `output.pkl`. - - -Examples: - -1. 
Dump results of the [R_50_FPN_s1x](https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl) DensePose model for images in a folder `images` to file `dump.pkl`: -```bash -python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -images --output dump.pkl -v -``` - -2. Dump results of the [R_50_FPN_s1x](https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl) DensePose model for images with file name matching a pattern `image*.jpg` to file `results.pkl`: -```bash -python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -"image*.jpg" --output results.pkl -v -``` - -If you want to load the pickle file generated by the above command: -``` -# make sure DensePose is in your PYTHONPATH, or use the following line to add it: -sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/") - -f = open('/your_result_path/results.pkl', 'rb') -data = pickle.load(f) -``` - -The file `results.pkl` contains the list of results per image, for each image the result is a dictionary. - -**If you use a [IUV model](DENSEPOSE_IUV.md#-model-zoo-and-baselines)**, the dumped data will have the following format: - -``` -data: [{'file_name': '/your_path/image1.jpg', - 'scores': tensor([0.9884]), - 'pred_boxes_XYXY': tensor([[ 69.6114, 0.0000, 706.9797, 706.0000]]), - 'pred_densepose': [DensePoseChartResultWithConfidences(labels=tensor(...), uv=tensor(...), sigma_1=None, - sigma_2=None, kappa_u=None, kappa_v=None, fine_segm_confidence=None, coarse_segm_confidence=None), - DensePoseChartResultWithConfidences, ...] - } - {'file_name': '/your_path/image2.jpg', - 'scores': tensor([0.9999, 0.5373, 0.3991]), - 'pred_boxes_XYXY': tensor([[ 59.5734, 7.7535, 579.9311, 932.3619], - [612.9418, 686.1254, 612.9999, 704.6053], - [164.5081, 407.4034, 598.3944, 920.4266]]), - 'pred_densepose': [DensePoseChartResultWithConfidences(labels=tensor(...), uv=tensor(...), sigma_1=None, - sigma_2=None, kappa_u=None, kappa_v=None, fine_segm_confidence=None, coarse_segm_confidence=None), - DensePoseChartResultWithConfidences, ...] 
- }] -``` - -`DensePoseChartResultWithConfidences` contains the following fields: -- `labels` - a tensor of size `[H, W]` of type `torch.long` which contains fine segmentation labels (previously called `I`) -- `uv` - a tensor of size `[2, H, W]` of type `torch.float` which contains `U` and `V` coordinates -- various optional confidence-related fields (`sigma_1`, `sigma_2`, `kappa_u`, `kappa_v`, `fine_segm_confidence`, `coarse_segm_confidence`) - - -**If you use a [CSE model](DENSEPOSE_CSE.md#-model-zoo-and-baselines)**, the dumped data will have the following format: -``` -data: [{'file_name': '/your_path/image1.jpg', - 'scores': tensor([0.9984, 0.9961]), - 'pred_boxes_XYXY': tensor([[480.0093, 461.0796, 698.3614, 696.1011], - [78.1589, 168.6614, 307.1287, 653.8522]]), - 'pred_densepose': DensePoseEmbeddingPredictorOutput(embedding=tensor(...), coarse_segm=tensor(...))} - {'file_name': '/your_path/image2.jpg', - 'scores': tensor([0.9189, 0.9491]), - 'pred_boxes_XYXY': tensor([[734.9685, 534.2003, 287.3923, 254.8859], - [434.2853, 765.1219, 132.1029, 867.9283]]), - 'pred_densepose': DensePoseEmbeddingPredictorOutput(embedding=tensor(...), coarse_segm=tensor(...))}] -``` - -`DensePoseEmbeddingPredictorOutput` contains the following fields: -- `embedding` - a tensor of size `[N, D, sz, sz]` of type `torch.float`, which contains embeddings of size `D` of the `N` detections in the image -- `coarse_segm` - a tensor of size `[N, 2, sz, sz]` of type `torch.float` which contains segmentation scores of the `N` detections in the image; e.g. a mask can be obtained by `coarse_segm.argmax(dim=1)` - -`sz` is a fixed size for the tensors; you can resize them to the size of the bounding box, if needed - -We can use the following code, to parse the outputs of the first -detected instance on the first image (IUV model). -``` -img_id, instance_id = 0, 0 # Look at the first image and the first detected instance -bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id] -result = data[img_id]['pred_densepose'][instance_id] -uv = result.uv -``` -The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box. 
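Since `labels` and `uv` are predicted on a fixed-size grid inside the box, a common follow-up step is to resample them to the actual box size. The sketch below does this for an IUV dump; the interpolation choice (nearest for discrete labels, bilinear for continuous coordinates) is an assumption for illustration, not the converter used by the library, and `results.pkl` and the indices are the ones from the example above.
```python
import pickle

import torch
import torch.nn.functional as F

# densepose must be importable (e.g. on PYTHONPATH) so the result classes can be unpickled.
with open("results.pkl", "rb") as f:
    data = pickle.load(f)

img_id, instance_id = 0, 0  # first image, first detected instance
entry = data[img_id]
x0, y0, x1, y1 = entry["pred_boxes_XYXY"][instance_id].int().tolist()
w, h = max(x1 - x0, 1), max(y1 - y0, 1)

result = entry["pred_densepose"][instance_id]
labels = result.labels.unsqueeze(0).unsqueeze(0).float()  # [H, W] -> [1, 1, H, W]
uv = result.uv.unsqueeze(0)                               # [2, H, W] -> [1, 2, H, W]

# Resample to the bounding box size.
labels_box = F.interpolate(labels, size=(h, w), mode="nearest").squeeze().long()
uv_box = F.interpolate(uv, size=(h, w), mode="bilinear", align_corners=False).squeeze(0)

print(labels_box.shape, uv_box.shape)  # [h, w] and [2, h, w]
```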
- - -## Visualization Mode - -The general command form is: -```bash -python apply_net.py show [-h] [-v] [--min_score ] [--nms_thresh ] [--output ] -``` - -There are four mandatory arguments: - - ``, configuration file for a given model; - - ``, model file with trained parameters - - ``, input image file name, pattern or folder - - ``, visualizations specifier; currently available visualizations are: - * `bbox` - bounding boxes of detected persons; - * `dp_segm` - segmentation masks for detected persons; - * `dp_u` - each body part is colored according to the estimated values of the - U coordinate in part parameterization; - * `dp_v` - each body part is colored according to the estimated values of the - V coordinate in part parameterization; - * `dp_contour` - plots contours with color-coded U and V coordinates; - * `dp_iuv_texture` - transfers the texture from a given texture image file to detected instances, in IUV mode; - * `dp_vertex` - plots the rainbow visualization of the closest vertices prediction for a given mesh, in CSE mode; - * `dp_cse_texture` - transfers the texture from a given list of texture image files (one from each human or animal mesh) to detected instances, in CSE mode - - -One can additionally provide the following optional arguments: - - `--min_score` to only show detections with sufficient scores that are not lower than provided value - - `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold - - `--output` to define visualization file name template, which defaults to `output.png`. - To distinguish output file names for different images, the tool appends 1-based entry index, - e.g. output.0001.png, output.0002.png, etc... -- `--texture_atlas` to define the texture atlas image for IUV texture transfer -- `--texture_atlases_map` to define the texture atlas images map (a dictionary `{mesh name: texture atlas image}`) for CSE texture transfer - - -The following examples show how to output results of a DensePose model -with ResNet-50 FPN backbone using different visualizations for image `image.jpg`: - -1. Show bounding box and segmentation: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -image.jpg bbox,dp_segm -v -``` -![Bounding Box + Segmentation Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_segm.jpg) - -2. Show bounding box and estimated U coordinates for body parts: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -image.jpg bbox,dp_u -v -``` -![Bounding Box + U Coordinate Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_u.jpg) - -3. Show bounding box and estimated V coordinates for body parts: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -image.jpg bbox,dp_v -v -``` -![Bounding Box + V Coordinate Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_v.jpg) - -4. 
Show bounding box and estimated U and V coordinates via contour plots: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -image.jpg dp_contour,bbox -v -``` -![Bounding Box + Contour Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_contour.jpg) - -5. Show bounding box and texture transfer: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl \ -image.jpg dp_iuv_texture,bbox --texture_atlas texture_from_SURREAL.jpg -v -``` -![Bounding Box + IUV Texture Transfer Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_iuv_texture.jpg) - -6. Show bounding box and CSE rainbow visualization: -```bash -python apply_net.py show configs/cse/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_s1x/251155172/model_final_c4ea5f.pkl \ -image.jpg dp_vertex,bbox -v -``` -![Bounding Box + CSE Rainbow Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_vertex.jpg) - -7. Show bounding box and CSE texture transfer: -```bash -python apply_net.py show configs/cse/densepose_rcnn_R_50_FPN_s1x.yaml \ -https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_s1x/251155172/model_final_c4ea5f.pkl \ -image.jpg dp_cse_texture,bbox --texture_atlases_map '{"smpl_27554": "smpl_uvSnapshot_colors.jpg"}' -v -``` -![Bounding Box + CSE Texture Transfer Visualization](https://dl.fbaipublicfiles.com/densepose/web/apply_net/res_bbox_dp_cse_texture.jpg) - -The texture files can be found in the `doc/images` folder diff --git a/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md b/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md deleted file mode 100644 index b0a764b8740597c6af634127b80b53d28913726f..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md +++ /dev/null @@ -1,105 +0,0 @@ - -# Query Dataset - -`query_db` is a tool to print or visualize DensePose data from a dataset. -It has two modes: `print` and `show` to output dataset entries to standard -output or to visualize them on images. - -## Print Mode - -The general command form is: -```bash -python query_db.py print [-h] [-v] [--max-entries N] -``` - -There are two mandatory arguments: - - ``, DensePose dataset specification, from which to select - the entries (e.g. `densepose_coco_2014_train`). - - ``, dataset entry selector which can be a single specification, - or a comma-separated list of specifications of the form - `field[:type]=value` for exact match with the value - or `field[:type]=min-max` for a range of values - -One can additionally limit the maximum number of entries to output -by providing `--max-entries` argument. - -Examples: - -1. Output at most 10 first entries from the `densepose_coco_2014_train` dataset: -```bash -python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v -``` - -2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`: -```bash -python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v -``` - -3. 
Output all entries with `image_id` between 36 and 156: -```bash -python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v -``` - -## Visualization Mode - -The general command form is: -```bash -python query_db.py show [-h] [-v] [--max-entries N] [--output ] -``` - -There are three mandatory arguments: - - ``, DensePose dataset specification, from which to select - the entries (e.g. `densepose_coco_2014_train`). - - ``, dataset entry selector which can be a single specification, - or a comma-separated list of specifications of the form - `field[:type]=value` for exact match with the value - or `field[:type]=min-max` for a range of values - - ``, visualizations specifier; currently available visualizations are: - * `bbox` - bounding boxes of annotated persons; - * `dp_i` - annotated points colored according to the containing part; - * `dp_pts` - annotated points in green color; - * `dp_segm` - segmentation masks for annotated persons; - * `dp_u` - annotated points colored according to their U coordinate in part parameterization; - * `dp_v` - annotated points colored according to their V coordinate in part parameterization; - -One can additionally provide one of the two optional arguments: - - `--max_entries` to limit the maximum number of entries to visualize - - `--output` to provide visualization file name template, which defaults - to `output.png`. To distinguish file names for different dataset - entries, the tool appends 1-based entry index to the output file name, - e.g. output.0001.png, output.0002.png, etc. - -The following examples show how to output different visualizations for image with `id = 322` -from `densepose_coco_2014_train` dataset: - -1. Show bounding box and segmentation: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v -``` -![Bounding Box + Segmentation Visualization](images/vis_bbox_dp_segm.jpg) - -2. Show bounding box and points colored according to the containing part: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v -``` -![Bounding Box + Point Label Visualization](images/vis_bbox_dp_i.jpg) - -3. Show bounding box and annotated points in green color: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v -``` -![Bounding Box + Point Visualization](images/vis_bbox_dp_pts.jpg) - -4. Show bounding box and annotated points colored according to their U coordinate in part parameterization: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v -``` -![Bounding Box + Point U Visualization](images/vis_bbox_dp_u.jpg) - -5. Show bounding box and annotated points colored according to their V coordinate in part parameterization: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v -``` -![Bounding Box + Point V Visualization](images/vis_bbox_dp_v.jpg) - - diff --git a/detectron2/projects/DensePose/query_db.py b/detectron2/projects/DensePose/query_db.py deleted file mode 100644 index 997eba3e6c264213b364a9444755cb15580e82ee..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/query_db.py +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import argparse -import logging -import os -import sys -from timeit import default_timer as timer -from typing import Any, ClassVar, Dict, List -import torch - -from detectron2.data.catalog import DatasetCatalog -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import setup_logger - -from densepose.structures import DensePoseDataRelative -from densepose.utils.dbhelper import EntrySelector -from densepose.utils.logger import verbosity_to_level -from densepose.vis.base import CompoundVisualizer -from densepose.vis.bounding_box import BoundingBoxVisualizer -from densepose.vis.densepose_data_points import ( - DensePoseDataCoarseSegmentationVisualizer, - DensePoseDataPointsIVisualizer, - DensePoseDataPointsUVisualizer, - DensePoseDataPointsVisualizer, - DensePoseDataPointsVVisualizer, -) - -DOC = """Query DB - a tool to print / visualize data from a database -""" - -LOGGER_NAME = "query_db" - -logger = logging.getLogger(LOGGER_NAME) - -_ACTION_REGISTRY: Dict[str, "Action"] = {} - - -class Action: - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - parser.add_argument( - "-v", - "--verbosity", - action="count", - help="Verbose mode. Multiple -v options increase the verbosity.", - ) - - -def register_action(cls: type): - """ - Decorator for action classes to automate action registration - """ - global _ACTION_REGISTRY - _ACTION_REGISTRY[cls.COMMAND] = cls - return cls - - -class EntrywiseAction(Action): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(EntrywiseAction, cls).add_arguments(parser) - parser.add_argument( - "dataset", metavar="", help="Dataset name (e.g. densepose_coco_2014_train)" - ) - parser.add_argument( - "selector", - metavar="", - help="Dataset entry selector in the form field1[:type]=value1[," - "field2[:type]=value_min-value_max...] which selects all " - "entries from the dataset that satisfy the constraints", - ) - parser.add_argument( - "--max-entries", metavar="N", help="Maximum number of entries to process", type=int - ) - - @classmethod - def execute(cls: type, args: argparse.Namespace): - dataset = setup_dataset(args.dataset) - entry_selector = EntrySelector.from_string(args.selector) - context = cls.create_context(args) - if args.max_entries is not None: - for _, entry in zip(range(args.max_entries), dataset): - if entry_selector(entry): - cls.execute_on_entry(entry, context) - else: - for entry in dataset: - if entry_selector(entry): - cls.execute_on_entry(entry, context) - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - context = {} - return context - - -@register_action -class PrintAction(EntrywiseAction): - """ - Print action that outputs selected entries to stdout - """ - - COMMAND: ClassVar[str] = "print" - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. 
") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(PrintAction, cls).add_arguments(parser) - - @classmethod - def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): - import pprint - - printer = pprint.PrettyPrinter(indent=2, width=200, compact=True) - printer.pprint(entry) - - -@register_action -class ShowAction(EntrywiseAction): - """ - Show action that visualizes selected entries on an image - """ - - COMMAND: ClassVar[str] = "show" - VISUALIZERS: ClassVar[Dict[str, object]] = { - "dp_segm": DensePoseDataCoarseSegmentationVisualizer(), - "dp_i": DensePoseDataPointsIVisualizer(), - "dp_u": DensePoseDataPointsUVisualizer(), - "dp_v": DensePoseDataPointsVVisualizer(), - "dp_pts": DensePoseDataPointsVisualizer(), - "bbox": BoundingBoxVisualizer(), - } - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(ShowAction, cls).add_arguments(parser) - parser.add_argument( - "visualizations", - metavar="", - help="Comma separated list of visualizations, possible values: " - "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), - ) - parser.add_argument( - "--output", - metavar="", - default="output.png", - help="File name to save output to", - ) - - @classmethod - def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): - import cv2 - import numpy as np - - image_fpath = PathManager.get_local_path(entry["file_name"]) - image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE) - image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) - datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry) - visualizer = context["visualizer"] - image_vis = visualizer.visualize(image, datas) - entry_idx = context["entry_idx"] + 1 - out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) - cv2.imwrite(out_fname, image_vis) - logger.info(f"Output saved to {out_fname}") - context["entry_idx"] += 1 - - @classmethod - def _get_out_fname(cls: type, entry_idx: int, fname_base: str): - base, ext = os.path.splitext(fname_base) - return base + ".{0:04d}".format(entry_idx) + ext - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - vis_specs = args.visualizations.split(",") - visualizers = [] - for vis_spec in vis_specs: - vis = cls.VISUALIZERS[vis_spec] - visualizers.append(vis) - context = { - "vis_specs": vis_specs, - "visualizer": CompoundVisualizer(visualizers), - "out_fname": args.output, - "entry_idx": 0, - } - return context - - @classmethod - def _extract_data_for_visualizers_from_entry( - cls: type, vis_specs: List[str], entry: Dict[str, Any] - ): - dp_list = [] - bbox_list = [] - for annotation in entry["annotations"]: - is_valid, _ = DensePoseDataRelative.validate_annotation(annotation) - if not is_valid: - continue - bbox = torch.as_tensor(annotation["bbox"]) - bbox_list.append(bbox) - dp_data = DensePoseDataRelative(annotation) - dp_list.append(dp_data) - datas = [] - for vis_spec in vis_specs: - datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list)) - return datas - - -def setup_dataset(dataset_name): - logger.info("Loading dataset {}".format(dataset_name)) - start = timer() - dataset = 
DatasetCatalog.get(dataset_name) - stop = timer() - logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start)) - return dataset - - -def create_argument_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=DOC, - formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), - ) - parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) - subparsers = parser.add_subparsers(title="Actions") - for _, action in _ACTION_REGISTRY.items(): - action.add_parser(subparsers) - return parser - - -def main(): - parser = create_argument_parser() - args = parser.parse_args() - verbosity = getattr(args, "verbosity", None) - global logger - logger = setup_logger(name=LOGGER_NAME) - logger.setLevel(verbosity_to_level(verbosity)) - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/detectron2/projects/DensePose/setup.py b/detectron2/projects/DensePose/setup.py deleted file mode 100644 index 22ad239fe320b8f9501f783afb134b975276a628..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/setup.py +++ /dev/null @@ -1,42 +0,0 @@ -import re -from pathlib import Path -from setuptools import find_packages, setup - -try: - import torch # noqa: F401 -except ImportError as e: - raise Exception( - """ -You must install PyTorch prior to installing DensePose: -pip install torch - -For more information: - https://pytorch.org/get-started/locally/ - """ - ) from e - - -def get_detectron2_current_version(): - """Version is not available for import through Python since it is - above the top level of the package. Instead, we parse it from the - file with a regex.""" - # Get version info from detectron2 __init__.py - version_source = (Path(__file__).parents[2] / "detectron2" / "__init__.py").read_text() - version_number = re.findall(r'__version__ = "([0-9\.]+)"', version_source)[0] - return version_number - - -setup( - name="detectron2-densepose", - author="FAIR", - version=get_detectron2_current_version(), - url="https://github.com/facebookresearch/detectron2/tree/main/projects/DensePose", - packages=find_packages(), - python_requires=">=3.7", - install_requires=[ - "av>=8.0.3", - "detectron2@git+https://github.com/facebookresearch/detectron2.git", - "opencv-python-headless>=4.5.3.56", - "scipy>=1.5.4", - ], -) diff --git a/detectron2/projects/DensePose/tests/common.py b/detectron2/projects/DensePose/tests/common.py deleted file mode 100644 index ff22b9ab6eceb7c9de0f769c3cbd3197ecd51222..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/common.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import os -import torch - -from detectron2.config import get_cfg -from detectron2.engine import default_setup -from detectron2.modeling import build_model - -from densepose import add_densepose_config - -_BASE_CONFIG_DIR = "configs" -_EVOLUTION_CONFIG_SUB_DIR = "evolution" -_HRNET_CONFIG_SUB_DIR = "HRNet" -_QUICK_SCHEDULES_CONFIG_SUB_DIR = "quick_schedules" -_BASE_CONFIG_FILE_PREFIX = "Base-" -_CONFIG_FILE_EXT = ".yaml" - - -def _get_base_config_dir(): - """ - Return the base directory for configurations - """ - return os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", _BASE_CONFIG_DIR) - - -def _get_evolution_config_dir(): - """ - Return the base directory for evolution configurations - """ - return os.path.join(_get_base_config_dir(), _EVOLUTION_CONFIG_SUB_DIR) - - -def _get_hrnet_config_dir(): - """ - Return the base directory for HRNet configurations - """ - return os.path.join(_get_base_config_dir(), _HRNET_CONFIG_SUB_DIR) - - -def _get_quick_schedules_config_dir(): - """ - Return the base directory for quick schedules configurations - """ - return os.path.join(_get_base_config_dir(), _QUICK_SCHEDULES_CONFIG_SUB_DIR) - - -def _collect_config_files(config_dir): - """ - Collect all configuration files (i.e. densepose_*.yaml) directly in the specified directory - """ - start = _get_base_config_dir() - results = [] - for entry in os.listdir(config_dir): - path = os.path.join(config_dir, entry) - if not os.path.isfile(path): - continue - _, ext = os.path.splitext(entry) - if ext != _CONFIG_FILE_EXT: - continue - if entry.startswith(_BASE_CONFIG_FILE_PREFIX): - continue - config_file = os.path.relpath(path, start) - results.append(config_file) - return results - - -def get_config_files(): - """ - Get all the configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_base_config_dir()) - - -def get_evolution_config_files(): - """ - Get all the evolution configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_evolution_config_dir()) - - -def get_hrnet_config_files(): - """ - Get all the HRNet configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_hrnet_config_dir()) - - -def get_quick_schedules_config_files(): - """ - Get all the quick schedules configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_quick_schedules_config_dir()) - - -def get_model_config(config_file): - """ - Load and return the configuration from the specified file (relative to the base configuration - directory) - """ - cfg = get_cfg() - add_densepose_config(cfg) - path = os.path.join(_get_base_config_dir(), config_file) - cfg.merge_from_file(path) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - return cfg - - -def get_model(config_file): - """ - Get the model from the specified file (relative to the base configuration directory) - """ - cfg = get_model_config(config_file) - return build_model(cfg) - - -def setup(config_file): - """ - Setup the configuration from the specified file (relative to the base configuration directory) - """ - cfg = get_model_config(config_file) - cfg.freeze() - default_setup(cfg, {}) diff --git a/detectron2/projects/DensePose/tests/test_chart_based_annotations_accumulator.py b/detectron2/projects/DensePose/tests/test_chart_based_annotations_accumulator.py deleted file mode 100644 index 
a1c4f8565a3c55b79b6ed96b03635e6c2932958d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_chart_based_annotations_accumulator.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import unittest -import torch - -from detectron2.structures import Boxes, BoxMode, Instances - -from densepose.modeling.losses.utils import ChartBasedAnnotationsAccumulator -from densepose.structures import DensePoseDataRelative, DensePoseList - -image_shape = (100, 100) -instances = Instances(image_shape) -n_instances = 3 -instances.proposal_boxes = Boxes(torch.rand(n_instances, 4)) -instances.gt_boxes = Boxes(torch.rand(n_instances, 4)) - - -# instances.gt_densepose = None cannot happen because instances attributes need a length -class TestChartBasedAnnotationsAccumulator(unittest.TestCase): - def test_chart_based_annotations_accumulator_no_gt_densepose(self): - accumulator = ChartBasedAnnotationsAccumulator() - accumulator.accumulate(instances) - expected_values = {"nxt_bbox_with_dp_index": 0, "nxt_bbox_index": n_instances} - for key in accumulator.__dict__: - self.assertEqual(getattr(accumulator, key), expected_values.get(key, [])) - - def test_chart_based_annotations_accumulator_gt_densepose_none(self): - instances.gt_densepose = [None] * n_instances - accumulator = ChartBasedAnnotationsAccumulator() - accumulator.accumulate(instances) - expected_values = {"nxt_bbox_with_dp_index": 0, "nxt_bbox_index": n_instances} - for key in accumulator.__dict__: - self.assertEqual(getattr(accumulator, key), expected_values.get(key, [])) - - def test_chart_based_annotations_accumulator_gt_densepose(self): - data_relative_keys = [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - DensePoseDataRelative.S_KEY, - ] - annotations = [DensePoseDataRelative({k: [0] for k in data_relative_keys})] * n_instances - instances.gt_densepose = DensePoseList(annotations, instances.gt_boxes, image_shape) - accumulator = ChartBasedAnnotationsAccumulator() - accumulator.accumulate(instances) - bbox_xywh_est = BoxMode.convert( - instances.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - bbox_xywh_gt = BoxMode.convert( - instances.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - expected_values = { - "s_gt": [ - torch.zeros((3, DensePoseDataRelative.MASK_SIZE, DensePoseDataRelative.MASK_SIZE)) - ] - * n_instances, - "bbox_xywh_est": bbox_xywh_est.split(1), - "bbox_xywh_gt": bbox_xywh_gt.split(1), - "point_bbox_with_dp_indices": [torch.tensor([i]) for i in range(n_instances)], - "point_bbox_indices": [torch.tensor([i]) for i in range(n_instances)], - "bbox_indices": list(range(n_instances)), - "nxt_bbox_with_dp_index": n_instances, - "nxt_bbox_index": n_instances, - } - default_value = [torch.tensor([0])] * 3 - for key in accumulator.__dict__: - to_test = getattr(accumulator, key) - gt_value = expected_values.get(key, default_value) - if key in ["nxt_bbox_with_dp_index", "nxt_bbox_index"]: - self.assertEqual(to_test, gt_value) - elif key == "bbox_indices": - self.assertListEqual(to_test, gt_value) - else: - self.assertTrue(torch.allclose(torch.stack(to_test), torch.stack(gt_value))) diff --git a/detectron2/projects/DensePose/tests/test_combine_data_loader.py b/detectron2/projects/DensePose/tests/test_combine_data_loader.py deleted file mode 100644 index 
832903a8e133b124669830b378af582c3b58b3dc..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_combine_data_loader.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import random -import unittest -from typing import Any, Iterable, Iterator, Tuple - -from densepose.data import CombinedDataLoader - - -def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]: - """ - Group elements of an iterable by chunks of size `n`, e.g. - grouper(range(9), 4) -> - (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None) - """ - it = iter(iterable) - while True: - values = [] - for _ in range(n): - try: - value = next(it) - except StopIteration: - values.extend([fillvalue] * (n - len(values))) - yield tuple(values) - return - values.append(value) - yield tuple(values) - - -class TestCombinedDataLoader(unittest.TestCase): - def test_combine_loaders_1(self): - loader1 = _grouper([f"1_{i}" for i in range(10)], 2) - loader2 = _grouper([f"2_{i}" for i in range(11)], 3) - batch_size = 4 - ratios = (0.1, 0.9) - random.seed(43) - combined = CombinedDataLoader((loader1, loader2), batch_size, ratios) - BATCHES_GT = [ - ["1_0", "1_1", "2_0", "2_1"], - ["2_2", "2_3", "2_4", "2_5"], - ["1_2", "1_3", "2_6", "2_7"], - ["2_8", "2_9", "2_10", None], - ] - for i, batch in enumerate(combined): - self.assertEqual(len(batch), batch_size) - self.assertEqual(batch, BATCHES_GT[i]) diff --git a/detectron2/projects/DensePose/tests/test_cse_annotations_accumulator.py b/detectron2/projects/DensePose/tests/test_cse_annotations_accumulator.py deleted file mode 100644 index a22dce9ce00532d60dc3f4edbef4cea26b006b92..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_cse_annotations_accumulator.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import unittest -import torch - -from detectron2.structures import Boxes, BoxMode, Instances - -from densepose.modeling.losses.embed_utils import CseAnnotationsAccumulator -from densepose.structures import DensePoseDataRelative, DensePoseList - - -class TestCseAnnotationsAccumulator(unittest.TestCase): - def test_cse_annotations_accumulator_nodp(self): - instances_lst = [ - self._create_instances_nodp(), - ] - self._test_template(instances_lst) - - def test_cse_annotations_accumulator_sparsedp(self): - instances_lst = [ - self._create_instances_sparsedp(), - ] - self._test_template(instances_lst) - - def test_cse_annotations_accumulator_fulldp(self): - instances_lst = [ - self._create_instances_fulldp(), - ] - self._test_template(instances_lst) - - def test_cse_annotations_accumulator_combined(self): - instances_lst = [ - self._create_instances_nodp(), - self._create_instances_sparsedp(), - self._create_instances_fulldp(), - ] - self._test_template(instances_lst) - - def _test_template(self, instances_lst): - acc = CseAnnotationsAccumulator() - for instances in instances_lst: - acc.accumulate(instances) - packed_anns = acc.pack() - self._check_correspondence(packed_anns, instances_lst) - - def _create_instances_nodp(self): - image_shape = (480, 640) - instances = Instances(image_shape) - instances.gt_boxes = Boxes( - torch.as_tensor( - [ - [40.0, 40.0, 140.0, 140.0], - [160.0, 160.0, 270.0, 270.0], - [40.0, 160.0, 160.0, 280.0], - ] - ) - ) - instances.proposal_boxes = Boxes( - torch.as_tensor( - [ - [41.0, 39.0, 142.0, 138.0], - [161.0, 159.0, 272.0, 268.0], - [41.0, 159.0, 162.0, 278.0], - ] - ) - ) - # do not add gt_densepose - return instances - - def _create_instances_sparsedp(self): - image_shape = (540, 720) - instances = Instances(image_shape) - instances.gt_boxes = Boxes( - torch.as_tensor( - [ - [50.0, 50.0, 130.0, 130.0], - [150.0, 150.0, 240.0, 240.0], - [50.0, 150.0, 230.0, 330.0], - ] - ) - ) - instances.proposal_boxes = Boxes( - torch.as_tensor( - [ - [49.0, 51.0, 131.0, 129.0], - [151.0, 149.0, 241.0, 239.0], - [51.0, 149.0, 232.0, 329.0], - ] - ) - ) - instances.gt_densepose = DensePoseList( - [ - None, - self._create_dp_data( - { - "dp_x": [81.69, 153.47, 151.00], - "dp_y": [162.24, 128.71, 113.81], - "dp_vertex": [0, 1, 2], - "ref_model": "zebra_5002", - "dp_masks": [], - }, - {"c": (166, 133), "r": 64}, - ), - None, - ], - instances.gt_boxes, - image_shape, - ) - return instances - - def _create_instances_fulldp(self): - image_shape = (680, 840) - instances = Instances(image_shape) - instances.gt_boxes = Boxes( - torch.as_tensor( - [ - [65.0, 55.0, 165.0, 155.0], - [170.0, 175.0, 275.0, 280.0], - [55.0, 165.0, 165.0, 275.0], - ] - ) - ) - instances.proposal_boxes = Boxes( - torch.as_tensor( - [ - [66.0, 54.0, 166.0, 154.0], - [171.0, 174.0, 276.0, 279.0], - [56.0, 164.0, 166.0, 274.0], - ] - ) - ) - instances.gt_densepose = DensePoseList( - [ - self._create_dp_data( - { - "dp_x": [149.99, 198.62, 157.59], - "dp_y": [170.74, 197.73, 123.12], - "dp_vertex": [3, 4, 5], - "ref_model": "cat_5001", - "dp_masks": [], - }, - {"c": (100, 100), "r": 50}, - ), - self._create_dp_data( - { - "dp_x": [234.53, 116.72, 71.66], - "dp_y": [107.53, 11.31, 142.32], - "dp_vertex": [6, 7, 8], - "ref_model": "dog_5002", - "dp_masks": [], - }, - {"c": (200, 150), "r": 40}, - ), - self._create_dp_data( - { - "dp_x": [225.54, 202.61, 135.90], - "dp_y": [167.46, 181.00, 211.47], - "dp_vertex": [9, 10, 11], - "ref_model": "elephant_5002", - "dp_masks": [], - }, - {"c": (100, 
200), "r": 45}, - ), - ], - instances.gt_boxes, - image_shape, - ) - return instances - - def _create_dp_data(self, anns, blob_def=None): - dp_data = DensePoseDataRelative(anns) - if blob_def is not None: - dp_data.segm[ - blob_def["c"][0] - blob_def["r"] : blob_def["c"][0] + blob_def["r"], - blob_def["c"][1] - blob_def["r"] : blob_def["c"][1] + blob_def["r"], - ] = 1 - return dp_data - - def _check_correspondence(self, packed_anns, instances_lst): - instance_idx = 0 - data_idx = 0 - pt_offset = 0 - if packed_anns is not None: - bbox_xyxy_gt = BoxMode.convert( - packed_anns.bbox_xywh_gt.clone(), BoxMode.XYWH_ABS, BoxMode.XYXY_ABS - ) - bbox_xyxy_est = BoxMode.convert( - packed_anns.bbox_xywh_est.clone(), BoxMode.XYWH_ABS, BoxMode.XYXY_ABS - ) - for instances in instances_lst: - if not hasattr(instances, "gt_densepose"): - instance_idx += len(instances) - continue - for i, dp_data in enumerate(instances.gt_densepose): - if dp_data is None: - instance_idx += 1 - continue - n_pts = len(dp_data.x) - self.assertTrue( - torch.allclose(dp_data.x, packed_anns.x_gt[pt_offset : pt_offset + n_pts]) - ) - self.assertTrue( - torch.allclose(dp_data.y, packed_anns.y_gt[pt_offset : pt_offset + n_pts]) - ) - self.assertTrue(torch.allclose(dp_data.segm, packed_anns.coarse_segm_gt[data_idx])) - self.assertTrue( - torch.allclose( - torch.ones(n_pts, dtype=torch.long) * dp_data.mesh_id, - packed_anns.vertex_mesh_ids_gt[pt_offset : pt_offset + n_pts], - ) - ) - self.assertTrue( - torch.allclose( - dp_data.vertex_ids, packed_anns.vertex_ids_gt[pt_offset : pt_offset + n_pts] - ) - ) - self.assertTrue( - torch.allclose(instances.gt_boxes.tensor[i], bbox_xyxy_gt[data_idx]) - ) - self.assertTrue( - torch.allclose(instances.proposal_boxes.tensor[i], bbox_xyxy_est[data_idx]) - ) - self.assertTrue( - torch.allclose( - torch.ones(n_pts, dtype=torch.long) * data_idx, - packed_anns.point_bbox_with_dp_indices[pt_offset : pt_offset + n_pts], - ) - ) - self.assertTrue( - torch.allclose( - torch.ones(n_pts, dtype=torch.long) * instance_idx, - packed_anns.point_bbox_indices[pt_offset : pt_offset + n_pts], - ) - ) - self.assertEqual(instance_idx, packed_anns.bbox_indices[data_idx]) - pt_offset += n_pts - instance_idx += 1 - data_idx += 1 - if data_idx == 0: - self.assertIsNone(packed_anns) diff --git a/detectron2/projects/DensePose/tests/test_dataset_loaded_annotations.py b/detectron2/projects/DensePose/tests/test_dataset_loaded_annotations.py deleted file mode 100644 index cf8035b87c6477221a113ba9fcb794495c04af7c..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_dataset_loaded_annotations.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import unittest - -from densepose.data.datasets.builtin import COCO_DATASETS, DENSEPOSE_ANNOTATIONS_DIR, LVIS_DATASETS -from densepose.data.datasets.coco import load_coco_json -from densepose.data.datasets.lvis import load_lvis_json -from densepose.data.utils import maybe_prepend_base_path -from densepose.structures import DensePoseDataRelative - - -class TestDatasetLoadedAnnotations(unittest.TestCase): - COCO_DATASET_DATA = { - "densepose_coco_2014_train": {"n_instances": 39210}, - "densepose_coco_2014_minival": {"n_instances": 2243}, - "densepose_coco_2014_minival_100": {"n_instances": 164}, - "densepose_coco_2014_valminusminival": {"n_instances": 7297}, - "densepose_coco_2014_train_cse": {"n_instances": 39210}, - "densepose_coco_2014_minival_cse": {"n_instances": 2243}, - "densepose_coco_2014_minival_100_cse": {"n_instances": 164}, - "densepose_coco_2014_valminusminival_cse": {"n_instances": 7297}, - "densepose_chimps": {"n_instances": 930}, - "posetrack2017_train": {"n_instances": 8274}, - "posetrack2017_val": {"n_instances": 4753}, - "lvis_v05_train": {"n_instances": 5186}, - "lvis_v05_val": {"n_instances": 1037}, - } - - LVIS_DATASET_DATA = { - "densepose_lvis_v1_train1": {"n_instances": 3394}, - "densepose_lvis_v1_train2": {"n_instances": 1800}, - "densepose_lvis_v1_val": {"n_instances": 1037}, - "densepose_lvis_v1_val_animals_100": {"n_instances": 89}, - } - - def generic_coco_test(self, dataset_info): - if dataset_info.name not in self.COCO_DATASET_DATA: - return - n_inst = self.COCO_DATASET_DATA[dataset_info.name]["n_instances"] - self.generic_test(dataset_info, n_inst, load_coco_json) - - def generic_lvis_test(self, dataset_info): - if dataset_info.name not in self.LVIS_DATASET_DATA: - return - n_inst = self.LVIS_DATASET_DATA[dataset_info.name]["n_instances"] - self.generic_test(dataset_info, n_inst, load_lvis_json) - - def generic_test(self, dataset_info, n_inst, loader_fun): - datasets_root = DENSEPOSE_ANNOTATIONS_DIR - annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_info.annotations_fpath) - images_root = maybe_prepend_base_path(datasets_root, dataset_info.images_root) - image_annotation_dicts = loader_fun( - annotations_json_file=annotations_fpath, - image_root=images_root, - dataset_name=dataset_info.name, - ) - num_valid = sum( - 1 - for image_annotation_dict in image_annotation_dicts - for ann in image_annotation_dict["annotations"] - if DensePoseDataRelative.validate_annotation(ann)[0] - ) - self.assertEqual(num_valid, n_inst) - - -def coco_test_fun(dataset_info): - return lambda self: self.generic_coco_test(dataset_info) - - -for dataset_info in COCO_DATASETS: - setattr( - TestDatasetLoadedAnnotations, - f"test_coco_builtin_loaded_annotations_{dataset_info.name}", - coco_test_fun(dataset_info), - ) - - -def lvis_test_fun(dataset_info): - return lambda self: self.generic_lvis_test(dataset_info) - - -for dataset_info in LVIS_DATASETS: - setattr( - TestDatasetLoadedAnnotations, - f"test_lvis_builtin_loaded_annotations_{dataset_info.name}", - lvis_test_fun(dataset_info), - ) diff --git a/detectron2/projects/DensePose/tests/test_frame_selector.py b/detectron2/projects/DensePose/tests/test_frame_selector.py deleted file mode 100644 index 65f05f55c78d4ab24950e5335818b3e1f981aa0d..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_frame_selector.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import random -import unittest - -from densepose.data.video import FirstKFramesSelector, LastKFramesSelector, RandomKFramesSelector - - -class TestFrameSelector(unittest.TestCase): - def test_frame_selector_random_k_1(self): - _SEED = 43 - _K = 4 - random.seed(_SEED) - selector = RandomKFramesSelector(_K) - frame_tss = list(range(0, 20, 2)) - _SELECTED_GT = [0, 8, 4, 6] - selected = selector(frame_tss) - self.assertEqual(_SELECTED_GT, selected) - - def test_frame_selector_random_k_2(self): - _SEED = 43 - _K = 10 - random.seed(_SEED) - selector = RandomKFramesSelector(_K) - frame_tss = list(range(0, 6, 2)) - _SELECTED_GT = [0, 2, 4] - selected = selector(frame_tss) - self.assertEqual(_SELECTED_GT, selected) - - def test_frame_selector_first_k_1(self): - _K = 4 - selector = FirstKFramesSelector(_K) - frame_tss = list(range(0, 20, 2)) - _SELECTED_GT = frame_tss[:_K] - selected = selector(frame_tss) - self.assertEqual(_SELECTED_GT, selected) - - def test_frame_selector_first_k_2(self): - _K = 10 - selector = FirstKFramesSelector(_K) - frame_tss = list(range(0, 6, 2)) - _SELECTED_GT = frame_tss[:_K] - selected = selector(frame_tss) - self.assertEqual(_SELECTED_GT, selected) - - def test_frame_selector_last_k_1(self): - _K = 4 - selector = LastKFramesSelector(_K) - frame_tss = list(range(0, 20, 2)) - _SELECTED_GT = frame_tss[-_K:] - selected = selector(frame_tss) - self.assertEqual(_SELECTED_GT, selected) - - def test_frame_selector_last_k_2(self): - _K = 10 - selector = LastKFramesSelector(_K) - frame_tss = list(range(0, 6, 2)) - _SELECTED_GT = frame_tss[-_K:] - selected = selector(frame_tss) - self.assertEqual(_SELECTED_GT, selected) diff --git a/detectron2/projects/DensePose/tests/test_image_list_dataset.py b/detectron2/projects/DensePose/tests/test_image_list_dataset.py deleted file mode 100644 index 7932602448b49b9be4fcea9645fe7a9c4d53c00e..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_image_list_dataset.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import contextlib -import os -import tempfile -import unittest -import torch -from torchvision.utils import save_image - -from densepose.data.image_list_dataset import ImageListDataset -from densepose.data.transform import ImageResizeTransform - - -@contextlib.contextmanager -def temp_image(height, width): - random_image = torch.rand(height, width) - with tempfile.NamedTemporaryFile(suffix=".jpg") as f: - f.close() - save_image(random_image, f.name) - yield f.name - os.unlink(f.name) - - -class TestImageListDataset(unittest.TestCase): - def test_image_list_dataset(self): - height, width = 720, 1280 - with temp_image(height, width) as image_fpath: - image_list = [image_fpath] - category_list = [None] - dataset = ImageListDataset(image_list, category_list) - self.assertEqual(len(dataset), 1) - data1, categories1 = dataset[0]["images"], dataset[0]["categories"] - self.assertEqual(data1.shape, torch.Size((1, 3, height, width))) - self.assertEqual(data1.dtype, torch.float32) - self.assertIsNone(categories1[0]) - - def test_image_list_dataset_with_transform(self): - height, width = 720, 1280 - with temp_image(height, width) as image_fpath: - image_list = [image_fpath] - category_list = [None] - transform = ImageResizeTransform() - dataset = ImageListDataset(image_list, category_list, transform) - self.assertEqual(len(dataset), 1) - data1, categories1 = dataset[0]["images"], dataset[0]["categories"] - self.assertEqual(data1.shape, torch.Size((1, 3, 749, 1333))) - self.assertEqual(data1.dtype, torch.float32) - self.assertIsNone(categories1[0]) diff --git a/detectron2/projects/DensePose/tests/test_image_resize_transform.py b/detectron2/projects/DensePose/tests/test_image_resize_transform.py deleted file mode 100644 index 01c3373b64ee243198af682928939781a15f929a..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_image_resize_transform.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import unittest -import torch - -from densepose.data.transform import ImageResizeTransform - - -class TestImageResizeTransform(unittest.TestCase): - def test_image_resize_1(self): - images_batch = torch.ones((3, 3, 100, 100), dtype=torch.uint8) * 100 - transform = ImageResizeTransform() - images_transformed = transform(images_batch) - IMAGES_GT = torch.ones((3, 3, 800, 800), dtype=torch.float) * 100 - self.assertEqual(images_transformed.size(), IMAGES_GT.size()) - self.assertAlmostEqual(torch.abs(IMAGES_GT - images_transformed).max().item(), 0.0) diff --git a/detectron2/projects/DensePose/tests/test_model_e2e.py b/detectron2/projects/DensePose/tests/test_model_e2e.py deleted file mode 100644 index 055fadfd781adcdfd661795edbc621d5eca763fe..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_model_e2e.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import unittest -import torch - -from detectron2.structures import BitMasks, Boxes, Instances - -from .common import get_model - - -# TODO(plabatut): Modularize detectron2 tests and re-use -def make_model_inputs(image, instances=None): - if instances is None: - return {"image": image} - - return {"image": image, "instances": instances} - - -def make_empty_instances(h, w): - instances = Instances((h, w)) - instances.gt_boxes = Boxes(torch.rand(0, 4)) - instances.gt_classes = torch.tensor([]).to(dtype=torch.int64) - instances.gt_masks = BitMasks(torch.rand(0, h, w)) - return instances - - -class ModelE2ETest(unittest.TestCase): - CONFIG_PATH = "" - - def setUp(self): - self.model = get_model(self.CONFIG_PATH) - - def _test_eval(self, sizes): - inputs = [make_model_inputs(torch.rand(3, size[0], size[1])) for size in sizes] - self.model.eval() - self.model(inputs) - - -class DensePoseRCNNE2ETest(ModelE2ETest): - CONFIG_PATH = "densepose_rcnn_R_101_FPN_s1x.yaml" - - def test_empty_data(self): - self._test_eval([(200, 250), (200, 249)]) diff --git a/detectron2/projects/DensePose/tests/test_setup.py b/detectron2/projects/DensePose/tests/test_setup.py deleted file mode 100644 index 165a1b9a7b64aa8a0fbe5b862ebfb6594e77c256..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_setup.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import unittest - -from .common import ( - get_config_files, - get_evolution_config_files, - get_hrnet_config_files, - get_quick_schedules_config_files, - setup, -) - - -class TestSetup(unittest.TestCase): - def _test_setup(self, config_file): - setup(config_file) - - def test_setup_configs(self): - config_files = get_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_evolution_configs(self): - config_files = get_evolution_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_hrnet_configs(self): - config_files = get_hrnet_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_quick_schedules_configs(self): - config_files = get_quick_schedules_config_files() - for config_file in config_files: - self._test_setup(config_file) diff --git a/detectron2/projects/DensePose/tests/test_structures.py b/detectron2/projects/DensePose/tests/test_structures.py deleted file mode 100644 index 54082d3abf119bf2fdba7206124893f35b4b4ae1..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_structures.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import unittest - -from densepose.structures import normalized_coords_transform - - -class TestStructures(unittest.TestCase): - def test_normalized_coords_transform(self): - bbox = (32, 24, 288, 216) - x0, y0, w, h = bbox - xmin, ymin, xmax, ymax = x0, y0, x0 + w, y0 + h - f = normalized_coords_transform(*bbox) - # Top-left - expected_p, actual_p = (-1, -1), f((xmin, ymin)) - self.assertEqual(expected_p, actual_p) - # Top-right - expected_p, actual_p = (1, -1), f((xmax, ymin)) - self.assertEqual(expected_p, actual_p) - # Bottom-left - expected_p, actual_p = (-1, 1), f((xmin, ymax)) - self.assertEqual(expected_p, actual_p) - # Bottom-right - expected_p, actual_p = (1, 1), f((xmax, ymax)) - self.assertEqual(expected_p, actual_p) diff --git a/detectron2/projects/DensePose/tests/test_tensor_storage.py b/detectron2/projects/DensePose/tests/test_tensor_storage.py deleted file mode 100644 index aeeeffae4675f8d607d0471250dadb2ece5361a0..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_tensor_storage.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import io -import tempfile -import unittest -from contextlib import ExitStack -import torch -import torch.distributed as dist -import torch.multiprocessing as mp - -from detectron2.utils import comm - -from densepose.evaluation.tensor_storage import ( - SingleProcessFileTensorStorage, - SingleProcessRamTensorStorage, - SizeData, - storage_gather, -) - - -class TestSingleProcessRamTensorStorage(unittest.TestCase): - def test_read_write_1(self): - schema = { - "tf": SizeData(dtype="float32", shape=(112, 112)), - "ti": SizeData(dtype="int32", shape=(4, 64, 64)), - } - # generate data which corresponds to the schema - data_elts = [] - torch.manual_seed(23) - for _i in range(3): - data_elt = { - "tf": torch.rand((112, 112), dtype=torch.float32), - "ti": (torch.rand(4, 64, 64) * 1000).to(dtype=torch.int32), - } - data_elts.append(data_elt) - storage = SingleProcessRamTensorStorage(schema, io.BytesIO()) - # write data to the storage - for i in range(3): - record_id = storage.put(data_elts[i]) - self.assertEqual(record_id, i) - # read data from the storage - for i in range(3): - record = storage.get(i) - self.assertEqual(len(record), len(schema)) - for field_name in schema: - self.assertTrue(field_name in record) - self.assertEqual(data_elts[i][field_name].shape, record[field_name].shape) - self.assertEqual(data_elts[i][field_name].dtype, record[field_name].dtype) - self.assertTrue(torch.allclose(data_elts[i][field_name], record[field_name])) - - -class TestSingleProcessFileTensorStorage(unittest.TestCase): - def test_read_write_1(self): - schema = { - "tf": SizeData(dtype="float32", shape=(112, 112)), - "ti": SizeData(dtype="int32", shape=(4, 64, 64)), - } - # generate data which corresponds to the schema - data_elts = [] - torch.manual_seed(23) - for _i in range(3): - data_elt = { - "tf": torch.rand((112, 112), dtype=torch.float32), - "ti": (torch.rand(4, 64, 64) * 1000).to(dtype=torch.int32), - } - data_elts.append(data_elt) - # WARNING: opens the file several times! 
may not work on all platforms - with tempfile.NamedTemporaryFile() as hFile: - storage = SingleProcessFileTensorStorage(schema, hFile.name, "wb") - # write data to the storage - for i in range(3): - record_id = storage.put(data_elts[i]) - self.assertEqual(record_id, i) - hFile.seek(0) - storage = SingleProcessFileTensorStorage(schema, hFile.name, "rb") - # read data from the storage - for i in range(3): - record = storage.get(i) - self.assertEqual(len(record), len(schema)) - for field_name in schema: - self.assertTrue(field_name in record) - self.assertEqual(data_elts[i][field_name].shape, record[field_name].shape) - self.assertEqual(data_elts[i][field_name].dtype, record[field_name].dtype) - self.assertTrue(torch.allclose(data_elts[i][field_name], record[field_name])) - - -def _find_free_port(): - """ - Copied from detectron2/engine/launch.py - """ - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Binding to port 0 will cause the OS to find an available port for us - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - # NOTE: there is still a chance the port could be taken by other processes. - return port - - -def launch(main_func, nprocs, args=()): - port = _find_free_port() - dist_url = f"tcp://127.0.0.1:{port}" - # dist_url = "env://" - mp.spawn( - distributed_worker, nprocs=nprocs, args=(main_func, nprocs, dist_url, args), daemon=False - ) - - -def distributed_worker(local_rank, main_func, nprocs, dist_url, args): - dist.init_process_group( - backend="gloo", init_method=dist_url, world_size=nprocs, rank=local_rank - ) - comm.synchronize() - assert comm._LOCAL_PROCESS_GROUP is None - pg = dist.new_group(list(range(nprocs))) - comm._LOCAL_PROCESS_GROUP = pg - main_func(*args) - - -def ram_read_write_worker(): - schema = { - "tf": SizeData(dtype="float32", shape=(112, 112)), - "ti": SizeData(dtype="int32", shape=(4, 64, 64)), - } - storage = SingleProcessRamTensorStorage(schema, io.BytesIO()) - world_size = comm.get_world_size() - rank = comm.get_rank() - data_elts = [] - # prepare different number of tensors in different processes - for i in range(rank + 1): - data_elt = { - "tf": torch.ones((112, 112), dtype=torch.float32) * (rank + i * world_size), - "ti": torch.ones((4, 64, 64), dtype=torch.int32) * (rank + i * world_size), - } - data_elts.append(data_elt) - # write data to the single process storage - for i in range(rank + 1): - record_id = storage.put(data_elts[i]) - assert record_id == i, f"Process {rank}: record ID {record_id}, expected {i}" - comm.synchronize() - # gather all data in process rank 0 - multi_storage = storage_gather(storage) - if rank != 0: - return - # read and check data from the multiprocess storage - for j in range(world_size): - for i in range(j): - record = multi_storage.get(j, i) - record_gt = { - "tf": torch.ones((112, 112), dtype=torch.float32) * (j + i * world_size), - "ti": torch.ones((4, 64, 64), dtype=torch.int32) * (j + i * world_size), - } - assert len(record) == len(schema), ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"expected {len(schema)} fields in the record, got {len(record)}" - ) - for field_name in schema: - assert field_name in record, ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name} not in the record" - ) - - assert record_gt[field_name].shape == record[field_name].shape, ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name}, expected shape {record_gt[field_name].shape} " - f"got 
{record[field_name].shape}" - ) - assert record_gt[field_name].dtype == record[field_name].dtype, ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name}, expected dtype {record_gt[field_name].dtype} " - f"got {record[field_name].dtype}" - ) - assert torch.allclose(record_gt[field_name], record[field_name]), ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name}, tensors are not close enough:" - f"L-inf {(record_gt[field_name]-record[field_name]).abs_().max()} " - f"L1 {(record_gt[field_name]-record[field_name]).abs_().sum()} " - ) - - -def file_read_write_worker(rank_to_fpath): - schema = { - "tf": SizeData(dtype="float32", shape=(112, 112)), - "ti": SizeData(dtype="int32", shape=(4, 64, 64)), - } - world_size = comm.get_world_size() - rank = comm.get_rank() - storage = SingleProcessFileTensorStorage(schema, rank_to_fpath[rank], "wb") - data_elts = [] - # prepare different number of tensors in different processes - for i in range(rank + 1): - data_elt = { - "tf": torch.ones((112, 112), dtype=torch.float32) * (rank + i * world_size), - "ti": torch.ones((4, 64, 64), dtype=torch.int32) * (rank + i * world_size), - } - data_elts.append(data_elt) - # write data to the single process storage - for i in range(rank + 1): - record_id = storage.put(data_elts[i]) - assert record_id == i, f"Process {rank}: record ID {record_id}, expected {i}" - comm.synchronize() - # gather all data in process rank 0 - multi_storage = storage_gather(storage) - if rank != 0: - return - # read and check data from the multiprocess storage - for j in range(world_size): - for i in range(j): - record = multi_storage.get(j, i) - record_gt = { - "tf": torch.ones((112, 112), dtype=torch.float32) * (j + i * world_size), - "ti": torch.ones((4, 64, 64), dtype=torch.int32) * (j + i * world_size), - } - assert len(record) == len(schema), ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"expected {len(schema)} fields in the record, got {len(record)}" - ) - for field_name in schema: - assert field_name in record, ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name} not in the record" - ) - - assert record_gt[field_name].shape == record[field_name].shape, ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name}, expected shape {record_gt[field_name].shape} " - f"got {record[field_name].shape}" - ) - assert record_gt[field_name].dtype == record[field_name].dtype, ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name}, expected dtype {record_gt[field_name].dtype} " - f"got {record[field_name].dtype}" - ) - assert torch.allclose(record_gt[field_name], record[field_name]), ( - f"Process {rank}: multi storage record, rank {j}, id {i}: " - f"field {field_name}, tensors are not close enough:" - f"L-inf {(record_gt[field_name]-record[field_name]).abs_().max()} " - f"L1 {(record_gt[field_name]-record[field_name]).abs_().sum()} " - ) - - -class TestMultiProcessRamTensorStorage(unittest.TestCase): - def test_read_write_1(self): - launch(ram_read_write_worker, 8) - - -class TestMultiProcessFileTensorStorage(unittest.TestCase): - def test_read_write_1(self): - with ExitStack() as stack: - # WARNING: opens the files several times! 
may not work on all platforms - rank_to_fpath = { - i: stack.enter_context(tempfile.NamedTemporaryFile()).name for i in range(8) - } - launch(file_read_write_worker, 8, (rank_to_fpath,)) diff --git a/detectron2/projects/DensePose/tests/test_video_keyframe_dataset.py b/detectron2/projects/DensePose/tests/test_video_keyframe_dataset.py deleted file mode 100644 index 988e1616cdd30757157b479990050d1ca494ce7b..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/tests/test_video_keyframe_dataset.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import contextlib -import os -import random -import tempfile -import unittest -import torch -import torchvision.io as io - -from densepose.data.transform import ImageResizeTransform -from densepose.data.video import RandomKFramesSelector, VideoKeyframeDataset - -try: - import av -except ImportError: - av = None - - -# copied from torchvision test/test_io.py -def _create_video_frames(num_frames, height, width): - y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width)) - data = [] - for i in range(num_frames): - xc = float(i) / num_frames - yc = 1 - float(i) / (2 * num_frames) - d = torch.exp(-((x - xc) ** 2 + (y - yc) ** 2) / 2) * 255 - data.append(d.unsqueeze(2).repeat(1, 1, 3).byte()) - return torch.stack(data, 0) - - -# adapted from torchvision test/test_io.py -@contextlib.contextmanager -def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None): - if lossless: - if video_codec is not None: - raise ValueError("video_codec can't be specified together with lossless") - if options is not None: - raise ValueError("options can't be specified together with lossless") - video_codec = "libx264rgb" - options = {"crf": "0"} - if video_codec is None: - video_codec = "libx264" - if options is None: - options = {} - data = _create_video_frames(num_frames, height, width) - with tempfile.NamedTemporaryFile(suffix=".mp4") as f: - f.close() - io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options) - yield f.name, data - os.unlink(f.name) - - -@unittest.skipIf(av is None, "PyAV unavailable") -class TestVideoKeyframeDataset(unittest.TestCase): - def test_read_keyframes_all(self): - with temp_video(60, 300, 300, 5, video_codec="mpeg4") as (fname, data): - video_list = [fname] - category_list = [None] - dataset = VideoKeyframeDataset(video_list, category_list) - self.assertEqual(len(dataset), 1) - data1, categories1 = dataset[0]["images"], dataset[0]["categories"] - self.assertEqual(data1.shape, torch.Size((5, 3, 300, 300))) - self.assertEqual(data1.dtype, torch.float32) - self.assertIsNone(categories1[0]) - return - self.assertTrue(False) - - def test_read_keyframes_with_selector(self): - with temp_video(60, 300, 300, 5, video_codec="mpeg4") as (fname, data): - video_list = [fname] - category_list = [None] - random.seed(0) - frame_selector = RandomKFramesSelector(3) - dataset = VideoKeyframeDataset(video_list, category_list, frame_selector) - self.assertEqual(len(dataset), 1) - data1, categories1 = dataset[0]["images"], dataset[0]["categories"] - self.assertEqual(data1.shape, torch.Size((3, 3, 300, 300))) - self.assertEqual(data1.dtype, torch.float32) - self.assertIsNone(categories1[0]) - return - self.assertTrue(False) - - def test_read_keyframes_with_selector_with_transform(self): - with temp_video(60, 300, 300, 5, video_codec="mpeg4") as (fname, data): - video_list = [fname] - category_list = [None] - random.seed(0) - 
frame_selector = RandomKFramesSelector(1) - transform = ImageResizeTransform() - dataset = VideoKeyframeDataset(video_list, category_list, frame_selector, transform) - data1, categories1 = dataset[0]["images"], dataset[0]["categories"] - self.assertEqual(len(dataset), 1) - self.assertEqual(data1.shape, torch.Size((1, 3, 800, 800))) - self.assertEqual(data1.dtype, torch.float32) - self.assertIsNone(categories1[0]) - return - self.assertTrue(False) diff --git a/detectron2/projects/DensePose/train_net.py b/detectron2/projects/DensePose/train_net.py deleted file mode 100644 index 6c06011830a729f1f21585fad078308b56b75f88..0000000000000000000000000000000000000000 --- a/detectron2/projects/DensePose/train_net.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -DensePose Training Script. - -This script is similar to the training script in detectron2/tools. - -It is an example of how a user might use detectron2 for a new project. -""" - -from datetime import timedelta - -import detectron2.utils.comm as comm -from detectron2.config import get_cfg -from detectron2.engine import DEFAULT_TIMEOUT, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import verify_results -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import setup_logger - -from densepose import add_densepose_config -from densepose.engine import Trainer -from densepose.modeling.densepose_checkpoint import DensePoseCheckpointer - - -def setup(args): - cfg = get_cfg() - add_densepose_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - # Setup logger for "densepose" module - setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose") - return cfg - - -def main(args): - cfg = setup(args) - # disable strict kwargs checking: allow one to specify path handle - # hints through kwargs, like timeout in DP evaluation - PathManager.set_strict_kwargs_checking(False) - - if args.eval_only: - model = Trainer.build_model(cfg) - DensePoseCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - cfg = setup(args) - timeout = ( - DEFAULT_TIMEOUT if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE else timedelta(hours=4) - ) - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - timeout=timeout, - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/MViTv2/README.md b/detectron2/projects/MViTv2/README.md deleted file mode 100644 index 64afd79cac8d83de5518b57199fd618eebe83645..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/README.md +++ /dev/null @@ -1,142 +0,0 @@ -# MViTv2: Improved Multiscale Vision Transformers for Classification and Detection - -Yanghao Li*, Chao-Yuan Wu*, Haoqi Fan, Karttikeya Mangalam, 
Bo Xiong, Jitendra Malik, Christoph Feichtenhofer*

[[`arXiv`](https://arxiv.org/abs/2112.01526)] [[`BibTeX`](#CitingMViTv2)]

In this repository, we provide detection configs and models for MViTv2 (CVPR 2022) in Detectron2. For image classification tasks, please refer to [MViTv2 repo](https://github.com/facebookresearch/mvit).

## Results and Pretrained Models

### COCO

| Name | pre-train | Method | epochs | box AP | mask AP | #params | FLOPS | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| MViTV2-T | IN1K | Mask R-CNN | 36 | 48.3 | 43.8 | 44M | 279G | 307611773 | model |
| MViTV2-T | IN1K | Cascade Mask R-CNN | 36 | 52.2 | 45.0 | 76M | 701G | 308344828 | model |
| MViTV2-S | IN1K | Cascade Mask R-CNN | 36 | 53.2 | 46.0 | 87M | 748G | 308344647 | model |
| MViTV2-B | IN1K | Cascade Mask R-CNN | 36 | 54.1 | 46.7 | 103M | 814G | 308109448 | model |
| MViTV2-B | IN21K | Cascade Mask R-CNN | 36 | 54.9 | 47.4 | 103M | 814G | 309003202 | model |
| MViTV2-L | IN21K | Cascade Mask R-CNN | 50 | 55.8 | 48.3 | 270M | 1519G | 308099658 | model |
| MViTV2-H | IN21K | Cascade Mask R-CNN | 36 | 56.1 | 48.5 | 718M | 3084G | 309013744 | model |
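The configs listed above are detectron2 LazyConfig files (plain Python modules). As a minimal sketch of loading one programmatically, assuming a working detectron2 installation and an illustrative path relative to the detectron2 repository root:

```python
# Minimal sketch; the config path is illustrative and weight loading is optional.
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load("projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_t_3x.py")
model = instantiate(cfg.model)  # build the model described by the config
# Optionally initialize from the checkpoint referenced by the config
# (here the ImageNet-pretrained MViTv2-T backbone weights).
DetectionCheckpointer(model).load(cfg.train.init_checkpoint)
model.eval()
```

Full training and evaluation go through `lazyconfig_train_net.py`, as shown in the Training and Evaluation sections below.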
- -Note that the above models were trained and measured on 8-node with 64 NVIDIA A100 GPUs in total. The ImageNet pre-trained model weights are obtained from [MViTv2 repo](https://github.com/facebookresearch/mvit). - -## Training -All configs can be trained with: - -``` -../../tools/lazyconfig_train_net.py --config-file configs/path/to/config.py -``` -By default, we use 64 GPUs with batch size as 64 for training. - -## Evaluation -Model evaluation can be done similarly: -``` -../../tools/lazyconfig_train_net.py --config-file configs/path/to/config.py --eval-only train.init_checkpoint=/path/to/model_checkpoint -``` - - - -## Citing MViTv2 - -If you use MViTv2, please use the following BibTeX entry. - -```BibTeX -@inproceedings{li2021improved, - title={MViTv2: Improved multiscale vision transformers for classification and detection}, - author={Li, Yanghao and Wu, Chao-Yuan and Fan, Haoqi and Mangalam, Karttikeya and Xiong, Bo and Malik, Jitendra and Feichtenhofer, Christoph}, - booktitle={CVPR}, - year={2022} -} -``` diff --git a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_b_3x.py b/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_b_3x.py deleted file mode 100644 index 61366bf11477136e8950b81dd24a1a7af9b37f8b..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_b_3x.py +++ /dev/null @@ -1,8 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_t_3x import model, dataloader, optimizer, lr_multiplier, train - - -model.backbone.bottom_up.depth = 24 -model.backbone.bottom_up.last_block_indexes = (1, 4, 20, 23) -model.backbone.bottom_up.drop_path_rate = 0.4 - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_B_in1k.pyth" diff --git a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_b_in21k_3x.py b/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_b_in21k_3x.py deleted file mode 100644 index 7c3bdce0a2206b3afd1a33245a193292f0cd2a35..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_b_in21k_3x.py +++ /dev/null @@ -1,3 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_b_3x import model, dataloader, optimizer, lr_multiplier, train - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_B_in21k.pyth" diff --git a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_h_in21k_lsj_3x.py b/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_h_in21k_lsj_3x.py deleted file mode 100644 index 6fee5e99b7d5d611d27dca62a7db7d88808f87da..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_h_in21k_lsj_3x.py +++ /dev/null @@ -1,12 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_b_3x import model, optimizer, train, lr_multiplier -from .common.coco_loader_lsj import dataloader - - -model.backbone.bottom_up.embed_dim = 192 -model.backbone.bottom_up.depth = 80 -model.backbone.bottom_up.num_heads = 3 -model.backbone.bottom_up.last_block_indexes = (3, 11, 71, 79) -model.backbone.bottom_up.drop_path_rate = 0.6 -model.backbone.bottom_up.use_act_checkpoint = True - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_H_in21k.pyth" diff --git a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_l_in21k_lsj_50ep.py b/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_l_in21k_lsj_50ep.py deleted file mode 100644 index 38da8958e0174d378555887d72a9956f4b3f8e58..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_l_in21k_lsj_50ep.py +++ /dev/null @@ -1,31 +0,0 @@ -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler - -from .cascade_mask_rcnn_mvitv2_b_3x import model, optimizer, train -from .common.coco_loader_lsj import dataloader - - -model.backbone.bottom_up.embed_dim = 144 -model.backbone.bottom_up.depth = 48 -model.backbone.bottom_up.num_heads = 2 -model.backbone.bottom_up.last_block_indexes = (1, 7, 43, 47) -model.backbone.bottom_up.drop_path_rate = 0.5 - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_L_in21k.pyth" - -# Schedule -# 50ep = 184375 // 2 iters * 64 images/iter / 118000 images/ep -train.max_iter = 184375 // 2 -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[163889 // 2, 177546 // 2], - num_updates=train.max_iter, - ), - warmup_length=250 / train.max_iter, - warmup_factor=0.001, -) - -optimizer.lr = 1e-4 diff --git a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_s_3x.py b/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_s_3x.py deleted file mode 100644 index ad8eeb4df25476893c5a966a669ecceaec2a6dbc..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_s_3x.py +++ /dev/null @@ -1,7 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_t_3x import model, dataloader, optimizer, lr_multiplier, train - - -model.backbone.bottom_up.depth = 16 -model.backbone.bottom_up.last_block_indexes = (0, 2, 13, 15) - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_S_in1k.pyth" diff --git a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_t_3x.py b/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_t_3x.py deleted file mode 100644 index 51327dd9379b011c2d6cdc8299515b6df8112f4e..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/cascade_mask_rcnn_mvitv2_t_3x.py +++ /dev/null @@ -1,48 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads -from detectron2.layers.batch_norm import NaiveSyncBatchNorm - -from .mask_rcnn_mvitv2_t_3x import model, dataloader, optimizer, lr_multiplier, train - - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] - -model.roi_heads.update( - _target_=CascadeROIHeads, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[256, 256, 256, 256], - fc_dims=[1024], - conv_norm=lambda c: NaiveSyncBatchNorm(c, stats_mode="N"), - ) - for _ in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - test_score_thresh=0.05, - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - cls_agnostic_bbox_reg=True, - num_classes="${...num_classes}", - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) - -# Using NaiveSyncBatchNorm becase heads may have empty input. 
That is not supported by -# torch.nn.SyncBatchNorm. We can remove this after -# https://github.com/pytorch/pytorch/issues/36530 is fixed. -model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c, stats_mode="N") - -# 2conv in RPN: -# https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950 -model.proposal_generator.head.conv_dims = [-1, -1] diff --git a/detectron2/projects/MViTv2/configs/common/coco_loader.py b/detectron2/projects/MViTv2/configs/common/coco_loader.py deleted file mode 100644 index 923878b8d4cdda9292738550f1c6aa18e38d5757..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/common/coco_loader.py +++ /dev/null @@ -1,59 +0,0 @@ -from omegaconf import OmegaConf - -import detectron2.data.transforms as T -from detectron2.config import LazyCall as L -from detectron2.data import ( - DatasetMapper, - build_detection_test_loader, - build_detection_train_loader, - get_detection_dataset_dicts, -) -from detectron2.evaluation import COCOEvaluator - -dataloader = OmegaConf.create() - -dataloader.train = L(build_detection_train_loader)( - dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), - mapper=L(DatasetMapper)( - is_train=True, - augmentations=[ - L(T.RandomApply)( - tfm_or_aug=L(T.AugmentationList)( - augs=[ - L(T.ResizeShortestEdge)( - short_edge_length=[400, 500, 600], sample_style="choice" - ), - L(T.RandomCrop)(crop_type="absolute_range", crop_size=(384, 600)), - ] - ), - prob=0.5, - ), - L(T.ResizeShortestEdge)( - short_edge_length=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), - sample_style="choice", - max_size=1333, - ), - L(T.RandomFlip)(horizontal=True), - ], - image_format="RGB", - use_instance_mask=True, - ), - total_batch_size=16, - num_workers=4, -) - -dataloader.test = L(build_detection_test_loader)( - dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), - mapper=L(DatasetMapper)( - is_train=False, - augmentations=[ - L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), - ], - image_format="${...train.mapper.image_format}", - ), - num_workers=4, -) - -dataloader.evaluator = L(COCOEvaluator)( - dataset_name="${..test.dataset.names}", -) diff --git a/detectron2/projects/MViTv2/configs/common/coco_loader_lsj.py b/detectron2/projects/MViTv2/configs/common/coco_loader_lsj.py deleted file mode 100644 index 019b21fb23299542f757459da12a56df1c538e2b..0000000000000000000000000000000000000000 --- a/detectron2/projects/MViTv2/configs/common/coco_loader_lsj.py +++ /dev/null @@ -1,19 +0,0 @@ -import detectron2.data.transforms as T -from detectron2 import model_zoo -from detectron2.config import LazyCall as L - -from .coco_loader import dataloader - -# Data using LSJ -image_size = 1024 -dataloader.train.mapper.augmentations = [ - L(T.RandomFlip)(horizontal=True), # flip first - L(T.ResizeScale)( - min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size - ), - L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), -] -dataloader.train.mapper.image_format = "RGB" -dataloader.train.total_batch_size = 64 -# recompute boxes due to cropping -dataloader.train.mapper.recompute_boxes = True diff --git a/detectron2/projects/MViTv2/configs/mask_rcnn_mvitv2_t_3x.py b/detectron2/projects/MViTv2/configs/mask_rcnn_mvitv2_t_3x.py deleted file mode 100644 index ba4bdfecf2fc996f3e06480a2f02781c71b5aa44..0000000000000000000000000000000000000000 --- 
a/detectron2/projects/MViTv2/configs/mask_rcnn_mvitv2_t_3x.py +++ /dev/null @@ -1,55 +0,0 @@ -from functools import partial -import torch.nn as nn -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2 import model_zoo -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler -from detectron2.modeling import MViT - -from .common.coco_loader import dataloader - -model = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model -constants = model_zoo.get_config("common/data/constants.py").constants -model.pixel_mean = constants.imagenet_rgb256_mean -model.pixel_std = constants.imagenet_rgb256_std -model.input_format = "RGB" -model.backbone.bottom_up = L(MViT)( - embed_dim=96, - depth=10, - num_heads=1, - last_block_indexes=(0, 2, 7, 9), - residual_pooling=True, - drop_path_rate=0.2, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - out_features=("scale2", "scale3", "scale4", "scale5"), -) -model.backbone.in_features = "${.bottom_up.out_features}" - - -# Initialization and trainer settings -train = model_zoo.get_config("common/train.py").train -train.amp.enabled = True -train.ddp.fp16_compression = True -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_T_in1k.pyth" - -dataloader.train.total_batch_size = 64 - -# 36 epochs -train.max_iter = 67500 -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[52500, 62500, 67500], - ), - warmup_length=250 / train.max_iter, - warmup_factor=0.001, -) - -optimizer = model_zoo.get_config("common/optim.py").AdamW -optimizer.params.overrides = { - "pos_embed": {"weight_decay": 0.0}, - "rel_pos_h": {"weight_decay": 0.0}, - "rel_pos_w": {"weight_decay": 0.0}, -} -optimizer.lr = 1.6e-4 diff --git a/detectron2/projects/Panoptic-DeepLab/README.md b/detectron2/projects/Panoptic-DeepLab/README.md deleted file mode 100644 index 86b6d42ba059d7da602b95cfdf3fe7d37ea7d4ec..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/README.md +++ /dev/null @@ -1,175 +0,0 @@ -# Panoptic-DeepLab: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation - -Bowen Cheng, Maxwell D. Collins, Yukun Zhu, Ting Liu, Thomas S. Huang, Hartwig Adam, Liang-Chieh Chen - -[[`arXiv`](https://arxiv.org/abs/1911.10194)] [[`BibTeX`](#CitingPanopticDeepLab)] [[`Reference implementation`](https://github.com/bowenc0221/panoptic-deeplab)] - -
## Installation
Install Detectron2 following [the instructions](https://detectron2.readthedocs.io/tutorials/install.html).
To use Cityscapes, prepare the data following the [tutorial](https://detectron2.readthedocs.io/tutorials/builtin_datasets.html#expected-dataset-structure-for-cityscapes).

## Training

To train a model with 8 GPUs, run:
```bash
cd /path/to/detectron2/projects/Panoptic-DeepLab
python train_net.py --config-file configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml --num-gpus 8
```

## Evaluation

Model evaluation can be done similarly:
```bash
cd /path/to/detectron2/projects/Panoptic-DeepLab
python train_net.py --config-file configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint
```

## Benchmark network speed

If you want to benchmark the network speed without post-processing, you can run the evaluation script with `MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED True`:
```bash
cd /path/to/detectron2/projects/Panoptic-DeepLab
python train_net.py --config-file configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED True
```

## Cityscapes Panoptic Segmentation
Cityscapes models are trained with ImageNet pretraining.

| Method | Backbone | Output resolution | PQ | SQ | RQ | mIoU | AP | Memory (M) | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Panoptic-DeepLab | R50-DC5 | 1024×2048 | 58.6 | 80.9 | 71.2 | 75.9 | 29.8 | 8668 | - | model \| metrics |
| Panoptic-DeepLab | R52-DC5 | 1024×2048 | 60.3 | 81.5 | 72.9 | 78.2 | 33.2 | 9682 | 30841561 | model \| metrics |
| Panoptic-DeepLab (DSConv) | R52-DC5 | 1024×2048 | 60.3 | 81.0 | 73.2 | 78.7 | 32.1 | 10466 | 33148034 | model \| metrics |
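Outside of `train_net.py`, single-image inference can be sketched with detectron2's `DefaultPredictor`. This assumes the `panoptic_deeplab` package from this project directory is importable (e.g. you run from `projects/Panoptic-DeepLab`) and uses placeholder checkpoint and image paths:

```python
# Minimal inference sketch; checkpoint and image paths are placeholders.
import cv2

from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from panoptic_deeplab import add_panoptic_deeplab_config  # package shipped in this project

cfg = get_cfg()
add_panoptic_deeplab_config(cfg)
cfg.merge_from_file(
    "configs/Cityscapes-PanopticSegmentation/"
    "panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml"
)
cfg.MODEL.WEIGHTS = "/path/to/model_checkpoint"  # a downloaded checkpoint from the table above
predictor = DefaultPredictor(cfg)

outputs = predictor(cv2.imread("input.jpg"))
panoptic_seg, segments_info = outputs["panoptic_seg"]  # segments_info is None for this model
```

The output dict also carries `sem_seg` logits and, when `MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES` is enabled (the default), instance predictions under `instances`.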
Note:
- [R52](https://dl.fbaipublicfiles.com/detectron2/DeepLab/R-52.pkl): a ResNet-50 whose first 7x7 convolution is replaced by three 3x3 convolutions. This modification is used in most semantic segmentation papers. We pre-train this backbone on ImageNet using the default recipe of [pytorch examples](https://github.com/pytorch/examples/tree/master/imagenet).
- DC5 means using dilated convolution in `res5`.
- We use a smaller training crop size (512x1024) than the original paper (1025x2049). We find that a larger crop size (1024x2048) further improves PQ by 1.5% but also degrades AP by 3%.
- The implementation with regular Conv2d in ASPP and head is much heavier than the original paper.
- This implementation does not include the optimized post-processing code needed for deployment. Post-processing the network outputs now takes a similar amount of time to the network itself. Please refer to the speed reported in the original paper for comparison.
- DSConv refers to using DepthwiseSeparableConv2d in ASPP and decoder. The implementation with DSConv is identical to the original paper.

## COCO Panoptic Segmentation
COCO models are trained with ImageNet pretraining on 16 V100s.

| Method | Backbone | Output resolution | PQ | SQ | RQ | Box AP | Mask AP | Memory (M) | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Panoptic-DeepLab (DSConv) | R52-DC5 | 640×640 | 35.5 | 77.3 | 44.7 | 18.6 | 19.7 |  | 246448865 | model \| metrics |
- -Note: -- [R52](https://dl.fbaipublicfiles.com/detectron2/DeepLab/R-52.pkl): a ResNet-50 with its first 7x7 convolution replaced by 3 3x3 convolutions. This modification has been used in most semantic segmentation papers. We pre-train this backbone on ImageNet using the default recipe of [pytorch examples](https://github.com/pytorch/examples/tree/master/imagenet). -- DC5 means using dilated convolution in `res5`. -- This reproduced number matches the original paper (35.5 vs. 35.1 PQ). -- This implementation does not include optimized post-processing code needed for deployment. Post-processing the network - outputs now takes more time than the network itself. Please refer to speed in the original paper for comparison. -- DSConv refers to using DepthwiseSeparableConv2d in ASPP and decoder. - -## Citing Panoptic-DeepLab - -If you use Panoptic-DeepLab, please use the following BibTeX entry. - -* CVPR 2020 paper: - -``` -@inproceedings{cheng2020panoptic, - title={Panoptic-DeepLab: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation}, - author={Cheng, Bowen and Collins, Maxwell D and Zhu, Yukun and Liu, Ting and Huang, Thomas S and Adam, Hartwig and Chen, Liang-Chieh}, - booktitle={CVPR}, - year={2020} -} -``` - -* ICCV 2019 COCO-Mapillary workshp challenge report: - -``` -@inproceedings{cheng2019panoptic, - title={Panoptic-DeepLab}, - author={Cheng, Bowen and Collins, Maxwell D and Zhu, Yukun and Liu, Ting and Huang, Thomas S and Adam, Hartwig and Chen, Liang-Chieh}, - booktitle={ICCV COCO + Mapillary Joint Recognition Challenge Workshop}, - year={2019} -} -``` diff --git a/detectron2/projects/Panoptic-DeepLab/configs/COCO-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_200k_bs64_crop_640_640_coco_dsconv.yaml b/detectron2/projects/Panoptic-DeepLab/configs/COCO-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_200k_bs64_crop_640_640_coco_dsconv.yaml deleted file mode 100644 index 6944c6fdf3dcaafdc0a740188610fe604cb7d3be..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/configs/COCO-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_200k_bs64_crop_640_640_coco_dsconv.yaml +++ /dev/null @@ -1,42 +0,0 @@ -_BASE_: ../Cityscapes-PanopticSegmentation/Base-PanopticDeepLab-OS16.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-52.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" - STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False - SEM_SEG_HEAD: - NUM_CLASSES: 133 - LOSS_TOP_K: 1.0 - USE_DEPTHWISE_SEPARABLE_CONV: True - PANOPTIC_DEEPLAB: - STUFF_AREA: 4096 - NMS_KERNEL: 41 - SIZE_DIVISIBILITY: 640 - USE_DEPTHWISE_SEPARABLE_CONV: True -DATASETS: - TRAIN: ("coco_2017_train_panoptic",) - TEST: ("coco_2017_val_panoptic",) -SOLVER: - BASE_LR: 0.0005 - MAX_ITER: 200000 - IMS_PER_BATCH: 64 -INPUT: - FORMAT: "RGB" - GAUSSIAN_SIGMA: 8 - MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 16)]"] - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 640 - MAX_SIZE_TRAIN: 960 - MAX_SIZE_TEST: 640 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (640, 640) diff --git a/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/Base-PanopticDeepLab-OS16.yaml b/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/Base-PanopticDeepLab-OS16.yaml deleted file mode 100644 index 
b7379980fdace160f385f0647e95325830b6bfd7..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/Base-PanopticDeepLab-OS16.yaml +++ /dev/null @@ -1,65 +0,0 @@ -MODEL: - META_ARCHITECTURE: "PanopticDeepLab" - BACKBONE: - FREEZE_AT: 0 - RESNETS: - OUT_FEATURES: ["res2", "res3", "res5"] - RES5_DILATION: 2 - SEM_SEG_HEAD: - NAME: "PanopticDeepLabSemSegHead" - IN_FEATURES: ["res2", "res3", "res5"] - PROJECT_FEATURES: ["res2", "res3"] - PROJECT_CHANNELS: [32, 64] - ASPP_CHANNELS: 256 - ASPP_DILATIONS: [6, 12, 18] - ASPP_DROPOUT: 0.1 - HEAD_CHANNELS: 256 - CONVS_DIM: 256 - COMMON_STRIDE: 4 - NUM_CLASSES: 19 - LOSS_TYPE: "hard_pixel_mining" - NORM: "SyncBN" - INS_EMBED_HEAD: - NAME: "PanopticDeepLabInsEmbedHead" - IN_FEATURES: ["res2", "res3", "res5"] - PROJECT_FEATURES: ["res2", "res3"] - PROJECT_CHANNELS: [32, 64] - ASPP_CHANNELS: 256 - ASPP_DILATIONS: [6, 12, 18] - ASPP_DROPOUT: 0.1 - HEAD_CHANNELS: 32 - CONVS_DIM: 128 - COMMON_STRIDE: 4 - NORM: "SyncBN" - CENTER_LOSS_WEIGHT: 200.0 - OFFSET_LOSS_WEIGHT: 0.01 - PANOPTIC_DEEPLAB: - STUFF_AREA: 2048 - CENTER_THRESHOLD: 0.1 - NMS_KERNEL: 7 - TOP_K_INSTANCE: 200 -DATASETS: - TRAIN: ("cityscapes_fine_panoptic_train",) - TEST: ("cityscapes_fine_panoptic_val",) -SOLVER: - OPTIMIZER: "ADAM" - BASE_LR: 0.001 - WEIGHT_DECAY: 0.0 - WEIGHT_DECAY_NORM: 0.0 - WEIGHT_DECAY_BIAS: 0.0 - MAX_ITER: 60000 - LR_SCHEDULER_NAME: "WarmupPolyLR" - IMS_PER_BATCH: 32 -INPUT: - MIN_SIZE_TRAIN: (512, 640, 704, 832, 896, 1024, 1152, 1216, 1344, 1408, 1536, 1664, 1728, 1856, 1920, 2048) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 4096 - MAX_SIZE_TEST: 2048 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (1024, 2048) -DATALOADER: - NUM_WORKERS: 10 -VERSION: 2 diff --git a/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024.yaml b/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024.yaml deleted file mode 100644 index fde902bb2a87ccaf2c6fea4e79be4144ca44e239..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: Base-PanopticDeepLab-OS16.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-52.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" - STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False -SOLVER: - MAX_ITER: 90000 -INPUT: - FORMAT: "RGB" - CROP: - SIZE: (512, 1024) diff --git a/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml b/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml deleted file mode 100644 index 8e314204c9b464993d92d3b4d95e2aa9b287b938..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/configs/Cityscapes-PanopticSegmentation/panoptic_deeplab_R_52_os16_mg124_poly_90k_bs32_crop_512_1024_dsconv.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: Base-PanopticDeepLab-OS16.yaml -MODEL: - WEIGHTS: "detectron2://DeepLab/R-52.pkl" - 
PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - BACKBONE: - NAME: "build_resnet_deeplab_backbone" - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - RES5_MULTI_GRID: [1, 2, 4] - STEM_TYPE: "deeplab" - STEM_OUT_CHANNELS: 128 - STRIDE_IN_1X1: False - PANOPTIC_DEEPLAB: - USE_DEPTHWISE_SEPARABLE_CONV: True - SEM_SEG_HEAD: - USE_DEPTHWISE_SEPARABLE_CONV: True -SOLVER: - MAX_ITER: 90000 -INPUT: - FORMAT: "RGB" - CROP: - SIZE: (512, 1024) diff --git a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/__init__.py b/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/__init__.py deleted file mode 100644 index 8d3c980643bbd385594850bfbffa84cd1412c162..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .config import add_panoptic_deeplab_config -from .dataset_mapper import PanopticDeeplabDatasetMapper -from .panoptic_seg import ( - PanopticDeepLab, - INS_EMBED_BRANCHES_REGISTRY, - build_ins_embed_branch, - PanopticDeepLabSemSegHead, - PanopticDeepLabInsEmbedHead, -) diff --git a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/config.py b/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/config.py deleted file mode 100644 index 5aa2d280c66dbccc9ff8c3ccf39ccfbfc1eaa430..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/config.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -from detectron2.config import CfgNode as CN -from detectron2.projects.deeplab import add_deeplab_config - - -def add_panoptic_deeplab_config(cfg): - """ - Add config for Panoptic-DeepLab. - """ - # Reuse DeepLab config. - add_deeplab_config(cfg) - # Target generation parameters. - cfg.INPUT.GAUSSIAN_SIGMA = 10 - cfg.INPUT.IGNORE_STUFF_IN_OFFSET = True - cfg.INPUT.SMALL_INSTANCE_AREA = 4096 - cfg.INPUT.SMALL_INSTANCE_WEIGHT = 3 - cfg.INPUT.IGNORE_CROWD_IN_SEMANTIC = False - # Optimizer type. - cfg.SOLVER.OPTIMIZER = "ADAM" - # Panoptic-DeepLab semantic segmentation head. - # We add an extra convolution before predictor. - cfg.MODEL.SEM_SEG_HEAD.HEAD_CHANNELS = 256 - cfg.MODEL.SEM_SEG_HEAD.LOSS_TOP_K = 0.2 - # Panoptic-DeepLab instance segmentation head. - cfg.MODEL.INS_EMBED_HEAD = CN() - cfg.MODEL.INS_EMBED_HEAD.NAME = "PanopticDeepLabInsEmbedHead" - cfg.MODEL.INS_EMBED_HEAD.IN_FEATURES = ["res2", "res3", "res5"] - cfg.MODEL.INS_EMBED_HEAD.PROJECT_FEATURES = ["res2", "res3"] - cfg.MODEL.INS_EMBED_HEAD.PROJECT_CHANNELS = [32, 64] - cfg.MODEL.INS_EMBED_HEAD.ASPP_CHANNELS = 256 - cfg.MODEL.INS_EMBED_HEAD.ASPP_DILATIONS = [6, 12, 18] - cfg.MODEL.INS_EMBED_HEAD.ASPP_DROPOUT = 0.1 - # We add an extra convolution before predictor. - cfg.MODEL.INS_EMBED_HEAD.HEAD_CHANNELS = 32 - cfg.MODEL.INS_EMBED_HEAD.CONVS_DIM = 128 - cfg.MODEL.INS_EMBED_HEAD.COMMON_STRIDE = 4 - cfg.MODEL.INS_EMBED_HEAD.NORM = "SyncBN" - cfg.MODEL.INS_EMBED_HEAD.CENTER_LOSS_WEIGHT = 200.0 - cfg.MODEL.INS_EMBED_HEAD.OFFSET_LOSS_WEIGHT = 0.01 - # Panoptic-DeepLab post-processing setting. - cfg.MODEL.PANOPTIC_DEEPLAB = CN() - # Stuff area limit, ignore stuff region below this number. - cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA = 2048 - cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD = 0.1 - cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL = 7 - cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE = 200 - # If set to False, Panoptic-DeepLab will not evaluate instance segmentation. 
- cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES = True - cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV = False - # This is the padding parameter for images with various sizes. ASPP layers - # requires input images to be divisible by the average pooling size and we - # can use `MODEL.PANOPTIC_DEEPLAB.SIZE_DIVISIBILITY` to pad all images to - # a fixed resolution (e.g. 640x640 for COCO) to avoid having a image size - # that is not divisible by ASPP average pooling size. - cfg.MODEL.PANOPTIC_DEEPLAB.SIZE_DIVISIBILITY = -1 - # Only evaluates network speed (ignores post-processing). - cfg.MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED = False diff --git a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/dataset_mapper.py b/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/dataset_mapper.py deleted file mode 100644 index 53272c726af810efc248f2428dda7ca7271fcd00..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/dataset_mapper.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import logging -import numpy as np -from typing import Callable, List, Union -import torch -from panopticapi.utils import rgb2id - -from detectron2.config import configurable -from detectron2.data import MetadataCatalog -from detectron2.data import detection_utils as utils -from detectron2.data import transforms as T - -from .target_generator import PanopticDeepLabTargetGenerator - -__all__ = ["PanopticDeeplabDatasetMapper"] - - -class PanopticDeeplabDatasetMapper: - """ - The callable currently does the following: - - 1. Read the image from "file_name" and label from "pan_seg_file_name" - 2. Applies random scale, crop and flip transforms to image and label - 3. Prepare data to Tensor and generate training targets from label - """ - - @configurable - def __init__( - self, - *, - augmentations: List[Union[T.Augmentation, T.Transform]], - image_format: str, - panoptic_target_generator: Callable, - ): - """ - NOTE: this interface is experimental. - - Args: - augmentations: a list of augmentations or deterministic transforms to apply - image_format: an image format supported by :func:`detection_utils.read_image`. - panoptic_target_generator: a callable that takes "panoptic_seg" and - "segments_info" to generate training targets for the model. - """ - # fmt: off - self.augmentations = T.AugmentationList(augmentations) - self.image_format = image_format - # fmt: on - logger = logging.getLogger(__name__) - logger.info("Augmentations used in training: " + str(augmentations)) - - self.panoptic_target_generator = panoptic_target_generator - - @classmethod - def from_config(cls, cfg): - augs = [ - T.ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TRAIN, - cfg.INPUT.MAX_SIZE_TRAIN, - cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, - ) - ] - if cfg.INPUT.CROP.ENABLED: - augs.append(T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) - augs.append(T.RandomFlip()) - - # Assume always applies to the training set. 
- dataset_names = cfg.DATASETS.TRAIN - meta = MetadataCatalog.get(dataset_names[0]) - panoptic_target_generator = PanopticDeepLabTargetGenerator( - ignore_label=meta.ignore_label, - thing_ids=list(meta.thing_dataset_id_to_contiguous_id.values()), - sigma=cfg.INPUT.GAUSSIAN_SIGMA, - ignore_stuff_in_offset=cfg.INPUT.IGNORE_STUFF_IN_OFFSET, - small_instance_area=cfg.INPUT.SMALL_INSTANCE_AREA, - small_instance_weight=cfg.INPUT.SMALL_INSTANCE_WEIGHT, - ignore_crowd_in_semantic=cfg.INPUT.IGNORE_CROWD_IN_SEMANTIC, - ) - - ret = { - "augmentations": augs, - "image_format": cfg.INPUT.FORMAT, - "panoptic_target_generator": panoptic_target_generator, - } - return ret - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - # Load image. - image = utils.read_image(dataset_dict["file_name"], format=self.image_format) - utils.check_image_size(dataset_dict, image) - # Panoptic label is encoded in RGB image. - pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") - - # Reuses semantic transform for panoptic labels. - aug_input = T.AugInput(image, sem_seg=pan_seg_gt) - _ = self.augmentations(aug_input) - image, pan_seg_gt = aug_input.image, aug_input.sem_seg - - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - - # Generates training targets for Panoptic-DeepLab. - targets = self.panoptic_target_generator(rgb2id(pan_seg_gt), dataset_dict["segments_info"]) - dataset_dict.update(targets) - - return dataset_dict diff --git a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/panoptic_seg.py b/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/panoptic_seg.py deleted file mode 100644 index c12ca74e3b281e74e8893c87d2ba7e2b60931c65..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/panoptic_seg.py +++ /dev/null @@ -1,572 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -from typing import Callable, Dict, List, Union -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.data import MetadataCatalog -from detectron2.layers import Conv2d, DepthwiseSeparableConv2d, ShapeSpec, get_norm -from detectron2.modeling import ( - META_ARCH_REGISTRY, - SEM_SEG_HEADS_REGISTRY, - build_backbone, - build_sem_seg_head, -) -from detectron2.modeling.postprocessing import sem_seg_postprocess -from detectron2.projects.deeplab import DeepLabV3PlusHead -from detectron2.projects.deeplab.loss import DeepLabCE -from detectron2.structures import BitMasks, ImageList, Instances -from detectron2.utils.registry import Registry - -from .post_processing import get_panoptic_segmentation - -__all__ = ["PanopticDeepLab", "INS_EMBED_BRANCHES_REGISTRY", "build_ins_embed_branch"] - - -INS_EMBED_BRANCHES_REGISTRY = Registry("INS_EMBED_BRANCHES") -INS_EMBED_BRANCHES_REGISTRY.__doc__ = """ -Registry for instance embedding branches, which make instance embedding -predictions from feature maps. 
-""" - - -@META_ARCH_REGISTRY.register() -class PanopticDeepLab(nn.Module): - """ - Main class for panoptic segmentation architectures. - """ - - def __init__(self, cfg): - super().__init__() - self.backbone = build_backbone(cfg) - self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) - self.ins_embed_head = build_ins_embed_branch(cfg, self.backbone.output_shape()) - self.register_buffer("pixel_mean", torch.tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1), False) - self.meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) - self.stuff_area = cfg.MODEL.PANOPTIC_DEEPLAB.STUFF_AREA - self.threshold = cfg.MODEL.PANOPTIC_DEEPLAB.CENTER_THRESHOLD - self.nms_kernel = cfg.MODEL.PANOPTIC_DEEPLAB.NMS_KERNEL - self.top_k = cfg.MODEL.PANOPTIC_DEEPLAB.TOP_K_INSTANCE - self.predict_instances = cfg.MODEL.PANOPTIC_DEEPLAB.PREDICT_INSTANCES - self.use_depthwise_separable_conv = cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV - assert ( - cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV - == cfg.MODEL.PANOPTIC_DEEPLAB.USE_DEPTHWISE_SEPARABLE_CONV - ) - self.size_divisibility = cfg.MODEL.PANOPTIC_DEEPLAB.SIZE_DIVISIBILITY - self.benchmark_network_speed = cfg.MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - * "image": Tensor, image in (C, H, W) format. - * "sem_seg": semantic segmentation ground truth - * "center": center points heatmap ground truth - * "offset": pixel offsets to center points ground truth - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model (may be different - from input resolution), used in inference. - Returns: - list[dict]: - each dict is the results for one image. The dict contains the following keys: - - * "panoptic_seg", "sem_seg": see documentation - :doc:`/tutorials/models` for the standard output format - * "instances": available if ``predict_instances is True``. see documentation - :doc:`/tutorials/models` for the standard output format - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - # To avoid error in ASPP layer when input has different size. - size_divisibility = ( - self.size_divisibility - if self.size_divisibility > 0 - else self.backbone.size_divisibility - ) - images = ImageList.from_tensors(images, size_divisibility) - - features = self.backbone(images.tensor) - - losses = {} - if "sem_seg" in batched_inputs[0]: - targets = [x["sem_seg"].to(self.device) for x in batched_inputs] - targets = ImageList.from_tensors( - targets, size_divisibility, self.sem_seg_head.ignore_value - ).tensor - if "sem_seg_weights" in batched_inputs[0]: - # The default D2 DatasetMapper may not contain "sem_seg_weights" - # Avoid error in testing when default DatasetMapper is used. 
- weights = [x["sem_seg_weights"].to(self.device) for x in batched_inputs] - weights = ImageList.from_tensors(weights, size_divisibility).tensor - else: - weights = None - else: - targets = None - weights = None - sem_seg_results, sem_seg_losses = self.sem_seg_head(features, targets, weights) - losses.update(sem_seg_losses) - - if "center" in batched_inputs[0] and "offset" in batched_inputs[0]: - center_targets = [x["center"].to(self.device) for x in batched_inputs] - center_targets = ImageList.from_tensors( - center_targets, size_divisibility - ).tensor.unsqueeze(1) - center_weights = [x["center_weights"].to(self.device) for x in batched_inputs] - center_weights = ImageList.from_tensors(center_weights, size_divisibility).tensor - - offset_targets = [x["offset"].to(self.device) for x in batched_inputs] - offset_targets = ImageList.from_tensors(offset_targets, size_divisibility).tensor - offset_weights = [x["offset_weights"].to(self.device) for x in batched_inputs] - offset_weights = ImageList.from_tensors(offset_weights, size_divisibility).tensor - else: - center_targets = None - center_weights = None - - offset_targets = None - offset_weights = None - - center_results, offset_results, center_losses, offset_losses = self.ins_embed_head( - features, center_targets, center_weights, offset_targets, offset_weights - ) - losses.update(center_losses) - losses.update(offset_losses) - - if self.training: - return losses - - if self.benchmark_network_speed: - return [] - - processed_results = [] - for sem_seg_result, center_result, offset_result, input_per_image, image_size in zip( - sem_seg_results, center_results, offset_results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height") - width = input_per_image.get("width") - r = sem_seg_postprocess(sem_seg_result, image_size, height, width) - c = sem_seg_postprocess(center_result, image_size, height, width) - o = sem_seg_postprocess(offset_result, image_size, height, width) - # Post-processing to get panoptic segmentation. - panoptic_image, _ = get_panoptic_segmentation( - r.argmax(dim=0, keepdim=True), - c, - o, - thing_ids=self.meta.thing_dataset_id_to_contiguous_id.values(), - label_divisor=self.meta.label_divisor, - stuff_area=self.stuff_area, - void_label=-1, - threshold=self.threshold, - nms_kernel=self.nms_kernel, - top_k=self.top_k, - ) - # For semantic segmentation evaluation. - processed_results.append({"sem_seg": r}) - panoptic_image = panoptic_image.squeeze(0) - semantic_prob = F.softmax(r, dim=0) - # For panoptic segmentation evaluation. - processed_results[-1]["panoptic_seg"] = (panoptic_image, None) - # For instance segmentation evaluation. - if self.predict_instances: - instances = [] - panoptic_image_cpu = panoptic_image.cpu().numpy() - for panoptic_label in np.unique(panoptic_image_cpu): - if panoptic_label == -1: - continue - pred_class = panoptic_label // self.meta.label_divisor - isthing = pred_class in list( - self.meta.thing_dataset_id_to_contiguous_id.values() - ) - # Get instance segmentation results. - if isthing: - instance = Instances((height, width)) - # Evaluation code takes continuous id starting from 0 - instance.pred_classes = torch.tensor( - [pred_class], device=panoptic_image.device - ) - mask = panoptic_image == panoptic_label - instance.pred_masks = mask.unsqueeze(0) - # Average semantic probability - sem_scores = semantic_prob[pred_class, ...] 
- sem_scores = torch.mean(sem_scores[mask]) - # Center point probability - mask_indices = torch.nonzero(mask).float() - center_y, center_x = ( - torch.mean(mask_indices[:, 0]), - torch.mean(mask_indices[:, 1]), - ) - center_scores = c[0, int(center_y.item()), int(center_x.item())] - # Confidence score is semantic prob * center prob. - instance.scores = torch.tensor( - [sem_scores * center_scores], device=panoptic_image.device - ) - # Get bounding boxes - instance.pred_boxes = BitMasks(instance.pred_masks).get_bounding_boxes() - instances.append(instance) - if len(instances) > 0: - processed_results[-1]["instances"] = Instances.cat(instances) - - return processed_results - - -@SEM_SEG_HEADS_REGISTRY.register() -class PanopticDeepLabSemSegHead(DeepLabV3PlusHead): - """ - A semantic segmentation head described in :paper:`Panoptic-DeepLab`. - """ - - @configurable - def __init__( - self, - input_shape: Dict[str, ShapeSpec], - *, - decoder_channels: List[int], - norm: Union[str, Callable], - head_channels: int, - loss_weight: float, - loss_type: str, - loss_top_k: float, - ignore_value: int, - num_classes: int, - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - decoder_channels (list[int]): a list of output channels of each - decoder stage. It should have the same length as "input_shape" - (each element in "input_shape" corresponds to one decoder stage). - norm (str or callable): normalization for all conv layers. - head_channels (int): the output channels of extra convolutions - between decoder and predictor. - loss_weight (float): loss weight. - loss_top_k: (float): setting the top k% hardest pixels for - "hard_pixel_mining" loss. - loss_type, ignore_value, num_classes: the same as the base class. - """ - super().__init__( - input_shape, - decoder_channels=decoder_channels, - norm=norm, - ignore_value=ignore_value, - **kwargs, - ) - assert self.decoder_only - - self.loss_weight = loss_weight - use_bias = norm == "" - # `head` is additional transform before predictor - if self.use_depthwise_separable_conv: - # We use a single 5x5 DepthwiseSeparableConv2d to replace - # 2 3x3 Conv2d since they have the same receptive field. 
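- # (Two stacked 3x3 convolutions cover a 5x5 receptive field, so a single 5x5
- # depthwise-separable conv matches their spatial extent at a lower parameter and FLOP cost.)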
- self.head = DepthwiseSeparableConv2d( - decoder_channels[0], - head_channels, - kernel_size=5, - padding=2, - norm1=norm, - activation1=F.relu, - norm2=norm, - activation2=F.relu, - ) - else: - self.head = nn.Sequential( - Conv2d( - decoder_channels[0], - decoder_channels[0], - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, decoder_channels[0]), - activation=F.relu, - ), - Conv2d( - decoder_channels[0], - head_channels, - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, head_channels), - activation=F.relu, - ), - ) - weight_init.c2_xavier_fill(self.head[0]) - weight_init.c2_xavier_fill(self.head[1]) - self.predictor = Conv2d(head_channels, num_classes, kernel_size=1) - nn.init.normal_(self.predictor.weight, 0, 0.001) - nn.init.constant_(self.predictor.bias, 0) - - if loss_type == "cross_entropy": - self.loss = nn.CrossEntropyLoss(reduction="mean", ignore_index=ignore_value) - elif loss_type == "hard_pixel_mining": - self.loss = DeepLabCE(ignore_label=ignore_value, top_k_percent_pixels=loss_top_k) - else: - raise ValueError("Unexpected loss type: %s" % loss_type) - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret["head_channels"] = cfg.MODEL.SEM_SEG_HEAD.HEAD_CHANNELS - ret["loss_top_k"] = cfg.MODEL.SEM_SEG_HEAD.LOSS_TOP_K - return ret - - def forward(self, features, targets=None, weights=None): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - y = self.layers(features) - if self.training: - return None, self.losses(y, targets, weights) - else: - y = F.interpolate( - y, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - return y, {} - - def layers(self, features): - assert self.decoder_only - y = super().layers(features) - y = self.head(y) - y = self.predictor(y) - return y - - def losses(self, predictions, targets, weights=None): - predictions = F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - loss = self.loss(predictions, targets, weights) - losses = {"loss_sem_seg": loss * self.loss_weight} - return losses - - -def build_ins_embed_branch(cfg, input_shape): - """ - Build a instance embedding branch from `cfg.MODEL.INS_EMBED_HEAD.NAME`. - """ - name = cfg.MODEL.INS_EMBED_HEAD.NAME - return INS_EMBED_BRANCHES_REGISTRY.get(name)(cfg, input_shape) - - -@INS_EMBED_BRANCHES_REGISTRY.register() -class PanopticDeepLabInsEmbedHead(DeepLabV3PlusHead): - """ - A instance embedding head described in :paper:`Panoptic-DeepLab`. - """ - - @configurable - def __init__( - self, - input_shape: Dict[str, ShapeSpec], - *, - decoder_channels: List[int], - norm: Union[str, Callable], - head_channels: int, - center_loss_weight: float, - offset_loss_weight: float, - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - decoder_channels (list[int]): a list of output channels of each - decoder stage. It should have the same length as "input_shape" - (each element in "input_shape" corresponds to one decoder stage). - norm (str or callable): normalization for all conv layers. - head_channels (int): the output channels of extra convolutions - between decoder and predictor. - center_loss_weight (float): loss weight for center point prediction. - offset_loss_weight (float): loss weight for center offset prediction. 
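- kwargs: the remaining keyword arguments are forwarded unchanged to the
- :class:`DeepLabV3PlusHead` base class.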
- """ - super().__init__(input_shape, decoder_channels=decoder_channels, norm=norm, **kwargs) - assert self.decoder_only - - self.center_loss_weight = center_loss_weight - self.offset_loss_weight = offset_loss_weight - use_bias = norm == "" - # center prediction - # `head` is additional transform before predictor - self.center_head = nn.Sequential( - Conv2d( - decoder_channels[0], - decoder_channels[0], - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, decoder_channels[0]), - activation=F.relu, - ), - Conv2d( - decoder_channels[0], - head_channels, - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, head_channels), - activation=F.relu, - ), - ) - weight_init.c2_xavier_fill(self.center_head[0]) - weight_init.c2_xavier_fill(self.center_head[1]) - self.center_predictor = Conv2d(head_channels, 1, kernel_size=1) - nn.init.normal_(self.center_predictor.weight, 0, 0.001) - nn.init.constant_(self.center_predictor.bias, 0) - - # offset prediction - # `head` is additional transform before predictor - if self.use_depthwise_separable_conv: - # We use a single 5x5 DepthwiseSeparableConv2d to replace - # 2 3x3 Conv2d since they have the same receptive field. - self.offset_head = DepthwiseSeparableConv2d( - decoder_channels[0], - head_channels, - kernel_size=5, - padding=2, - norm1=norm, - activation1=F.relu, - norm2=norm, - activation2=F.relu, - ) - else: - self.offset_head = nn.Sequential( - Conv2d( - decoder_channels[0], - decoder_channels[0], - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, decoder_channels[0]), - activation=F.relu, - ), - Conv2d( - decoder_channels[0], - head_channels, - kernel_size=3, - padding=1, - bias=use_bias, - norm=get_norm(norm, head_channels), - activation=F.relu, - ), - ) - weight_init.c2_xavier_fill(self.offset_head[0]) - weight_init.c2_xavier_fill(self.offset_head[1]) - self.offset_predictor = Conv2d(head_channels, 2, kernel_size=1) - nn.init.normal_(self.offset_predictor.weight, 0, 0.001) - nn.init.constant_(self.offset_predictor.bias, 0) - - self.center_loss = nn.MSELoss(reduction="none") - self.offset_loss = nn.L1Loss(reduction="none") - - @classmethod - def from_config(cls, cfg, input_shape): - if cfg.INPUT.CROP.ENABLED: - assert cfg.INPUT.CROP.TYPE == "absolute" - train_size = cfg.INPUT.CROP.SIZE - else: - train_size = None - decoder_channels = [cfg.MODEL.INS_EMBED_HEAD.CONVS_DIM] * ( - len(cfg.MODEL.INS_EMBED_HEAD.IN_FEATURES) - 1 - ) + [cfg.MODEL.INS_EMBED_HEAD.ASPP_CHANNELS] - ret = dict( - input_shape={ - k: v for k, v in input_shape.items() if k in cfg.MODEL.INS_EMBED_HEAD.IN_FEATURES - }, - project_channels=cfg.MODEL.INS_EMBED_HEAD.PROJECT_CHANNELS, - aspp_dilations=cfg.MODEL.INS_EMBED_HEAD.ASPP_DILATIONS, - aspp_dropout=cfg.MODEL.INS_EMBED_HEAD.ASPP_DROPOUT, - decoder_channels=decoder_channels, - common_stride=cfg.MODEL.INS_EMBED_HEAD.COMMON_STRIDE, - norm=cfg.MODEL.INS_EMBED_HEAD.NORM, - train_size=train_size, - head_channels=cfg.MODEL.INS_EMBED_HEAD.HEAD_CHANNELS, - center_loss_weight=cfg.MODEL.INS_EMBED_HEAD.CENTER_LOSS_WEIGHT, - offset_loss_weight=cfg.MODEL.INS_EMBED_HEAD.OFFSET_LOSS_WEIGHT, - use_depthwise_separable_conv=cfg.MODEL.SEM_SEG_HEAD.USE_DEPTHWISE_SEPARABLE_CONV, - ) - return ret - - def forward( - self, - features, - center_targets=None, - center_weights=None, - offset_targets=None, - offset_weights=None, - ): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - center, offset = self.layers(features) - if self.training: - return 
( - None, - None, - self.center_losses(center, center_targets, center_weights), - self.offset_losses(offset, offset_targets, offset_weights), - ) - else: - center = F.interpolate( - center, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - offset = ( - F.interpolate( - offset, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - * self.common_stride - ) - return center, offset, {}, {} - - def layers(self, features): - assert self.decoder_only - y = super().layers(features) - # center - center = self.center_head(y) - center = self.center_predictor(center) - # offset - offset = self.offset_head(y) - offset = self.offset_predictor(offset) - return center, offset - - def center_losses(self, predictions, targets, weights): - predictions = F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - loss = self.center_loss(predictions, targets) * weights - if weights.sum() > 0: - loss = loss.sum() / weights.sum() - else: - loss = loss.sum() * 0 - losses = {"loss_center": loss * self.center_loss_weight} - return losses - - def offset_losses(self, predictions, targets, weights): - predictions = ( - F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - * self.common_stride - ) - loss = self.offset_loss(predictions, targets) * weights - if weights.sum() > 0: - loss = loss.sum() / weights.sum() - else: - loss = loss.sum() * 0 - losses = {"loss_offset": loss * self.offset_loss_weight} - return losses diff --git a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/post_processing.py b/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/post_processing.py deleted file mode 100644 index 194724eb414db073bde87bf482e5c647fa23cde7..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/post_processing.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# Reference: https://github.com/bowenc0221/panoptic-deeplab/blob/master/segmentation/model/post_processing/instance_post_processing.py # noqa - -from collections import Counter -import torch -import torch.nn.functional as F - - -def find_instance_center(center_heatmap, threshold=0.1, nms_kernel=3, top_k=None): - """ - Find the center points from the center heatmap. - Args: - center_heatmap: A Tensor of shape [1, H, W] of raw center heatmap output. - threshold: A float, threshold applied to center heatmap score. - nms_kernel: An integer, NMS max pooling kernel size. - top_k: An integer, top k centers to keep. - Returns: - A Tensor of shape [K, 2] where K is the number of center points. The - order of second dim is (y, x). - """ - # Thresholding, setting values below threshold to -1. - center_heatmap = F.threshold(center_heatmap, threshold, -1) - - # NMS - nms_padding = (nms_kernel - 1) // 2 - center_heatmap_max_pooled = F.max_pool2d( - center_heatmap, kernel_size=nms_kernel, stride=1, padding=nms_padding - ) - center_heatmap[center_heatmap != center_heatmap_max_pooled] = -1 - - # Squeeze first two dimensions. - center_heatmap = center_heatmap.squeeze() - assert len(center_heatmap.size()) == 2, "Something is wrong with center heatmap dimension." - - # Find non-zero elements. - if top_k is None: - return torch.nonzero(center_heatmap > 0) - else: - # find top k centers. 
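- # (Keep only peaks scoring above the k-th largest value; clamping that value at 0
- # also discards peaks already suppressed to -1 by the thresholding/NMS above.)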
- top_k_scores, _ = torch.topk(torch.flatten(center_heatmap), top_k) - return torch.nonzero(center_heatmap > top_k_scores[-1].clamp_(min=0)) - - -def group_pixels(center_points, offsets): - """ - Gives each pixel in the image an instance id. - Args: - center_points: A Tensor of shape [K, 2] where K is the number of center points. - The order of second dim is (y, x). - offsets: A Tensor of shape [2, H, W] of raw offset output. The order of - second dim is (offset_y, offset_x). - Returns: - A Tensor of shape [1, H, W] with values in range [1, K], which represents - the center this pixel belongs to. - """ - height, width = offsets.size()[1:] - - # Generates a coordinate map, where each location is the coordinate of - # that location. - y_coord, x_coord = torch.meshgrid( - torch.arange(height, dtype=offsets.dtype, device=offsets.device), - torch.arange(width, dtype=offsets.dtype, device=offsets.device), - ) - coord = torch.cat((y_coord.unsqueeze(0), x_coord.unsqueeze(0)), dim=0) - - center_loc = coord + offsets - center_loc = center_loc.flatten(1).T.unsqueeze_(0) # [1, H*W, 2] - center_points = center_points.unsqueeze(1) # [K, 1, 2] - - # Distance: [K, H*W]. - distance = torch.norm(center_points - center_loc, dim=-1) - - # Finds center with minimum distance at each location, offset by 1, to - # reserve id=0 for stuff. - instance_id = torch.argmin(distance, dim=0).reshape((1, height, width)) + 1 - return instance_id - - -def get_instance_segmentation( - sem_seg, center_heatmap, offsets, thing_seg, thing_ids, threshold=0.1, nms_kernel=3, top_k=None -): - """ - Post-processing for instance segmentation, gets class agnostic instance id. - Args: - sem_seg: A Tensor of shape [1, H, W], predicted semantic label. - center_heatmap: A Tensor of shape [1, H, W] of raw center heatmap output. - offsets: A Tensor of shape [2, H, W] of raw offset output. The order of - second dim is (offset_y, offset_x). - thing_seg: A Tensor of shape [1, H, W], predicted foreground mask, - if not provided, inference from semantic prediction. - thing_ids: A set of ids from contiguous category ids belonging - to thing categories. - threshold: A float, threshold applied to center heatmap score. - nms_kernel: An integer, NMS max pooling kernel size. - top_k: An integer, top k centers to keep. - Returns: - A Tensor of shape [1, H, W] with value 0 represent stuff (not instance) - and other positive values represent different instances. - A Tensor of shape [1, K, 2] where K is the number of center points. - The order of second dim is (y, x). - """ - center_points = find_instance_center( - center_heatmap, threshold=threshold, nms_kernel=nms_kernel, top_k=top_k - ) - if center_points.size(0) == 0: - return torch.zeros_like(sem_seg), center_points.unsqueeze(0) - ins_seg = group_pixels(center_points, offsets) - return thing_seg * ins_seg, center_points.unsqueeze(0) - - -def merge_semantic_and_instance( - sem_seg, ins_seg, semantic_thing_seg, label_divisor, thing_ids, stuff_area, void_label -): - """ - Post-processing for panoptic segmentation, by merging semantic segmentation - label and class agnostic instance segmentation label. - Args: - sem_seg: A Tensor of shape [1, H, W], predicted category id for each pixel. - ins_seg: A Tensor of shape [1, H, W], predicted instance id for each pixel. - semantic_thing_seg: A Tensor of shape [1, H, W], predicted foreground mask. - label_divisor: An integer, used to convert panoptic id = - semantic id * label_divisor + instance_id. 
- thing_ids: Set, a set of ids from contiguous category ids belonging - to thing categories. - stuff_area: An integer, remove stuff whose area is less tan stuff_area. - void_label: An integer, indicates the region has no confident prediction. - Returns: - A Tensor of shape [1, H, W]. - """ - # In case thing mask does not align with semantic prediction. - pan_seg = torch.zeros_like(sem_seg) + void_label - is_thing = (ins_seg > 0) & (semantic_thing_seg > 0) - - # Keep track of instance id for each class. - class_id_tracker = Counter() - - # Paste thing by majority voting. - instance_ids = torch.unique(ins_seg) - for ins_id in instance_ids: - if ins_id == 0: - continue - # Make sure only do majority voting within `semantic_thing_seg`. - thing_mask = (ins_seg == ins_id) & is_thing - if torch.nonzero(thing_mask).size(0) == 0: - continue - class_id, _ = torch.mode(sem_seg[thing_mask].view(-1)) - class_id_tracker[class_id.item()] += 1 - new_ins_id = class_id_tracker[class_id.item()] - pan_seg[thing_mask] = class_id * label_divisor + new_ins_id - - # Paste stuff to unoccupied area. - class_ids = torch.unique(sem_seg) - for class_id in class_ids: - if class_id.item() in thing_ids: - # thing class - continue - # Calculate stuff area. - stuff_mask = (sem_seg == class_id) & (ins_seg == 0) - if stuff_mask.sum().item() >= stuff_area: - pan_seg[stuff_mask] = class_id * label_divisor - - return pan_seg - - -def get_panoptic_segmentation( - sem_seg, - center_heatmap, - offsets, - thing_ids, - label_divisor, - stuff_area, - void_label, - threshold=0.1, - nms_kernel=7, - top_k=200, - foreground_mask=None, -): - """ - Post-processing for panoptic segmentation. - Args: - sem_seg: A Tensor of shape [1, H, W] of predicted semantic label. - center_heatmap: A Tensor of shape [1, H, W] of raw center heatmap output. - offsets: A Tensor of shape [2, H, W] of raw offset output. The order of - second dim is (offset_y, offset_x). - thing_ids: A set of ids from contiguous category ids belonging - to thing categories. - label_divisor: An integer, used to convert panoptic id = - semantic id * label_divisor + instance_id. - stuff_area: An integer, remove stuff whose area is less tan stuff_area. - void_label: An integer, indicates the region has no confident prediction. - threshold: A float, threshold applied to center heatmap score. - nms_kernel: An integer, NMS max pooling kernel size. - top_k: An integer, top k centers to keep. - foreground_mask: Optional, A Tensor of shape [1, H, W] of predicted - binary foreground mask. If not provided, it will be generated from - sem_seg. - Returns: - A Tensor of shape [1, H, W], int64. 
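- For example (illustrative values): with label_divisor=1000, the 2nd predicted
- instance of thing class 11 is encoded as 11 * 1000 + 2 = 11002, a stuff region
- of class 7 as 7 * 1000 = 7000, and pixels without a confident prediction keep
- void_label.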
- """ - if sem_seg.dim() != 3 and sem_seg.size(0) != 1: - raise ValueError("Semantic prediction with un-supported shape: {}.".format(sem_seg.size())) - if center_heatmap.dim() != 3: - raise ValueError( - "Center prediction with un-supported dimension: {}.".format(center_heatmap.dim()) - ) - if offsets.dim() != 3: - raise ValueError("Offset prediction with un-supported dimension: {}.".format(offsets.dim())) - if foreground_mask is not None: - if foreground_mask.dim() != 3 and foreground_mask.size(0) != 1: - raise ValueError( - "Foreground prediction with un-supported shape: {}.".format(sem_seg.size()) - ) - thing_seg = foreground_mask - else: - # inference from semantic segmentation - thing_seg = torch.zeros_like(sem_seg) - for thing_class in list(thing_ids): - thing_seg[sem_seg == thing_class] = 1 - - instance, center = get_instance_segmentation( - sem_seg, - center_heatmap, - offsets, - thing_seg, - thing_ids, - threshold=threshold, - nms_kernel=nms_kernel, - top_k=top_k, - ) - panoptic = merge_semantic_and_instance( - sem_seg, instance, thing_seg, label_divisor, thing_ids, stuff_area, void_label - ) - - return panoptic, center diff --git a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/target_generator.py b/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/target_generator.py deleted file mode 100644 index 318afcc22c0af30481cfbe4e712665bf476d32d5..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/panoptic_deeplab/target_generator.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# Reference: https://github.com/bowenc0221/panoptic-deeplab/blob/aa934324b55a34ce95fea143aea1cb7a6dbe04bd/segmentation/data/transforms/target_transforms.py#L11 # noqa -import numpy as np -import torch - - -class PanopticDeepLabTargetGenerator: - """ - Generates training targets for Panoptic-DeepLab. - """ - - def __init__( - self, - ignore_label, - thing_ids, - sigma=8, - ignore_stuff_in_offset=False, - small_instance_area=0, - small_instance_weight=1, - ignore_crowd_in_semantic=False, - ): - """ - Args: - ignore_label: Integer, the ignore label for semantic segmentation. - thing_ids: Set, a set of ids from contiguous category ids belonging - to thing categories. - sigma: the sigma for Gaussian kernel. - ignore_stuff_in_offset: Boolean, whether to ignore stuff region when - training the offset branch. - small_instance_area: Integer, indicates largest area for small instances. - small_instance_weight: Integer, indicates semantic loss weights for - small instances. - ignore_crowd_in_semantic: Boolean, whether to ignore crowd region in - semantic segmentation branch, crowd region is ignored in the original - TensorFlow implementation. - """ - self.ignore_label = ignore_label - self.thing_ids = set(thing_ids) - self.ignore_stuff_in_offset = ignore_stuff_in_offset - self.small_instance_area = small_instance_area - self.small_instance_weight = small_instance_weight - self.ignore_crowd_in_semantic = ignore_crowd_in_semantic - - # Generate the default Gaussian image for each center - self.sigma = sigma - size = 6 * sigma + 3 - x = np.arange(0, size, 1, float) - y = x[:, np.newaxis] - x0, y0 = 3 * sigma + 1, 3 * sigma + 1 - self.g = np.exp(-((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma**2)) - - def __call__(self, panoptic, segments_info): - """Generates the training target. 
- reference: https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/createPanopticImgs.py # noqa - reference: https://github.com/facebookresearch/detectron2/blob/main/datasets/prepare_panoptic_fpn.py#L18 # noqa - - Args: - panoptic: numpy.array, panoptic label, we assume it is already - converted from rgb image by panopticapi.utils.rgb2id. - segments_info (list[dict]): see detectron2 documentation of "Use Custom Datasets". - - Returns: - A dictionary with fields: - - sem_seg: Tensor, semantic label, shape=(H, W). - - center: Tensor, center heatmap, shape=(H, W). - - center_points: List, center coordinates, with tuple - (y-coord, x-coord). - - offset: Tensor, offset, shape=(2, H, W), first dim is - (offset_y, offset_x). - - sem_seg_weights: Tensor, loss weight for semantic prediction, - shape=(H, W). - - center_weights: Tensor, ignore region of center prediction, - shape=(H, W), used as weights for center regression 0 is - ignore, 1 is has instance. Multiply this mask to loss. - - offset_weights: Tensor, ignore region of offset prediction, - shape=(H, W), used as weights for offset regression 0 is - ignore, 1 is has instance. Multiply this mask to loss. - """ - height, width = panoptic.shape[0], panoptic.shape[1] - semantic = np.zeros_like(panoptic, dtype=np.uint8) + self.ignore_label - center = np.zeros((height, width), dtype=np.float32) - center_pts = [] - offset = np.zeros((2, height, width), dtype=np.float32) - y_coord, x_coord = np.meshgrid( - np.arange(height, dtype=np.float32), np.arange(width, dtype=np.float32), indexing="ij" - ) - # Generate pixel-wise loss weights - semantic_weights = np.ones_like(panoptic, dtype=np.uint8) - # 0: ignore, 1: has instance - # three conditions for a region to be ignored for instance branches: - # (1) It is labeled as `ignore_label` - # (2) It is crowd region (iscrowd=1) - # (3) (Optional) It is stuff region (for offset branch) - center_weights = np.zeros_like(panoptic, dtype=np.uint8) - offset_weights = np.zeros_like(panoptic, dtype=np.uint8) - for seg in segments_info: - cat_id = seg["category_id"] - if not (self.ignore_crowd_in_semantic and seg["iscrowd"]): - semantic[panoptic == seg["id"]] = cat_id - if not seg["iscrowd"]: - # Ignored regions are not in `segments_info`. - # Handle crowd region. 
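- # (Crowd segments are skipped here, so they keep weight 0 and do not
- # contribute to the center/offset losses.)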
- center_weights[panoptic == seg["id"]] = 1 - if not self.ignore_stuff_in_offset or cat_id in self.thing_ids: - offset_weights[panoptic == seg["id"]] = 1 - if cat_id in self.thing_ids: - # find instance center - mask_index = np.where(panoptic == seg["id"]) - if len(mask_index[0]) == 0: - # the instance is completely cropped - continue - - # Find instance area - ins_area = len(mask_index[0]) - if ins_area < self.small_instance_area: - semantic_weights[panoptic == seg["id"]] = self.small_instance_weight - - center_y, center_x = np.mean(mask_index[0]), np.mean(mask_index[1]) - center_pts.append([center_y, center_x]) - - # generate center heatmap - y, x = int(round(center_y)), int(round(center_x)) - sigma = self.sigma - # upper left - ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) - # bottom right - br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) - - # start and end indices in default Gaussian image - gaussian_x0, gaussian_x1 = max(0, -ul[0]), min(br[0], width) - ul[0] - gaussian_y0, gaussian_y1 = max(0, -ul[1]), min(br[1], height) - ul[1] - - # start and end indices in center heatmap image - center_x0, center_x1 = max(0, ul[0]), min(br[0], width) - center_y0, center_y1 = max(0, ul[1]), min(br[1], height) - center[center_y0:center_y1, center_x0:center_x1] = np.maximum( - center[center_y0:center_y1, center_x0:center_x1], - self.g[gaussian_y0:gaussian_y1, gaussian_x0:gaussian_x1], - ) - - # generate offset (2, h, w) -> (y-dir, x-dir) - offset[0][mask_index] = center_y - y_coord[mask_index] - offset[1][mask_index] = center_x - x_coord[mask_index] - - center_weights = center_weights[None] - offset_weights = offset_weights[None] - return dict( - sem_seg=torch.as_tensor(semantic.astype("long")), - center=torch.as_tensor(center.astype(np.float32)), - center_points=center_pts, - offset=torch.as_tensor(offset.astype(np.float32)), - sem_seg_weights=torch.as_tensor(semantic_weights.astype(np.float32)), - center_weights=torch.as_tensor(center_weights.astype(np.float32)), - offset_weights=torch.as_tensor(offset_weights.astype(np.float32)), - ) diff --git a/detectron2/projects/Panoptic-DeepLab/train_net.py b/detectron2/projects/Panoptic-DeepLab/train_net.py deleted file mode 100644 index ac6addcffbf516b08f38369da9482365e5869b88..0000000000000000000000000000000000000000 --- a/detectron2/projects/Panoptic-DeepLab/train_net.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -Panoptic-DeepLab Training Script. -This script is a simplified version of the training script in detectron2/tools. 
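-
-Typical usage (the config file path is a placeholder for one of this project's configs):
-    python train_net.py --config-file <path/to/panoptic_deeplab_config.yaml> --num-gpus 8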
-""" - -import os -import torch - -import detectron2.data.transforms as T -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, -) -from detectron2.projects.deeplab import build_lr_scheduler -from detectron2.projects.panoptic_deeplab import ( - PanopticDeeplabDatasetMapper, - add_panoptic_deeplab_config, -) -from detectron2.solver import get_default_optimizer_params -from detectron2.solver.build import maybe_add_gradient_clipping - - -def build_sem_seg_train_aug(cfg): - augs = [ - T.ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TRAIN, - cfg.INPUT.MAX_SIZE_TRAIN, - cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, - ) - ] - if cfg.INPUT.CROP.ENABLED: - augs.append(T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) - augs.append(T.RandomFlip()) - return augs - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if cfg.MODEL.PANOPTIC_DEEPLAB.BENCHMARK_NETWORK_SPEED: - return None - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["cityscapes_panoptic_seg", "coco_panoptic_seg"]: - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_panoptic_seg": - evaluator_list.append(CityscapesSemSegEvaluator(dataset_name)) - evaluator_list.append(CityscapesInstanceEvaluator(dataset_name)) - if evaluator_type == "coco_panoptic_seg": - # `thing_classes` in COCO panoptic metadata includes both thing and - # stuff classes for visualization. COCOEvaluator requires metadata - # which only contains thing classes, thus we map the name of - # panoptic datasets to their corresponding instance datasets. - dataset_name_mapper = { - "coco_2017_val_panoptic": "coco_2017_val", - "coco_2017_val_100_panoptic": "coco_2017_val_100", - } - evaluator_list.append( - COCOEvaluator(dataset_name_mapper[dataset_name], output_dir=output_folder) - ) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - mapper = PanopticDeeplabDatasetMapper(cfg, augmentations=build_sem_seg_train_aug(cfg)) - return build_detection_train_loader(cfg, mapper=mapper) - - @classmethod - def build_lr_scheduler(cls, cfg, optimizer): - """ - It now calls :func:`detectron2.solver.build_lr_scheduler`. 
- Overwrite it if you'd like a different scheduler. - """ - return build_lr_scheduler(cfg, optimizer) - - @classmethod - def build_optimizer(cls, cfg, model): - """ - Build an optimizer from config. - """ - params = get_default_optimizer_params( - model, - weight_decay=cfg.SOLVER.WEIGHT_DECAY, - weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, - ) - - optimizer_type = cfg.SOLVER.OPTIMIZER - if optimizer_type == "SGD": - return maybe_add_gradient_clipping(cfg, torch.optim.SGD)( - params, - cfg.SOLVER.BASE_LR, - momentum=cfg.SOLVER.MOMENTUM, - nesterov=cfg.SOLVER.NESTEROV, - ) - elif optimizer_type == "ADAM": - return maybe_add_gradient_clipping(cfg, torch.optim.Adam)(params, cfg.SOLVER.BASE_LR) - else: - raise NotImplementedError(f"no optimizer type {optimizer_type}") - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_panoptic_deeplab_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/PointRend/README.md b/detectron2/projects/PointRend/README.md deleted file mode 100644 index 79d75d506c6f5db710044d3c1cd2583027ac3dbe..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/README.md +++ /dev/null @@ -1,167 +0,0 @@ -# PointRend: Image Segmentation as Rendering - -Alexander Kirillov, Yuxin Wu, Kaiming He, Ross Girshick - -[[`arXiv`](https://arxiv.org/abs/1912.08193)] [[`BibTeX`](#CitingPointRend)] - -
-
-In this repository, we release code for PointRend in Detectron2. PointRend can be flexibly applied to both instance and semantic segmentation tasks by building on top of existing state-of-the-art models.
-
-## Quick start and visualization
-
-This [Colab Notebook](https://colab.research.google.com/drive/1isGPL5h5_cKoPPhVL9XhMokRtHDvmMVL) tutorial contains examples of PointRend usage and visualizations of its point sampling stages.
-
-## Training
-
-To train a model with 8 GPUs run:
-```bash
-cd /path/to/detectron2/projects/PointRend
-python train_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml --num-gpus 8
-```
-
-## Evaluation
-
-Model evaluation can be done similarly:
-```bash
-cd /path/to/detectron2/projects/PointRend
-python train_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint
-```
-
-# Pretrained Models
-
-## Instance Segmentation
-#### COCO
-
-| Mask head | Backbone | lr sched | Output resolution | mask AP | mask AP* | model id | download |
-| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
-| PointRend | R50-FPN | | 224×224 | 36.2 | 39.7 | 164254221 | model \| metrics |
-| PointRend | R50-FPN | | 224×224 | 38.3 | 41.6 | 164955410 | model \| metrics |
-| PointRend | R101-FPN | | 224×224 | 40.1 | 43.8 | | model \| metrics |
-| PointRend | X101-FPN | | 224×224 | 41.1 | 44.7 | | model \| metrics |
-
-AP* is COCO mask AP evaluated against the higher-quality LVIS annotations; see the paper for details.
-Run `python detectron2/datasets/prepare_cocofied_lvis.py` to prepare GT files for AP* evaluation.
-Since LVIS annotations are not exhaustive, `lvis-api` and not `cocoapi` should be used to evaluate AP*.
-
-#### Cityscapes
-Cityscapes model is trained with ImageNet pretraining.
-
-| Mask head | Backbone | lr sched | Output resolution | mask AP | model id | download |
-| :--- | :--- | :--- | :--- | :--- | :--- | :--- |
-| PointRend | R50-FPN | | 224×224 | 35.9 | 164255101 | model \| metrics |
-
-## Semantic Segmentation
-
-#### Cityscapes
-Cityscapes model is trained with ImageNet pretraining.
-
-| Method | Backbone | Output resolution | mIoU | model id | download |
-| :--- | :--- | :--- | :--- | :--- | :--- |
-| SemanticFPN + PointRend | R101-FPN | 1024×2048 | 78.9 | 202576688 | model \| metrics |
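-
-A pretrained semantic segmentation model can be evaluated with the same `train_net.py` entry point as above; a minimal sketch (the checkpoint path is a placeholder for a downloaded model):
-```bash
-cd /path/to/detectron2/projects/PointRend
-python train_net.py --config-file configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint
-```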
- -## Citing PointRend - -If you use PointRend, please use the following BibTeX entry. - -```BibTeX -@InProceedings{kirillov2019pointrend, - title={{PointRend}: Image Segmentation as Rendering}, - author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick}, - journal={ArXiv:1912.08193}, - year={2019} -} -``` - -## Citing Implicit PointRend - -If you use Implicit PointRend, please use the following BibTeX entry. - -```BibTeX -@InProceedings{cheng2021pointly, - title={Pointly-Supervised Instance Segmentation, - author={Bowen Cheng and Omkar Parkhi and Alexander Kirillov}, - journal={ArXiv}, - year={2021} -} -``` diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-Implicit-PointRend.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-Implicit-PointRend.yaml deleted file mode 100644 index 5ebafb30d3d8c5dfd24d03beff6d16bc2c9439fc..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-Implicit-PointRend.yaml +++ /dev/null @@ -1,25 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: true - ROI_MASK_HEAD: - NAME: "ImplicitPointRendMaskHead" - POOLER_TYPE: "" # No RoI pooling, let the head process image features directly - FC_DIM: 1024 - NUM_FC: 2 - POINT_HEAD: - NAME: "ImplicitPointHead" - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] - NUM_CLASSES: 80 - CLS_AGNOSTIC_MASK: False - TRAIN_NUM_POINTS: 196 - SUBDIVISION_STEPS: 3 - SUBDIVISION_NUM_POINTS: 784 - IMPLICIT_POINTREND: - IMAGE_FEATURE_ENABLED: True - POS_ENC_ENABLED: True - PARAMS_L2_REGULARIZER: 0.00001 -INPUT: - # PointRend for instance segmentation does not work with "polygon" mask_format. - MASK_FORMAT: "bitmask" diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml deleted file mode 100644 index e68e707f949f046a3ba0a48bc8e12572982b8316..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: true - ROI_BOX_HEAD: - TRAIN_ON_PRED_BOXES: True - ROI_MASK_HEAD: - POOLER_TYPE: "" # No RoI pooling, let the head process image features directly - NAME: "PointRendMaskHead" - FC_DIM: 1024 - NUM_FC: 2 - OUTPUT_SIDE_RESOLUTION: 7 - IN_FEATURES: ["p2"] # for the coarse mask head - POINT_HEAD_ON: True - POINT_HEAD: - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] -INPUT: - # PointRend for instance segmentation does not work with "polygon" mask_format. - MASK_FORMAT: "bitmask" diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_1x_coco.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_1x_coco.yaml deleted file mode 100644 index ba35c24679a8b69109c2db3fdd0a9414bd8159a6..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_1x_coco.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Implicit-PointRend.yaml" -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 -# To add COCO AP evaluation against the higher-quality LVIS annotations. 
-# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_3x_coco.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_3x_coco.yaml deleted file mode 100644 index 884236d07784cbebbf9905e37d9c361e89e25e91..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_3x_coco.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "Base-Implicit-PointRend.yaml" -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_101_FPN_3x_coco.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_101_FPN_3x_coco.yaml deleted file mode 100644 index 4269130ccd25fa4640f6e6836b5256241f2d50bc..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_101_FPN_3x_coco.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-101.pkl - MASK_ON: true - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml deleted file mode 100644 index 0402d6d645c0dafed7b8c6623371bd0a4701a85b..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml +++ /dev/null @@ -1,22 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 8 - POINT_HEAD: - NUM_CLASSES: 8 -DATASETS: - TEST: ("cityscapes_fine_instance_seg_val",) - TRAIN: ("cityscapes_fine_instance_seg_train",) -SOLVER: - BASE_LR: 0.01 - IMS_PER_BATCH: 8 - MAX_ITER: 24000 - STEPS: (18000,) -INPUT: - MAX_SIZE_TEST: 2048 - MAX_SIZE_TRAIN: 2048 - MIN_SIZE_TEST: 1024 - MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml deleted file mode 100644 index 0249b493e7446eccfc9a483287308b8f064e15e9..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 -# To add COCO AP evaluation against the higher-quality LVIS annotations. 
-# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml deleted file mode 100644 index a571b4c71911fa947f5e774f24071bcb37004a28..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") - diff --git a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco.yaml b/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco.yaml deleted file mode 100644 index 85d26f3fabed2d4cf860cf57eb27808a30db76ee..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - MASK_ON: True - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml b/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml deleted file mode 100644 index 9b7a1b40bb2e3b9e8e9264c227661dcdb2868348..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - BACKBONE: - FREEZE_AT: 0 - SEM_SEG_HEAD: - NAME: "PointRendSemSegHead" - POINT_HEAD: - NUM_CLASSES: 54 - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] - TRAIN_NUM_POINTS: 1024 - SUBDIVISION_STEPS: 2 - SUBDIVISION_NUM_POINTS: 8192 - COARSE_SEM_SEG_HEAD_NAME: "SemSegFPNHead" - COARSE_PRED_EACH_LAYER: False -DATASETS: - TRAIN: ("coco_2017_train_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) diff --git a/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml b/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml deleted file mode 100644 index 6be11fa3e80a83a0f138adbeb794fa98425606cf..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml +++ /dev/null @@ -1,33 +0,0 @@ -_BASE_: Base-PointRend-Semantic-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-101.pkl - RESNETS: - DEPTH: 101 - SEM_SEG_HEAD: - NUM_CLASSES: 19 - POINT_HEAD: - NUM_CLASSES: 19 - TRAIN_NUM_POINTS: 2048 - SUBDIVISION_NUM_POINTS: 8192 -DATASETS: - TRAIN: ("cityscapes_fine_sem_seg_train",) - TEST: 
("cityscapes_fine_sem_seg_val",) -SOLVER: - BASE_LR: 0.01 - STEPS: (40000, 55000) - MAX_ITER: 65000 - IMS_PER_BATCH: 32 -INPUT: - MIN_SIZE_TRAIN: (512, 768, 1024, 1280, 1536, 1792, 2048) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 4096 - MAX_SIZE_TEST: 2048 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (512, 1024) - SINGLE_CATEGORY_MAX_AREA: 0.75 - COLOR_AUG_SSD: True -DATALOADER: - NUM_WORKERS: 10 diff --git a/detectron2/projects/PointRend/point_rend/__init__.py b/detectron2/projects/PointRend/point_rend/__init__.py deleted file mode 100644 index e3050cbddb92f4ec3acf091cc7aed0ea70484927..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .config import add_pointrend_config -from .mask_head import PointRendMaskHead, ImplicitPointRendMaskHead -from .semantic_seg import PointRendSemSegHead -from .color_augmentation import ColorAugSSDTransform - -from . import roi_heads as _ # only registration diff --git a/detectron2/projects/PointRend/point_rend/color_augmentation.py b/detectron2/projects/PointRend/point_rend/color_augmentation.py deleted file mode 100644 index cdcb051623d20e3bfad5167715e8082974d51ec2..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/color_augmentation.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -import random -import cv2 -from fvcore.transforms.transform import Transform - - -class ColorAugSSDTransform(Transform): - """ - A color related data augmentation used in Single Shot Multibox Detector (SSD). - - Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, - Scott Reed, Cheng-Yang Fu, Alexander C. Berg. - SSD: Single Shot MultiBox Detector. ECCV 2016. 
- - Implementation based on: - - https://github.com/weiliu89/caffe/blob - /4817bf8b4200b35ada8ed0dc378dceaf38c539e4 - /src/caffe/util/im_transforms.cpp - - https://github.com/chainer/chainercv/blob - /7159616642e0be7c5b3ef380b848e16b7e99355b/chainercv - /links/model/ssd/transforms.py - """ - - def __init__( - self, - img_format, - brightness_delta=32, - contrast_low=0.5, - contrast_high=1.5, - saturation_low=0.5, - saturation_high=1.5, - hue_delta=18, - ): - super().__init__() - assert img_format in ["BGR", "RGB"] - self.is_rgb = img_format == "RGB" - del img_format - self._set_attributes(locals()) - - def apply_coords(self, coords): - return coords - - def apply_segmentation(self, segmentation): - return segmentation - - def apply_image(self, img, interp=None): - if self.is_rgb: - img = img[:, :, [2, 1, 0]] - img = self.brightness(img) - if random.randrange(2): - img = self.contrast(img) - img = self.saturation(img) - img = self.hue(img) - else: - img = self.saturation(img) - img = self.hue(img) - img = self.contrast(img) - if self.is_rgb: - img = img[:, :, [2, 1, 0]] - return img - - def convert(self, img, alpha=1, beta=0): - img = img.astype(np.float32) * alpha + beta - img = np.clip(img, 0, 255) - return img.astype(np.uint8) - - def brightness(self, img): - if random.randrange(2): - return self.convert( - img, beta=random.uniform(-self.brightness_delta, self.brightness_delta) - ) - return img - - def contrast(self, img): - if random.randrange(2): - return self.convert(img, alpha=random.uniform(self.contrast_low, self.contrast_high)) - return img - - def saturation(self, img): - if random.randrange(2): - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img[:, :, 1] = self.convert( - img[:, :, 1], alpha=random.uniform(self.saturation_low, self.saturation_high) - ) - return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) - return img - - def hue(self, img): - if random.randrange(2): - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img[:, :, 0] = ( - img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta) - ) % 180 - return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) - return img diff --git a/detectron2/projects/PointRend/point_rend/config.py b/detectron2/projects/PointRend/point_rend/config.py deleted file mode 100644 index a02c7829533545e81669785a53db90ef7e783156..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/config.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -from detectron2.config import CfgNode as CN - - -def add_pointrend_config(cfg): - """ - Add config for PointRend. - """ - # We retry random cropping until no single category in semantic segmentation GT occupies more - # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. - cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 - # Color augmentatition from SSD paper for semantic segmentation model during training. - cfg.INPUT.COLOR_AUG_SSD = False - - # Names of the input feature maps to be used by a coarse mask head. - cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES = ("p2",) - cfg.MODEL.ROI_MASK_HEAD.FC_DIM = 1024 - cfg.MODEL.ROI_MASK_HEAD.NUM_FC = 2 - # The side size of a coarse mask head prediction. - cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION = 7 - # True if point head is used. - cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = False - - cfg.MODEL.POINT_HEAD = CN() - cfg.MODEL.POINT_HEAD.NAME = "StandardPointHead" - cfg.MODEL.POINT_HEAD.NUM_CLASSES = 80 - # Names of the input feature maps to be used by a mask point head. 
- cfg.MODEL.POINT_HEAD.IN_FEATURES = ("p2",) - # Number of points sampled during training for a mask point head. - cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS = 14 * 14 - # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the - # original paper. - cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO = 3 - # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in - # the original paper. - cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO = 0.75 - # Number of subdivision steps during inference. - cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS = 5 - # Maximum number of points selected at each subdivision step (N). - cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS = 28 * 28 - cfg.MODEL.POINT_HEAD.FC_DIM = 256 - cfg.MODEL.POINT_HEAD.NUM_FC = 3 - cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK = False - # If True, then coarse prediction features are used as inout for each layer in PointRend's MLP. - cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER = True - cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME = "SemSegFPNHead" - - """ - Add config for Implicit PointRend. - """ - cfg.MODEL.IMPLICIT_POINTREND = CN() - - cfg.MODEL.IMPLICIT_POINTREND.IMAGE_FEATURE_ENABLED = True - cfg.MODEL.IMPLICIT_POINTREND.POS_ENC_ENABLED = True - - cfg.MODEL.IMPLICIT_POINTREND.PARAMS_L2_REGULARIZER = 0.00001 diff --git a/detectron2/projects/PointRend/point_rend/mask_head.py b/detectron2/projects/PointRend/point_rend/mask_head.py deleted file mode 100644 index 46dd64721578bd45eb208206bbd5e7908cb6a148..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/mask_head.py +++ /dev/null @@ -1,435 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import math -import numpy as np -from typing import Dict, List, Tuple -import fvcore.nn.weight_init as weight_init -import torch -from torch import Tensor, nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ShapeSpec, cat, interpolate -from detectron2.modeling import ROI_MASK_HEAD_REGISTRY -from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference, mask_rcnn_loss -from detectron2.structures import Boxes - -from .point_features import ( - generate_regular_grid_point_coords, - get_point_coords_wrt_image, - get_uncertain_point_coords_on_grid, - get_uncertain_point_coords_with_randomness, - point_sample, - point_sample_fine_grained_features, - sample_point_labels, -) -from .point_head import build_point_head, roi_mask_point_loss - - -def calculate_uncertainty(logits, classes): - """ - We estimate uncerainty as L1 distance between 0.0 and the logit prediction in 'logits' for the - foreground class in `classes`. - Args: - logits (Tensor): A tensor of shape (R, C, ...) or (R, 1, ...) for class-specific or - class-agnostic, where R is the total number of predicted masks in all images and C is - the number of foreground classes. The values are logits. - classes (list): A list of length R that contains either predicted of ground truth class - for eash predicted mask. - Returns: - scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with - the most uncertain locations having the highest uncertainty score. 
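- (Uncertainty here is simply -|logit|: for a sigmoid mask score, a logit near 0
- means a predicted probability near 0.5, so such points receive the highest
- uncertainty scores.)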
- """ - if logits.shape[1] == 1: - gt_class_logits = logits.clone() - else: - gt_class_logits = logits[ - torch.arange(logits.shape[0], device=logits.device), classes - ].unsqueeze(1) - return -(torch.abs(gt_class_logits)) - - -class ConvFCHead(nn.Module): - """ - A mask head with fully connected layers. Given pooled features it first reduces channels and - spatial dimensions with conv layers and then uses FC layers to predict coarse masks analogously - to the standard box head. - """ - - _version = 2 - - @configurable - def __init__( - self, input_shape: ShapeSpec, *, conv_dim: int, fc_dims: List[int], output_shape: Tuple[int] - ): - """ - Args: - conv_dim: the output dimension of the conv layers - fc_dims: a list of N>0 integers representing the output dimensions of N FC layers - output_shape: shape of the output mask prediction - """ - super().__init__() - - # fmt: off - input_channels = input_shape.channels - input_h = input_shape.height - input_w = input_shape.width - self.output_shape = output_shape - # fmt: on - - self.conv_layers = [] - if input_channels > conv_dim: - self.reduce_channel_dim_conv = Conv2d( - input_channels, - conv_dim, - kernel_size=1, - stride=1, - padding=0, - bias=True, - activation=F.relu, - ) - self.conv_layers.append(self.reduce_channel_dim_conv) - - self.reduce_spatial_dim_conv = Conv2d( - conv_dim, conv_dim, kernel_size=2, stride=2, padding=0, bias=True, activation=F.relu - ) - self.conv_layers.append(self.reduce_spatial_dim_conv) - - input_dim = conv_dim * input_h * input_w - input_dim //= 4 - - self.fcs = [] - for k, fc_dim in enumerate(fc_dims): - fc = nn.Linear(input_dim, fc_dim) - self.add_module("fc{}".format(k + 1), fc) - self.fcs.append(fc) - input_dim = fc_dim - - output_dim = int(np.prod(self.output_shape)) - - self.prediction = nn.Linear(fc_dims[-1], output_dim) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.prediction.weight, std=0.001) - nn.init.constant_(self.prediction.bias, 0) - - for layer in self.conv_layers: - weight_init.c2_msra_fill(layer) - for layer in self.fcs: - weight_init.c2_xavier_fill(layer) - - @classmethod - def from_config(cls, cfg, input_shape): - output_shape = ( - cfg.MODEL.ROI_HEADS.NUM_CLASSES, - cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION, - cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION, - ) - fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM - num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC - ret = dict( - input_shape=input_shape, - conv_dim=cfg.MODEL.ROI_MASK_HEAD.CONV_DIM, - fc_dims=[fc_dim] * num_fc, - output_shape=output_shape, - ) - return ret - - def forward(self, x): - N = x.shape[0] - for layer in self.conv_layers: - x = layer(x) - x = torch.flatten(x, start_dim=1) - for layer in self.fcs: - x = F.relu(layer(x)) - output_shape = [N] + list(self.output_shape) - return self.prediction(x).view(*output_shape) - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - version = local_metadata.get("version", None) - - if version is None or version < 2: - logger = logging.getLogger(__name__) - logger.warning( - "Weight format of PointRend models have changed! " - "Applying automatic conversion now ..." 
- ) - for k in list(state_dict.keys()): - newk = k - if k.startswith(prefix + "coarse_mask_fc"): - newk = k.replace(prefix + "coarse_mask_fc", prefix + "fc") - if newk != k: - state_dict[newk] = state_dict[k] - del state_dict[k] - - -@ROI_MASK_HEAD_REGISTRY.register() -class PointRendMaskHead(nn.Module): - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()} - # point head - self._init_point_head(cfg, input_shape) - # coarse mask head - self.roi_pooler_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES - self.roi_pooler_size = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()} - in_channels = np.sum([input_shape[f].channels for f in self.roi_pooler_in_features]) - self._init_roi_head( - cfg, - ShapeSpec( - channels=in_channels, - width=self.roi_pooler_size, - height=self.roi_pooler_size, - ), - ) - - def _init_roi_head(self, cfg, input_shape): - self.coarse_head = ConvFCHead(cfg, input_shape) - - def _init_point_head(self, cfg, input_shape): - # fmt: off - self.mask_point_on = cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON - if not self.mask_point_on: - return - assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - self.mask_point_oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO - self.mask_point_importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO - # next three parameters are use in the adaptive subdivions inference procedure - self.mask_point_subdivision_init_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION - self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = int(np.sum([input_shape[f].channels for f in self.mask_point_in_features])) - self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1)) - - # An optimization to skip unused subdivision steps: if after subdivision, all pixels on - # the mask will be selected and recomputed anyway, we should just double our init_resolution - while ( - 4 * self.mask_point_subdivision_init_resolution**2 - <= self.mask_point_subdivision_num_points - ): - self.mask_point_subdivision_init_resolution *= 2 - self.mask_point_subdivision_steps -= 1 - - def forward(self, features, instances): - """ - Args: - features (dict[str, Tensor]): a dict of image-level features - instances (list[Instances]): proposals in training; detected - instances in inference - """ - if self.training: - proposal_boxes = [x.proposal_boxes for x in instances] - coarse_mask = self.coarse_head(self._roi_pooler(features, proposal_boxes)) - losses = {"loss_mask": mask_rcnn_loss(coarse_mask, instances)} - if not self.mask_point_on: - return losses - - point_coords, point_labels = self._sample_train_points(coarse_mask, instances) - point_fine_grained_features = self._point_pooler(features, proposal_boxes, point_coords) - point_logits = self._get_point_logits( - point_fine_grained_features, point_coords, coarse_mask - ) - losses["loss_mask_point"] = roi_mask_point_loss(point_logits, instances, point_labels) - return losses - else: - pred_boxes = [x.pred_boxes for x in instances] - coarse_mask = self.coarse_head(self._roi_pooler(features, pred_boxes)) - return 
self._subdivision_inference(features, coarse_mask, instances) - - def _roi_pooler(self, features: List[Tensor], boxes: List[Boxes]): - """ - Extract per-box feature. This is similar to RoIAlign(sampling_ratio=1) except: - 1. It's implemented by point_sample - 2. It pools features across all levels and concat them, while typically - RoIAlign select one level for every box. However in the config we only use - one level (p2) so there is no difference. - - Returns: - Tensor of shape (R, C, pooler_size, pooler_size) where R is the total number of boxes - """ - features_list = [features[k] for k in self.roi_pooler_in_features] - features_scales = [self._feature_scales[k] for k in self.roi_pooler_in_features] - - num_boxes = sum(x.tensor.size(0) for x in boxes) - output_size = self.roi_pooler_size - point_coords = generate_regular_grid_point_coords(num_boxes, output_size, boxes[0].device) - # For regular grids of points, this function is equivalent to `len(features_list)' calls - # of `ROIAlign` (with `SAMPLING_RATIO=1`), and concat the results. - roi_features, _ = point_sample_fine_grained_features( - features_list, features_scales, boxes, point_coords - ) - return roi_features.view(num_boxes, roi_features.shape[1], output_size, output_size) - - def _sample_train_points(self, coarse_mask, instances): - assert self.training - gt_classes = cat([x.gt_classes for x in instances]) - with torch.no_grad(): - # sample point_coords - point_coords = get_uncertain_point_coords_with_randomness( - coarse_mask, - lambda logits: calculate_uncertainty(logits, gt_classes), - self.mask_point_train_num_points, - self.mask_point_oversample_ratio, - self.mask_point_importance_sample_ratio, - ) - # sample point_labels - proposal_boxes = [x.proposal_boxes for x in instances] - cat_boxes = Boxes.cat(proposal_boxes) - point_coords_wrt_image = get_point_coords_wrt_image(cat_boxes.tensor, point_coords) - point_labels = sample_point_labels(instances, point_coords_wrt_image) - return point_coords, point_labels - - def _point_pooler(self, features, proposal_boxes, point_coords): - point_features_list = [features[k] for k in self.mask_point_in_features] - point_features_scales = [self._feature_scales[k] for k in self.mask_point_in_features] - # sample image-level features - point_fine_grained_features, _ = point_sample_fine_grained_features( - point_features_list, point_features_scales, proposal_boxes, point_coords - ) - return point_fine_grained_features - - def _get_point_logits(self, point_fine_grained_features, point_coords, coarse_mask): - coarse_features = point_sample(coarse_mask, point_coords, align_corners=False) - point_logits = self.point_head(point_fine_grained_features, coarse_features) - return point_logits - - def _subdivision_inference(self, features, mask_representations, instances): - assert not self.training - - pred_boxes = [x.pred_boxes for x in instances] - pred_classes = cat([x.pred_classes for x in instances]) - - mask_logits = None - # +1 here to include an initial step to generate the coarsest mask - # prediction with init_resolution, when mask_logits is None. - # We compute initial mask by sampling on a regular grid. coarse_mask - # can be used as initial mask as well, but it's typically very low-res - # so it will be completely overwritten during subdivision anyway. 
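The loop that follows is easier to read with concrete numbers. Using the config values set earlier in this patch (`SUBDIVISION_STEPS = 5`, `SUBDIVISION_NUM_POINTS = 28 * 28`) and assuming the usual `OUTPUT_SIDE_RESOLUTION` default of 7 (not shown in this diff), the init-resolution doubling in `_init_point_head` collapses the first two steps, and inference then refines the mask 28 → 56 → 112 → 224 while re-predicting only the `SUBDIVISION_NUM_POINTS` most uncertain locations at each upsampling step. A minimal standalone sketch of that schedule (editor's illustration, not part of the original file):

```python
# Mirrors the init-resolution doubling and the subdivision loop's upsampling schedule.
# OUTPUT_SIDE_RESOLUTION = 7 is an assumed default; the other two values come from the
# PointRend config earlier in this patch.
init_res, steps, num_points = 7, 5, 28 * 28

# Skip subdivision steps whose pixels would all be re-predicted anyway.
while 4 * init_res ** 2 <= num_points:
    init_res *= 2
    steps -= 1

schedule = [init_res * 2 ** i for i in range(steps + 1)]
print(init_res, steps, schedule)  # -> 28 3 [28, 56, 112, 224]
```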
- for _ in range(self.mask_point_subdivision_steps + 1): - if mask_logits is None: - point_coords = generate_regular_grid_point_coords( - pred_classes.size(0), - self.mask_point_subdivision_init_resolution, - pred_boxes[0].device, - ) - else: - mask_logits = interpolate( - mask_logits, scale_factor=2, mode="bilinear", align_corners=False - ) - uncertainty_map = calculate_uncertainty(mask_logits, pred_classes) - point_indices, point_coords = get_uncertain_point_coords_on_grid( - uncertainty_map, self.mask_point_subdivision_num_points - ) - - # Run the point head for every point in point_coords - fine_grained_features = self._point_pooler(features, pred_boxes, point_coords) - point_logits = self._get_point_logits( - fine_grained_features, point_coords, mask_representations - ) - - if mask_logits is None: - # Create initial mask_logits using point_logits on this regular grid - R, C, _ = point_logits.shape - mask_logits = point_logits.reshape( - R, - C, - self.mask_point_subdivision_init_resolution, - self.mask_point_subdivision_init_resolution, - ) - # The subdivision code will fail with the empty list of boxes - if len(pred_classes) == 0: - mask_rcnn_inference(mask_logits, instances) - return instances - else: - # Put point predictions to the right places on the upsampled grid. - R, C, H, W = mask_logits.shape - point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) - mask_logits = ( - mask_logits.reshape(R, C, H * W) - .scatter_(2, point_indices, point_logits) - .view(R, C, H, W) - ) - mask_rcnn_inference(mask_logits, instances) - return instances - - -@ROI_MASK_HEAD_REGISTRY.register() -class ImplicitPointRendMaskHead(PointRendMaskHead): - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__(cfg, input_shape) - - def _init_roi_head(self, cfg, input_shape): - assert hasattr(self, "num_params"), "Please initialize point_head first!" 
- self.parameter_head = ConvFCHead(cfg, input_shape, output_shape=(self.num_params,)) - self.regularizer = cfg.MODEL.IMPLICIT_POINTREND.PARAMS_L2_REGULARIZER - - def _init_point_head(self, cfg, input_shape): - # fmt: off - self.mask_point_on = True # always on - assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - # next two parameters are use in the adaptive subdivions inference procedure - self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = int(np.sum([input_shape[f].channels for f in self.mask_point_in_features])) - self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1)) - self.num_params = self.point_head.num_params - - # inference parameters - self.mask_point_subdivision_init_resolution = int( - math.sqrt(self.mask_point_subdivision_num_points) - ) - assert ( - self.mask_point_subdivision_init_resolution - * self.mask_point_subdivision_init_resolution - == self.mask_point_subdivision_num_points - ) - - def forward(self, features, instances): - """ - Args: - features (dict[str, Tensor]): a dict of image-level features - instances (list[Instances]): proposals in training; detected - instances in inference - """ - if self.training: - proposal_boxes = [x.proposal_boxes for x in instances] - parameters = self.parameter_head(self._roi_pooler(features, proposal_boxes)) - losses = {"loss_l2": self.regularizer * (parameters**2).mean()} - - point_coords, point_labels = self._uniform_sample_train_points(instances) - point_fine_grained_features = self._point_pooler(features, proposal_boxes, point_coords) - point_logits = self._get_point_logits( - point_fine_grained_features, point_coords, parameters - ) - losses["loss_mask_point"] = roi_mask_point_loss(point_logits, instances, point_labels) - return losses - else: - pred_boxes = [x.pred_boxes for x in instances] - parameters = self.parameter_head(self._roi_pooler(features, pred_boxes)) - return self._subdivision_inference(features, parameters, instances) - - def _uniform_sample_train_points(self, instances): - assert self.training - proposal_boxes = [x.proposal_boxes for x in instances] - cat_boxes = Boxes.cat(proposal_boxes) - # uniform sample - point_coords = torch.rand( - len(cat_boxes), self.mask_point_train_num_points, 2, device=cat_boxes.tensor.device - ) - # sample point_labels - point_coords_wrt_image = get_point_coords_wrt_image(cat_boxes.tensor, point_coords) - point_labels = sample_point_labels(instances, point_coords_wrt_image) - return point_coords, point_labels - - def _get_point_logits(self, fine_grained_features, point_coords, parameters): - return self.point_head(fine_grained_features, point_coords, parameters) diff --git a/detectron2/projects/PointRend/point_rend/point_features.py b/detectron2/projects/PointRend/point_rend/point_features.py deleted file mode 100644 index e46f442950ff248555e127dc3923b67adb37fb69..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/point_features.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import torch -from torch.nn import functional as F - -from detectron2.layers import cat, shapes_to_tensor -from detectron2.structures import BitMasks, Boxes - - -""" -Shape shorthand in this module: - - N: minibatch dimension size, i.e. the number of RoIs for instance segmenation or the - number of images for semantic segmenation. - R: number of ROIs, combined over all images, in the minibatch - P: number of points -""" - - -def point_sample(input, point_coords, **kwargs): - """ - A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors. - Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside - [0, 1] x [0, 1] square. - - Args: - input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid. - point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains - [0, 1] x [0, 1] normalized point coordinates. - - Returns: - output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains - features for points in `point_coords`. The features are obtained via bilinear - interplation from `input` the same way as :function:`torch.nn.functional.grid_sample`. - """ - add_dim = False - if point_coords.dim() == 3: - add_dim = True - point_coords = point_coords.unsqueeze(2) - output = F.grid_sample(input, 2.0 * point_coords - 1.0, **kwargs) - if add_dim: - output = output.squeeze(3) - return output - - -def generate_regular_grid_point_coords(R, side_size, device): - """ - Generate regular square grid of points in [0, 1] x [0, 1] coordinate space. - - Args: - R (int): The number of grids to sample, one for each region. - side_size (int): The side size of the regular grid. - device (torch.device): Desired device of returned tensor. - - Returns: - (Tensor): A tensor of shape (R, side_size^2, 2) that contains coordinates - for the regular grids. - """ - aff = torch.tensor([[[0.5, 0, 0.5], [0, 0.5, 0.5]]], device=device) - r = F.affine_grid(aff, torch.Size((1, 1, side_size, side_size)), align_corners=False) - return r.view(1, -1, 2).expand(R, -1, -1) - - -def get_uncertain_point_coords_with_randomness( - coarse_logits, uncertainty_func, num_points, oversample_ratio, importance_sample_ratio -): - """ - Sample points in [0, 1] x [0, 1] coordinate space based on their uncertainty. The unceratinties - are calculated for each point using 'uncertainty_func' function that takes point's logit - prediction as input. - See PointRend paper for details. - - Args: - coarse_logits (Tensor): A tensor of shape (N, C, Hmask, Wmask) or (N, 1, Hmask, Wmask) for - class-specific or class-agnostic prediction. - uncertainty_func: A function that takes a Tensor of shape (N, C, P) or (N, 1, P) that - contains logit predictions for P points and returns their uncertainties as a Tensor of - shape (N, 1, P). - num_points (int): The number of points P to sample. - oversample_ratio (int): Oversampling parameter. - importance_sample_ratio (float): Ratio of points that are sampled via importnace sampling. - - Returns: - point_coords (Tensor): A tensor of shape (N, P, 2) that contains the coordinates of P - sampled points. 
- """ - assert oversample_ratio >= 1 - assert importance_sample_ratio <= 1 and importance_sample_ratio >= 0 - num_boxes = coarse_logits.shape[0] - num_sampled = int(num_points * oversample_ratio) - point_coords = torch.rand(num_boxes, num_sampled, 2, device=coarse_logits.device) - point_logits = point_sample(coarse_logits, point_coords, align_corners=False) - # It is crucial to calculate uncertainty based on the sampled prediction value for the points. - # Calculating uncertainties of the coarse predictions first and sampling them for points leads - # to incorrect results. - # To illustrate this: assume uncertainty_func(logits)=-abs(logits), a sampled point between - # two coarse predictions with -1 and 1 logits has 0 logits, and therefore 0 uncertainty value. - # However, if we calculate uncertainties for the coarse predictions first, - # both will have -1 uncertainty, and the sampled point will get -1 uncertainty. - point_uncertainties = uncertainty_func(point_logits) - num_uncertain_points = int(importance_sample_ratio * num_points) - num_random_points = num_points - num_uncertain_points - idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] - shift = num_sampled * torch.arange(num_boxes, dtype=torch.long, device=coarse_logits.device) - idx += shift[:, None] - point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( - num_boxes, num_uncertain_points, 2 - ) - if num_random_points > 0: - point_coords = cat( - [ - point_coords, - torch.rand(num_boxes, num_random_points, 2, device=coarse_logits.device), - ], - dim=1, - ) - return point_coords - - -def get_uncertain_point_coords_on_grid(uncertainty_map, num_points): - """ - Find `num_points` most uncertain points from `uncertainty_map` grid. - - Args: - uncertainty_map (Tensor): A tensor of shape (N, 1, H, W) that contains uncertainty - values for a set of points on a regular H x W grid. - num_points (int): The number of points P to select. - - Returns: - point_indices (Tensor): A tensor of shape (N, P) that contains indices from - [0, H x W) of the most uncertain points. - point_coords (Tensor): A tensor of shape (N, P, 2) that contains [0, 1] x [0, 1] normalized - coordinates of the most uncertain points from the H x W grid. - """ - R, _, H, W = uncertainty_map.shape - h_step = 1.0 / float(H) - w_step = 1.0 / float(W) - - num_points = min(H * W, num_points) - point_indices = torch.topk(uncertainty_map.view(R, H * W), k=num_points, dim=1)[1] - point_coords = torch.zeros(R, num_points, 2, dtype=torch.float, device=uncertainty_map.device) - point_coords[:, :, 0] = w_step / 2.0 + (point_indices % W).to(torch.float) * w_step - point_coords[:, :, 1] = h_step / 2.0 + (point_indices // W).to(torch.float) * h_step - return point_indices, point_coords - - -def point_sample_fine_grained_features(features_list, feature_scales, boxes, point_coords): - """ - Get features from feature maps in `features_list` that correspond to specific point coordinates - inside each bounding box from `boxes`. - - Args: - features_list (list[Tensor]): A list of feature map tensors to get features from. - feature_scales (list[float]): A list of scales for tensors in `features_list`. - boxes (list[Boxes]): A list of I Boxes objects that contain R_1 + ... + R_I = R boxes all - together. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. 
- - Returns: - point_features (Tensor): A tensor of shape (R, C, P) that contains features sampled - from all features maps in feature_list for P sampled points for all R boxes in `boxes`. - point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains image-level - coordinates of P points. - """ - cat_boxes = Boxes.cat(boxes) - num_boxes = [b.tensor.size(0) for b in boxes] - - point_coords_wrt_image = get_point_coords_wrt_image(cat_boxes.tensor, point_coords) - split_point_coords_wrt_image = torch.split(point_coords_wrt_image, num_boxes) - - point_features = [] - for idx_img, point_coords_wrt_image_per_image in enumerate(split_point_coords_wrt_image): - point_features_per_image = [] - for idx_feature, feature_map in enumerate(features_list): - h, w = feature_map.shape[-2:] - scale = shapes_to_tensor([w, h]) / feature_scales[idx_feature] - point_coords_scaled = point_coords_wrt_image_per_image / scale.to(feature_map.device) - point_features_per_image.append( - point_sample( - feature_map[idx_img].unsqueeze(0), - point_coords_scaled.unsqueeze(0), - align_corners=False, - ) - .squeeze(0) - .transpose(1, 0) - ) - point_features.append(cat(point_features_per_image, dim=1)) - - return cat(point_features, dim=0), point_coords_wrt_image - - -def get_point_coords_wrt_image(boxes_coords, point_coords): - """ - Convert box-normalized [0, 1] x [0, 1] point cooordinates to image-level coordinates. - - Args: - boxes_coords (Tensor): A tensor of shape (R, 4) that contains bounding boxes. - coordinates. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. - - Returns: - point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains - image-normalized coordinates of P sampled points. - """ - with torch.no_grad(): - point_coords_wrt_image = point_coords.clone() - point_coords_wrt_image[:, :, 0] = point_coords_wrt_image[:, :, 0] * ( - boxes_coords[:, None, 2] - boxes_coords[:, None, 0] - ) - point_coords_wrt_image[:, :, 1] = point_coords_wrt_image[:, :, 1] * ( - boxes_coords[:, None, 3] - boxes_coords[:, None, 1] - ) - point_coords_wrt_image[:, :, 0] += boxes_coords[:, None, 0] - point_coords_wrt_image[:, :, 1] += boxes_coords[:, None, 1] - return point_coords_wrt_image - - -def sample_point_labels(instances, point_coords): - """ - Sample point labels from ground truth mask given point_coords. - - Args: - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. So, i_th elememt of the list contains R_i objects and R_1 + ... + R_N is - equal to R. The ground-truth gt_masks in each instance will be used to compute labels. - points_coords (Tensor): A tensor of shape (R, P, 2), where R is the total number of - instances and P is the number of points for each instance. The coordinates are in - the absolute image pixel coordinate space, i.e. [0, H] x [0, W]. - - Returns: - Tensor: A tensor of shape (R, P) that contains the labels of P sampled points. - """ - with torch.no_grad(): - gt_mask_logits = [] - point_coords_splits = torch.split( - point_coords, [len(instances_per_image) for instances_per_image in instances] - ) - for i, instances_per_image in enumerate(instances): - if len(instances_per_image) == 0: - continue - assert isinstance( - instances_per_image.gt_masks, BitMasks - ), "Point head works with GT in 'bitmask' format. Set INPUT.MASK_FORMAT to 'bitmask'." 
- - gt_bit_masks = instances_per_image.gt_masks.tensor - h, w = instances_per_image.gt_masks.image_size - scale = torch.tensor([w, h], dtype=torch.float, device=gt_bit_masks.device) - points_coord_grid_sample_format = point_coords_splits[i] / scale - gt_mask_logits.append( - point_sample( - gt_bit_masks.to(torch.float32).unsqueeze(1), - points_coord_grid_sample_format, - align_corners=False, - ).squeeze(1) - ) - - point_labels = cat(gt_mask_logits) - return point_labels diff --git a/detectron2/projects/PointRend/point_rend/point_head.py b/detectron2/projects/PointRend/point_rend/point_head.py deleted file mode 100644 index 1786fad5c54841faf86b1fbef83d909e3bf2b1f9..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/point_head.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import numpy as np -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import ShapeSpec, cat -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -POINT_HEAD_REGISTRY = Registry("POINT_HEAD") -POINT_HEAD_REGISTRY.__doc__ = """ -Registry for point heads, which makes prediction for a given set of per-point features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def roi_mask_point_loss(mask_logits, instances, point_labels): - """ - Compute the point-based loss for instance segmentation mask predictions - given point-wise mask prediction and its corresponding point-wise labels. - Args: - mask_logits (Tensor): A tensor of shape (R, C, P) or (R, 1, P) for class-specific or - class-agnostic, where R is the total number of predicted masks in all images, C is the - number of foreground classes, and P is the number of points sampled for each mask. - The values are logits. - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 correspondence with the `mask_logits`. So, i_th - elememt of the list contains R_i objects and R_1 + ... + R_N is equal to R. - The ground-truth labels (class, box, mask, ...) associated with each instance are stored - in fields. - point_labels (Tensor): A tensor of shape (R, P), where R is the total number of - predicted masks and P is the number of points for each mask. - Labels with value of -1 will be ignored. - Returns: - point_loss (Tensor): A scalar tensor containing the loss. 
- """ - with torch.no_grad(): - cls_agnostic_mask = mask_logits.size(1) == 1 - total_num_masks = mask_logits.size(0) - - gt_classes = [] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - - if not cls_agnostic_mask: - gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) - gt_classes.append(gt_classes_per_image) - - gt_mask_logits = point_labels - point_ignores = point_labels == -1 - if gt_mask_logits.shape[0] == 0: - return mask_logits.sum() * 0 - - assert gt_mask_logits.numel() > 0, gt_mask_logits.shape - - if cls_agnostic_mask: - mask_logits = mask_logits[:, 0] - else: - indices = torch.arange(total_num_masks) - gt_classes = cat(gt_classes, dim=0) - mask_logits = mask_logits[indices, gt_classes] - - # Log the training accuracy (using gt classes and 0.0 threshold for the logits) - mask_accurate = (mask_logits > 0.0) == gt_mask_logits.to(dtype=torch.uint8) - mask_accurate = mask_accurate[~point_ignores] - mask_accuracy = mask_accurate.nonzero().size(0) / max(mask_accurate.numel(), 1.0) - get_event_storage().put_scalar("point/accuracy", mask_accuracy) - - point_loss = F.binary_cross_entropy_with_logits( - mask_logits, gt_mask_logits.to(dtype=torch.float32), weight=~point_ignores, reduction="mean" - ) - return point_loss - - -@POINT_HEAD_REGISTRY.register() -class StandardPointHead(nn.Module): - """ - A point head multi-layer perceptron which we model with conv1d layers with kernel 1. The head - takes both fine-grained and coarse prediction features as its input. - """ - - def __init__(self, cfg, input_shape: ShapeSpec): - """ - The following attributes are parsed from config: - fc_dim: the output dimension of each FC layers - num_fc: the number of FC layers - coarse_pred_each_layer: if True, coarse prediction features are concatenated to each - layer's input - """ - super(StandardPointHead, self).__init__() - # fmt: off - num_classes = cfg.MODEL.POINT_HEAD.NUM_CLASSES - fc_dim = cfg.MODEL.POINT_HEAD.FC_DIM - num_fc = cfg.MODEL.POINT_HEAD.NUM_FC - cls_agnostic_mask = cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK - self.coarse_pred_each_layer = cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER - input_channels = input_shape.channels - # fmt: on - - fc_dim_in = input_channels + num_classes - self.fc_layers = [] - for k in range(num_fc): - fc = nn.Conv1d(fc_dim_in, fc_dim, kernel_size=1, stride=1, padding=0, bias=True) - self.add_module("fc{}".format(k + 1), fc) - self.fc_layers.append(fc) - fc_dim_in = fc_dim - fc_dim_in += num_classes if self.coarse_pred_each_layer else 0 - - num_mask_classes = 1 if cls_agnostic_mask else num_classes - self.predictor = nn.Conv1d(fc_dim_in, num_mask_classes, kernel_size=1, stride=1, padding=0) - - for layer in self.fc_layers: - weight_init.c2_msra_fill(layer) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.predictor.weight, std=0.001) - if self.predictor.bias is not None: - nn.init.constant_(self.predictor.bias, 0) - - def forward(self, fine_grained_features, coarse_features): - x = torch.cat((fine_grained_features, coarse_features), dim=1) - for layer in self.fc_layers: - x = F.relu(layer(x)) - if self.coarse_pred_each_layer: - x = cat((x, coarse_features), dim=1) - return self.predictor(x) - - -@POINT_HEAD_REGISTRY.register() -class ImplicitPointHead(nn.Module): - """ - A point head multi-layer perceptron which we model with conv1d layers with kernel 1. The head - takes both fine-grained features and instance-wise MLP parameters as its input. 
- """ - - def __init__(self, cfg, input_shape: ShapeSpec): - """ - The following attributes are parsed from config: - channels: the output dimension of each FC layers - num_layers: the number of FC layers (including the final prediction layer) - image_feature_enabled: if True, fine-grained image-level features are used - positional_encoding_enabled: if True, positional encoding is used - """ - super(ImplicitPointHead, self).__init__() - # fmt: off - self.num_layers = cfg.MODEL.POINT_HEAD.NUM_FC + 1 - self.channels = cfg.MODEL.POINT_HEAD.FC_DIM - self.image_feature_enabled = cfg.MODEL.IMPLICIT_POINTREND.IMAGE_FEATURE_ENABLED - self.positional_encoding_enabled = cfg.MODEL.IMPLICIT_POINTREND.POS_ENC_ENABLED - self.num_classes = ( - cfg.MODEL.POINT_HEAD.NUM_CLASSES if not cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK else 1 - ) - self.in_channels = input_shape.channels - # fmt: on - - if not self.image_feature_enabled: - self.in_channels = 0 - if self.positional_encoding_enabled: - self.in_channels += 256 - self.register_buffer("positional_encoding_gaussian_matrix", torch.randn((2, 128))) - - assert self.in_channels > 0 - - num_weight_params, num_bias_params = [], [] - assert self.num_layers >= 2 - for l in range(self.num_layers): - if l == 0: - # input layer - num_weight_params.append(self.in_channels * self.channels) - num_bias_params.append(self.channels) - elif l == self.num_layers - 1: - # output layer - num_weight_params.append(self.channels * self.num_classes) - num_bias_params.append(self.num_classes) - else: - # intermediate layer - num_weight_params.append(self.channels * self.channels) - num_bias_params.append(self.channels) - - self.num_weight_params = num_weight_params - self.num_bias_params = num_bias_params - self.num_params = sum(num_weight_params) + sum(num_bias_params) - - def forward(self, fine_grained_features, point_coords, parameters): - # features: [R, channels, K] - # point_coords: [R, K, 2] - num_instances = fine_grained_features.size(0) - num_points = fine_grained_features.size(2) - - if num_instances == 0: - return torch.zeros((0, 1, num_points), device=fine_grained_features.device) - - if self.positional_encoding_enabled: - # locations: [R*K, 2] - locations = 2 * point_coords.reshape(num_instances * num_points, 2) - 1 - locations = locations @ self.positional_encoding_gaussian_matrix.to(locations.device) - locations = 2 * np.pi * locations - locations = torch.cat([torch.sin(locations), torch.cos(locations)], dim=1) - # locations: [R, C, K] - locations = locations.reshape(num_instances, num_points, 256).permute(0, 2, 1) - if not self.image_feature_enabled: - fine_grained_features = locations - else: - fine_grained_features = torch.cat([locations, fine_grained_features], dim=1) - - # features [R, C, K] - mask_feat = fine_grained_features.reshape(num_instances, self.in_channels, num_points) - - weights, biases = self._parse_params( - parameters, - self.in_channels, - self.channels, - self.num_classes, - self.num_weight_params, - self.num_bias_params, - ) - - point_logits = self._dynamic_mlp(mask_feat, weights, biases, num_instances) - point_logits = point_logits.reshape(-1, self.num_classes, num_points) - - return point_logits - - @staticmethod - def _dynamic_mlp(features, weights, biases, num_instances): - assert features.dim() == 3, features.dim() - n_layers = len(weights) - x = features - for i, (w, b) in enumerate(zip(weights, biases)): - x = torch.einsum("nck,ndc->ndk", x, w) + b - if i < n_layers - 1: - x = F.relu(x) - return x - - @staticmethod - def _parse_params( - 
pred_params, - in_channels, - channels, - num_classes, - num_weight_params, - num_bias_params, - ): - assert pred_params.dim() == 2 - assert len(num_weight_params) == len(num_bias_params) - assert pred_params.size(1) == sum(num_weight_params) + sum(num_bias_params) - - num_instances = pred_params.size(0) - num_layers = len(num_weight_params) - - params_splits = list( - torch.split_with_sizes(pred_params, num_weight_params + num_bias_params, dim=1) - ) - - weight_splits = params_splits[:num_layers] - bias_splits = params_splits[num_layers:] - - for l in range(num_layers): - if l == 0: - # input layer - weight_splits[l] = weight_splits[l].reshape(num_instances, channels, in_channels) - bias_splits[l] = bias_splits[l].reshape(num_instances, channels, 1) - elif l < num_layers - 1: - # intermediate layer - weight_splits[l] = weight_splits[l].reshape(num_instances, channels, channels) - bias_splits[l] = bias_splits[l].reshape(num_instances, channels, 1) - else: - # output layer - weight_splits[l] = weight_splits[l].reshape(num_instances, num_classes, channels) - bias_splits[l] = bias_splits[l].reshape(num_instances, num_classes, 1) - - return weight_splits, bias_splits - - -def build_point_head(cfg, input_channels): - """ - Build a point head defined by `cfg.MODEL.POINT_HEAD.NAME`. - """ - head_name = cfg.MODEL.POINT_HEAD.NAME - return POINT_HEAD_REGISTRY.get(head_name)(cfg, input_channels) diff --git a/detectron2/projects/PointRend/point_rend/roi_heads.py b/detectron2/projects/PointRend/point_rend/roi_heads.py deleted file mode 100644 index 74ccc34a1193c604fcc34b8deed5ece53fee3f19..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/point_rend/roi_heads.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging - -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads - - -@ROI_HEADS_REGISTRY.register() -class PointRendROIHeads(StandardROIHeads): - """ - Identical to StandardROIHeads, except for some weights conversion code to - handle old models. - """ - - _version = 2 - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - version = local_metadata.get("version", None) - if version is None or version < 2: - logger = logging.getLogger(__name__) - logger.warning( - "Weight format of PointRend models have changed! " - "Please upgrade your models. Applying automatic conversion now ..." - ) - for k in list(state_dict.keys()): - newk = k - if k.startswith(prefix + "mask_point_head"): - newk = k.replace(prefix + "mask_point_head", prefix + "mask_head.point_head") - if k.startswith(prefix + "mask_coarse_head"): - newk = k.replace(prefix + "mask_coarse_head", prefix + "mask_head.coarse_head") - if newk != k: - state_dict[newk] = state_dict[k] - del state_dict[k] - - @classmethod - def _init_mask_head(cls, cfg, input_shape): - if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.NAME != "PointRendMaskHead": - logger = logging.getLogger(__name__) - logger.warning( - "Config of PointRend models have changed! " - "Please upgrade your models. Applying automatic conversion now ..." 
-            )
-            assert cfg.MODEL.ROI_MASK_HEAD.NAME == "CoarseMaskHead"
-            cfg.defrost()
-            cfg.MODEL.ROI_MASK_HEAD.NAME = "PointRendMaskHead"
-            cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE = ""
-            cfg.freeze()
-        return super()._init_mask_head(cfg, input_shape)
diff --git a/detectron2/projects/PointRend/point_rend/semantic_seg.py b/detectron2/projects/PointRend/point_rend/semantic_seg.py
deleted file mode 100644
index ea65200996777022cbb1c3c5dd9c943b67ca4ab1..0000000000000000000000000000000000000000
--- a/detectron2/projects/PointRend/point_rend/semantic_seg.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-import numpy as np
-from typing import Dict
-import torch
-from torch import nn
-from torch.nn import functional as F
-
-from detectron2.layers import ShapeSpec, cat
-from detectron2.modeling import SEM_SEG_HEADS_REGISTRY
-
-from .point_features import (
-    get_uncertain_point_coords_on_grid,
-    get_uncertain_point_coords_with_randomness,
-    point_sample,
-)
-from .point_head import build_point_head
-
-
-def calculate_uncertainty(sem_seg_logits):
-    """
-    For each location of the prediction `sem_seg_logits` we estimate uncertainty as the
-    difference between the two highest predicted logits.
-
-    Args:
-        sem_seg_logits (Tensor): A tensor of shape (N, C, ...), where N is the minibatch size and
-            C is the number of classes. The values are logits.
-
-    Returns:
-        scores (Tensor): A tensor of shape (N, 1, ...) that contains uncertainty scores with
-            the most uncertain locations having the highest uncertainty score.
-    """
-    top2_scores = torch.topk(sem_seg_logits, k=2, dim=1)[0]
-    return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1)
-
-
-@SEM_SEG_HEADS_REGISTRY.register()
-class PointRendSemSegHead(nn.Module):
-    """
-    A semantic segmentation head that combines a head set in `POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME`
-    and a point head set in `MODEL.POINT_HEAD.NAME`.
- """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - - self.coarse_sem_seg_head = SEM_SEG_HEADS_REGISTRY.get( - cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME - )(cfg, input_shape) - self._init_point_head(cfg, input_shape) - - def _init_point_head(self, cfg, input_shape: Dict[str, ShapeSpec]): - # fmt: off - assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - feature_channels = {k: v.channels for k, v in input_shape.items()} - self.in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - self.oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO - self.importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO - self.subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = int(np.sum([feature_channels[f] for f in self.in_features])) - self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1)) - - def forward(self, features, targets=None): - coarse_sem_seg_logits = self.coarse_sem_seg_head.layers(features) - - if self.training: - losses = self.coarse_sem_seg_head.losses(coarse_sem_seg_logits, targets) - - with torch.no_grad(): - point_coords = get_uncertain_point_coords_with_randomness( - coarse_sem_seg_logits, - calculate_uncertainty, - self.train_num_points, - self.oversample_ratio, - self.importance_sample_ratio, - ) - coarse_features = point_sample(coarse_sem_seg_logits, point_coords, align_corners=False) - - fine_grained_features = cat( - [ - point_sample(features[in_feature], point_coords, align_corners=False) - for in_feature in self.in_features - ], - dim=1, - ) - point_logits = self.point_head(fine_grained_features, coarse_features) - point_targets = ( - point_sample( - targets.unsqueeze(1).to(torch.float), - point_coords, - mode="nearest", - align_corners=False, - ) - .squeeze(1) - .to(torch.long) - ) - losses["loss_sem_seg_point"] = F.cross_entropy( - point_logits, point_targets, reduction="mean", ignore_index=self.ignore_value - ) - return None, losses - else: - sem_seg_logits = coarse_sem_seg_logits.clone() - for _ in range(self.subdivision_steps): - sem_seg_logits = F.interpolate( - sem_seg_logits, scale_factor=2, mode="bilinear", align_corners=False - ) - uncertainty_map = calculate_uncertainty(sem_seg_logits) - point_indices, point_coords = get_uncertain_point_coords_on_grid( - uncertainty_map, self.subdivision_num_points - ) - fine_grained_features = cat( - [ - point_sample(features[in_feature], point_coords, align_corners=False) - for in_feature in self.in_features - ] - ) - coarse_features = point_sample( - coarse_sem_seg_logits, point_coords, align_corners=False - ) - point_logits = self.point_head(fine_grained_features, coarse_features) - - # put sem seg point predictions to the right places on the upsampled grid. 
- N, C, H, W = sem_seg_logits.shape - point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) - sem_seg_logits = ( - sem_seg_logits.reshape(N, C, H * W) - .scatter_(2, point_indices, point_logits) - .view(N, C, H, W) - ) - return sem_seg_logits, {} diff --git a/detectron2/projects/PointRend/train_net.py b/detectron2/projects/PointRend/train_net.py deleted file mode 100644 index a490658e1e638d1de343b5af1ce6fee4eda1c2e8..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointRend/train_net.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -PointRend Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -import detectron2.data.transforms as T -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import DatasetMapper, MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - DatasetEvaluators, - LVISEvaluator, - SemSegEvaluator, - verify_results, -) -from detectron2.projects.point_rend import ColorAugSSDTransform, add_pointrend_config - - -def build_sem_seg_train_aug(cfg): - augs = [ - T.ResizeShortestEdge( - cfg.INPUT.MIN_SIZE_TRAIN, - cfg.INPUT.MAX_SIZE_TRAIN, - cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, - ) - ] - if cfg.INPUT.CROP.ENABLED: - augs.append( - T.RandomCrop_CategoryAreaConstraint( - cfg.INPUT.CROP.TYPE, - cfg.INPUT.CROP.SIZE, - cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA, - cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - ) - ) - if cfg.INPUT.COLOR_AUG_SSD: - augs.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) - augs.append(T.RandomFlip()) - return augs - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. 
- """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, output_dir=output_folder) - if evaluator_type == "coco": - return COCOEvaluator(dataset_name, output_dir=output_folder) - if evaluator_type == "sem_seg": - return SemSegEvaluator( - dataset_name, - distributed=True, - output_dir=output_folder, - ) - if evaluator_type == "cityscapes_instance": - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - return CityscapesSemSegEvaluator(dataset_name) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE: - mapper = DatasetMapper(cfg, is_train=True, augmentations=build_sem_seg_train_aug(cfg)) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_pointrend_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/PointSup/README.md b/detectron2/projects/PointSup/README.md deleted file mode 100644 index 75ce084530d192a522824d01b98a474d77863e68..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Pointly-Supervised Instance Segmentation - -Bowen Cheng, Omkar Parkhi, Alexander Kirillov - -[[`arXiv`](https://arxiv.org/abs/2104.06404)] [[`Project`](https://bowenc0221.github.io/point-sup)] [[`BibTeX`](#CitingPointSup)] - -
- -## Data preparation -Please follow these steps to prepare your datasets: -1. Follow official Detectron2 instruction to prepare COCO dataset. Set up `DETECTRON2_DATASETS` environment variable to the location of your Detectron2 dataset. -2. Generate 10-points annotations for COCO by running: `python tools/prepare_coco_point_annotations_without_masks.py 10` - -## Training - -To train a model with 8 GPUs run: -```bash -python train_net.py --config-file configs/mask_rcnn_R_50_FPN_3x_point_sup_point_aug_coco.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -python train_net.py --config-file configs/mask_rcnn_R_50_FPN_3x_point_sup_point_aug_coco.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -## Citing Pointly-Supervised Instance Segmentation - -If you use PointSup, please use the following BibTeX entry. - -```BibTeX -@article{cheng2021pointly, - title={Pointly-Supervised Instance Segmentation}, - author={Bowen Cheng and Omkar Parkhi and Alexander Kirillov}, - journal={arXiv}, - year={2021} -} -``` diff --git a/detectron2/projects/PointSup/configs/implicit_pointrend_R_50_FPN_3x_point_sup_point_aug_coco.yaml b/detectron2/projects/PointSup/configs/implicit_pointrend_R_50_FPN_3x_point_sup_point_aug_coco.yaml deleted file mode 100644 index 5b3d4272c6f8a3820c8d354bfb3c915ccdebfc4a..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/configs/implicit_pointrend_R_50_FPN_3x_point_sup_point_aug_coco.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../../PointRend/configs/InstanceSegmentation/implicit_pointrend_R_50_FPN_3x_coco.yaml" -MODEL: - ROI_MASK_HEAD: - NAME: "ImplicitPointRendPointSupHead" -INPUT: - POINT_SUP: True - SAMPLE_POINTS: 5 -DATASETS: - TRAIN: ("coco_2017_train_points_n10_v1_without_masks",) diff --git a/detectron2/projects/PointSup/configs/mask_rcnn_R_50_FPN_3x_point_sup_coco.yaml b/detectron2/projects/PointSup/configs/mask_rcnn_R_50_FPN_3x_point_sup_coco.yaml deleted file mode 100644 index 157e3844ef68779cda3579bee5d8c132826c9fba..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/configs/mask_rcnn_R_50_FPN_3x_point_sup_coco.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../../../configs/Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsamplePointSupHead" -INPUT: - POINT_SUP: True -DATASETS: - TRAIN: ("coco_2017_train_points_n10_v1_without_masks",) -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/projects/PointSup/configs/mask_rcnn_R_50_FPN_3x_point_sup_point_aug_coco.yaml b/detectron2/projects/PointSup/configs/mask_rcnn_R_50_FPN_3x_point_sup_point_aug_coco.yaml deleted file mode 100644 index 4b11224d595bed88238e02caeb4833b0b1d7b286..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/configs/mask_rcnn_R_50_FPN_3x_point_sup_point_aug_coco.yaml +++ /dev/null @@ -1,3 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_point_sup_coco.yaml" -INPUT: - SAMPLE_POINTS: 5 diff --git a/detectron2/projects/PointSup/point_sup/__init__.py b/detectron2/projects/PointSup/point_sup/__init__.py deleted file mode 100644 index 510e3814ac1bb273b48804191b4a7c1272ea9a9b..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from . 
import register_point_annotations -from .config import add_point_sup_config -from .dataset_mapper import PointSupDatasetMapper -from .mask_head import MaskRCNNConvUpsamplePointSupHead -from .point_utils import get_point_coords_from_point_annotation diff --git a/detectron2/projects/PointSup/point_sup/config.py b/detectron2/projects/PointSup/point_sup/config.py deleted file mode 100644 index 5e00b786cf6055a0cda664f143c1fac56a3c6d11..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/config.py +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -def add_point_sup_config(cfg): - """ - Add config for point supervision. - """ - # Use point annotation - cfg.INPUT.POINT_SUP = False - # Sample only part of points in each iteration. - # Default: 0, use all available points. - cfg.INPUT.SAMPLE_POINTS = 0 diff --git a/detectron2/projects/PointSup/point_sup/dataset_mapper.py b/detectron2/projects/PointSup/point_sup/dataset_mapper.py deleted file mode 100644 index aba7b9fd2a0366d3761af7022f9325040d750f0b..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/dataset_mapper.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -from typing import List, Union -import torch - -import detectron2.data.detection_utils as utils -import detectron2.data.transforms as T -from detectron2.config import configurable - -from .detection_utils import annotations_to_instances, transform_instance_annotations - -__all__ = [ - "PointSupDatasetMapper", -] - - -class PointSupDatasetMapper: - """ - The callable currently does the following: - 1. Read the image from "file_name" - 2. Applies transforms to the image and annotations - 3. Prepare data and annotations to Tensor and :class:`Instances` - """ - - @configurable - def __init__( - self, - is_train: bool, - *, - augmentations: List[Union[T.Augmentation, T.Transform]], - image_format: str, - # Extra data augmentation for point supervision - sample_points: int = 0, - ): - """ - NOTE: this interface is experimental. - - Args: - is_train: whether it's used in training or inference - augmentations: a list of augmentations or deterministic transforms to apply - image_format: an image format supported by :func:`detection_utils.read_image`. - sample_points: subsample points at each iteration - """ - # fmt: off - self.is_train = is_train - self.augmentations = T.AugmentationList(augmentations) - self.image_format = image_format - self.sample_points = sample_points - # fmt: on - logger = logging.getLogger(__name__) - mode = "training" if is_train else "inference" - logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") - logger.info(f"Point Augmentations used in {mode}: sample {sample_points} points") - - @classmethod - def from_config(cls, cfg, is_train: bool = True): - augs = utils.build_augmentation(cfg, is_train) - if cfg.INPUT.CROP.ENABLED and is_train: - raise ValueError("Crop augmentation not supported to point supervision.") - - ret = { - "is_train": is_train, - "augmentations": augs, - "image_format": cfg.INPUT.FORMAT, - "sample_points": cfg.INPUT.SAMPLE_POINTS, - } - - return ret - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
- Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format=self.image_format) - utils.check_image_size(dataset_dict, image) - - aug_input = T.AugInput(image) - transforms = self.augmentations(aug_input) - image = aug_input.image - - image_shape = image.shape[:2] # h, w - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - - if not self.is_train: - dataset_dict.pop("annotations", None) - return dataset_dict - - if "annotations" in dataset_dict: - # Maps points from the closed interval [0, image_size - 1] on discrete - # image coordinates to the half-open interval [x1, x2) on continuous image - # coordinates. We use the continuous-discrete conversion from Heckbert - # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5, - # where d is a discrete coordinate and c is a continuous coordinate. - for ann in dataset_dict["annotations"]: - point_coords_wrt_image = np.array(ann["point_coords"]).astype(float) - point_coords_wrt_image = point_coords_wrt_image + 0.5 - ann["point_coords"] = point_coords_wrt_image - - annos = [ - # also need to transform point coordinates - transform_instance_annotations( - obj, - transforms, - image_shape, - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = annotations_to_instances( - annos, - image_shape, - sample_points=self.sample_points, - ) - - dataset_dict["instances"] = utils.filter_empty_instances(instances) - return dataset_dict diff --git a/detectron2/projects/PointSup/point_sup/detection_utils.py b/detectron2/projects/PointSup/point_sup/detection_utils.py deleted file mode 100644 index c97ffcbe29ff744a91585e75e225557b6c0f2a35..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/detection_utils.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import numpy as np -import torch - -# fmt: off -from detectron2.data.detection_utils import \ - annotations_to_instances as base_annotations_to_instances -from detectron2.data.detection_utils import \ - transform_instance_annotations as base_transform_instance_annotations - -# fmt: on - - -def annotations_to_instances(annos, image_size, sample_points=0): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. - image_size (tuple): height, width - sample_points (int): subsample points at each iteration - - Returns: - Instances: - It will contain fields "gt_boxes", "gt_classes", - "gt_point_coords", "gt_point_labels", if they can be obtained from `annos`. - This is the format that builtin models with point supervision expect. 
- """ - target = base_annotations_to_instances(annos, image_size) - - assert ("point_coords" in annos[0]) == ("point_labels" in annos[0]) - - if len(annos) and "point_labels" in annos[0]: - point_coords = [] - point_labels = [] - for i, _ in enumerate(annos): - # Already in the image coordinate system - point_coords_wrt_image = np.array(annos[i]["point_coords"]) - point_labels_wrt_image = np.array(annos[i]["point_labels"]) - - if sample_points > 0: - random_indices = np.random.choice( - point_coords_wrt_image.shape[0], - sample_points, - replace=point_coords_wrt_image.shape[0] < sample_points, - ).astype(int) - point_coords_wrt_image = point_coords_wrt_image[random_indices] - point_labels_wrt_image = point_labels_wrt_image[random_indices] - assert point_coords_wrt_image.shape[0] == point_labels_wrt_image.size - - point_coords.append(point_coords_wrt_image) - point_labels.append(point_labels_wrt_image) - - point_coords = torch.stack([torch.from_numpy(x) for x in point_coords]) - point_labels = torch.stack([torch.from_numpy(x) for x in point_labels]) - target.gt_point_coords = point_coords - target.gt_point_labels = point_labels - - return target - - -def transform_instance_annotations( - annotation, transforms, image_size, *, keypoint_hflip_indices=None -): - """ - Apply transforms to box, and point annotations of a single instance. - It will use `transforms.apply_box` for the box, and - `transforms.apply_coords` for points. - Args: - annotation (dict): dict of instance annotations for a single instance. - It will be modified in-place. - transforms (TransformList or list[Transform]): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - Returns: - dict: - the same input dict with fields "bbox", "point_coords", "point_labels" - transformed according to `transforms`. - The "bbox_mode" field will be set to XYXY_ABS. - """ - annotation = base_transform_instance_annotations( - annotation, transforms, image_size, keypoint_hflip_indices - ) - - assert ("point_coords" in annotation) == ("point_labels" in annotation) - if "point_coords" in annotation and "point_labels" in annotation: - point_coords = annotation["point_coords"] - point_labels = np.array(annotation["point_labels"]).astype(float) - point_coords = transforms.apply_coords(point_coords) - - # Set all out-of-boundary points to "unlabeled" - inside = (point_coords >= np.array([0, 0])) & (point_coords <= np.array(image_size[::-1])) - inside = inside.all(axis=1) - point_labels[~inside] = -1 - - annotation["point_coords"] = point_coords - annotation["point_labels"] = point_labels - - return annotation diff --git a/detectron2/projects/PointSup/point_sup/mask_head.py b/detectron2/projects/PointSup/point_sup/mask_head.py deleted file mode 100644 index 81c21f55009b1891c4684e2eaa8fee0f144b0a54..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/mask_head.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -from typing import Any, List - -from detectron2.modeling import ROI_MASK_HEAD_REGISTRY -from detectron2.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead, mask_rcnn_inference -from detectron2.projects.point_rend import ImplicitPointRendMaskHead -from detectron2.projects.point_rend.point_features import point_sample -from detectron2.projects.point_rend.point_head import roi_mask_point_loss -from detectron2.structures import Instances - -from .point_utils import get_point_coords_from_point_annotation - -__all__ = [ - "ImplicitPointRendPointSupHead", - "MaskRCNNConvUpsamplePointSupHead", -] - - -@ROI_MASK_HEAD_REGISTRY.register() -class MaskRCNNConvUpsamplePointSupHead(MaskRCNNConvUpsampleHead): - """ - A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). - Predictions are made with a final 1x1 conv layer. - - The difference with `MaskRCNNConvUpsampleHead` is that this head is trained - with point supervision. Please use the `MaskRCNNConvUpsampleHead` if you want - to train the model with mask supervision. - """ - - def forward(self, x, instances: List[Instances]) -> Any: - """ - Args: - x: input region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - Returns: - A dict of losses in training. The predicted "instances" in inference. - """ - x = self.layers(x) - if self.training: - N, C, H, W = x.shape - assert H == W - - proposal_boxes = [x.proposal_boxes for x in instances] - assert N == np.sum(len(x) for x in proposal_boxes) - - if N == 0: - return {"loss_mask": x.sum() * 0} - - # Training with point supervision - point_coords, point_labels = get_point_coords_from_point_annotation(instances) - - mask_logits = point_sample( - x, - point_coords, - align_corners=False, - ) - - return {"loss_mask": roi_mask_point_loss(mask_logits, instances, point_labels)} - else: - mask_rcnn_inference(x, instances) - return instances - - -@ROI_MASK_HEAD_REGISTRY.register() -class ImplicitPointRendPointSupHead(ImplicitPointRendMaskHead): - def _uniform_sample_train_points(self, instances): - assert self.training - # Please keep in mind that "gt_masks" is not used in this mask head. - point_coords, point_labels = get_point_coords_from_point_annotation(instances) - - return point_coords, point_labels diff --git a/detectron2/projects/PointSup/point_sup/point_utils.py b/detectron2/projects/PointSup/point_sup/point_utils.py deleted file mode 100644 index eed876ea9e0127c584c008bd5aab3e16e2c8c66a..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/point_utils.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.layers import cat - - -def get_point_coords_from_point_annotation(instances): - """ - Load point coords and their corresponding labels from point annotation. - - Args: - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 - correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, - ...) associated with each instance are stored in fields. 
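As a rough standalone illustration of the point-supervised loss path above: `point_sample` (from the PointRend project) is, to a first approximation, a thin wrapper around `F.grid_sample` that takes per-box point coordinates normalized to [0, 1]. The shapes and values below are made up.

```python
import torch
import torch.nn.functional as F

N, C, H, W = 2, 1, 28, 28                 # 2 proposals, 1 mask channel
mask_logits = torch.randn(N, C, H, W)     # stand-in for the mask head output
point_coords = torch.rand(N, 5, 2)        # 5 supervised points per proposal, in [0, 1]

# [0, 1] -> [-1, 1] as expected by grid_sample; add a dummy width dimension.
grid = 2.0 * point_coords.unsqueeze(2) - 1.0                      # (N, 5, 1, 2)
sampled = F.grid_sample(mask_logits, grid, align_corners=False).squeeze(3)
print(sampled.shape)  # torch.Size([2, 1, 5]) -- logits at the supervised points
```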
- Returns: - point_coords (Tensor): A tensor of shape (N, P, 2) that contains the coordinates of P - sampled points. - point_labels (Tensor): A tensor of shape (N, P) that contains the labels of P - sampled points. `point_labels` takes 3 possible values: - - 0: the point belongs to background - - 1: the point belongs to the object - - -1: the point is ignored during training - """ - point_coords_list = [] - point_labels_list = [] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - point_coords = instances_per_image.gt_point_coords.to(torch.float32) - point_labels = instances_per_image.gt_point_labels.to(torch.float32).clone() - proposal_boxes_per_image = instances_per_image.proposal_boxes.tensor - - # Convert point coordinate system, ground truth points are in image coord. - point_coords_wrt_box = get_point_coords_wrt_box(proposal_boxes_per_image, point_coords) - - # Ignore points that are outside predicted boxes. - point_ignores = ( - (point_coords_wrt_box[:, :, 0] < 0) - | (point_coords_wrt_box[:, :, 0] > 1) - | (point_coords_wrt_box[:, :, 1] < 0) - | (point_coords_wrt_box[:, :, 1] > 1) - ) - point_labels[point_ignores] = -1 - - point_coords_list.append(point_coords_wrt_box) - point_labels_list.append(point_labels) - - return ( - cat(point_coords_list, dim=0), - cat(point_labels_list, dim=0), - ) - - -def get_point_coords_wrt_box(boxes_coords, point_coords): - """ - Convert image-level absolute coordinates to box-normalized [0, 1] x [0, 1] point cooordinates. - Args: - boxes_coords (Tensor): A tensor of shape (R, 4) that contains bounding boxes. - coordinates. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - image-normalized coordinates of P sampled points. - Returns: - point_coords_wrt_box (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. - """ - with torch.no_grad(): - point_coords_wrt_box = point_coords.clone() - point_coords_wrt_box[:, :, 0] -= boxes_coords[:, None, 0] - point_coords_wrt_box[:, :, 1] -= boxes_coords[:, None, 1] - point_coords_wrt_box[:, :, 0] = point_coords_wrt_box[:, :, 0] / ( - boxes_coords[:, None, 2] - boxes_coords[:, None, 0] - ) - point_coords_wrt_box[:, :, 1] = point_coords_wrt_box[:, :, 1] / ( - boxes_coords[:, None, 3] - boxes_coords[:, None, 1] - ) - return point_coords_wrt_box diff --git a/detectron2/projects/PointSup/point_sup/register_point_annotations.py b/detectron2/projects/PointSup/point_sup/register_point_annotations.py deleted file mode 100644 index 32f2bb45e864e5be9d002f4d07badb91700ace4b..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/point_sup/register_point_annotations.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.data.datasets.builtin import _get_builtin_metadata -from detectron2.data.datasets.coco import load_coco_json - -logger = logging.getLogger(__name__) - - -# COCO dataset -def register_coco_instances_with_points(name, metadata, json_file, image_root): - """ - Register a dataset in COCO's json annotation format for - instance segmentation with point annotation. - - The point annotation json does not have "segmentation" field, instead, - it has "point_coords" and "point_labels" fields. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". 
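A small numeric check of the box normalization performed by `get_point_coords_wrt_box` above, using a single made-up proposal box:

```python
import torch

boxes = torch.tensor([[50.0, 30.0, 70.0, 50.0]])        # (R=1, 4), XYXY_ABS
points = torch.tensor([[[60.0, 40.0], [75.0, 40.0]]])   # (R=1, P=2, 2), image coords

wrt_box = points.clone()
wrt_box[..., 0] = (wrt_box[..., 0] - boxes[:, None, 0]) / (boxes[:, None, 2] - boxes[:, None, 0])
wrt_box[..., 1] = (wrt_box[..., 1] - boxes[:, None, 1]) / (boxes[:, None, 3] - boxes[:, None, 1])
print(wrt_box)  # [[[0.50, 0.50], [1.25, 0.50]]]; the second point lies outside
                # [0, 1] x [0, 1] and would get label -1 (ignored) upstream.
```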
- metadata (dict): extra metadata associated with this dataset. You can - leave it as an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. - """ - assert isinstance(name, str), name - assert isinstance(json_file, (str, os.PathLike)), json_file - assert isinstance(image_root, (str, os.PathLike)), image_root - # 1. register a function which returns dicts - DatasetCatalog.register( - name, lambda: load_coco_json(json_file, image_root, name, ["point_coords", "point_labels"]) - ) - - # 2. Optionally, add metadata about this dataset, - # since they might be useful in evaluation, visualization or logging - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata - ) - - -_PREDEFINED_SPLITS_COCO = {} -_PREDEFINED_SPLITS_COCO["coco"] = { - # point annotations without masks - "coco_2017_train_points_n10_v1_without_masks": ( - "coco/train2017", - "coco/annotations/instances_train2017_n10_v1_without_masks.json", - ), -} - - -def register_all_coco_train_points(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - # Assume pre-defined datasets live in `./datasets`. - register_coco_instances_with_points( - key, - _get_builtin_metadata(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - -# True for open source; -# Internally at fb, we register them elsewhere -if __name__.endswith(".register_point_annotations"): - _root = os.getenv("DETECTRON2_DATASETS", "datasets") - register_all_coco_train_points(_root) diff --git a/detectron2/projects/PointSup/tools/prepare_coco_point_annotations_without_masks.py b/detectron2/projects/PointSup/tools/prepare_coco_point_annotations_without_masks.py deleted file mode 100644 index 6d27a810af449110e3913cabddc6e43e5c58ce9a..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/tools/prepare_coco_point_annotations_without_masks.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import copy -import json -import numpy as np -import os -import sys -import pycocotools.mask as mask_utils - -from detectron2.utils.env import seed_all_rng -from detectron2.utils.file_io import PathManager - - -def get_point_annotations(input_filename, output_filename, num_points_per_instance): - with PathManager.open(input_filename, "r") as f: - coco_json = json.load(f) - - coco_annos = coco_json.pop("annotations") - coco_points_json = copy.deepcopy(coco_json) - - imgs = {} - for img in coco_json["images"]: - imgs[img["id"]] = img - - new_annos = [] - for ann in coco_annos: - # convert mask - t = imgs[ann["image_id"]] - h, w = t["height"], t["width"] - segm = ann.pop("segmentation") - if type(segm) is list: - # polygon -- a single object might consist of multiple parts - # we merge all parts into one mask rle code - rles = mask_utils.frPyObjects(segm, h, w) - rle = mask_utils.merge(rles) - elif type(segm["counts"]) is list: - # uncompressed RLE - rle = mask_utils.frPyObjects(segm, h, w) - else: - # rle - rle = segm - mask = mask_utils.decode(rle) - new_ann = copy.deepcopy(ann) - # sample points in image coordinates - box = ann["bbox"] - point_coords_wrt_image = np.random.rand(num_points_per_instance, 2) - point_coords_wrt_image[:, 0] = point_coords_wrt_image[:, 0] * box[2] - point_coords_wrt_image[:, 1] = point_coords_wrt_image[:, 1] * box[3] - point_coords_wrt_image[:, 0] += box[0] - point_coords_wrt_image[:, 1] += box[1] - # round to integer coordinates - point_coords_wrt_image = np.floor(point_coords_wrt_image).astype(int) - # get labels - assert (point_coords_wrt_image >= 0).all(), (point_coords_wrt_image, mask.shape) - assert (point_coords_wrt_image[:, 0] < w).all(), (point_coords_wrt_image, mask.shape) - assert (point_coords_wrt_image[:, 1] < h).all(), (point_coords_wrt_image, mask.shape) - point_labels = mask[point_coords_wrt_image[:, 1], point_coords_wrt_image[:, 0]] - # store new annotations - new_ann["point_coords"] = point_coords_wrt_image.tolist() - new_ann["point_labels"] = point_labels.tolist() - new_annos.append(new_ann) - coco_points_json["annotations"] = new_annos - - with PathManager.open(output_filename, "w") as f: - json.dump(coco_points_json, f) - - print("{} is modified and stored in {}.".format(input_filename, output_filename)) - - -if __name__ == "__main__": - """ - Generate point-based supervision for COCO dataset. 
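The sampling logic in `get_point_annotations` above boils down to drawing uniform points inside each COCO `[x, y, w, h]` box and reading labels off the decoded binary mask. A self-contained toy version with a synthetic mask and box:

```python
import numpy as np

rng = np.random.default_rng(0)
h, w, k = 100, 100, 10
mask = np.zeros((h, w), dtype=np.uint8)
mask[20:60, 30:80] = 1                      # fake instance mask
box = [25.0, 15.0, 60.0, 50.0]              # x, y, width, height

pts = rng.random((k, 2))
pts[:, 0] = pts[:, 0] * box[2] + box[0]     # x within the box
pts[:, 1] = pts[:, 1] * box[3] + box[1]     # y within the box
pts = np.floor(pts).astype(int)
labels = mask[pts[:, 1], pts[:, 0]]         # 1 = on the object, 0 = background
print(labels.tolist())
```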
- - Usage: - python tools/prepare_coco_point_annotations_without_masks.py \ - NUM_POINTS_PER_INSTANCE NUM_VERSIONS_WITH_DIFFERENT_SEED - - Example to generate point-based COCO dataset with 10 points per instance: - python tools/prepare_coco_point_annotations_without_masks.py 10 - """ - - # Fix random seed - seed_all_rng(12345) - - assert len(sys.argv) >= 2, "Please provide number of points to sample per instance" - dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco/annotations") - num_points_per_instance = int(sys.argv[1]) - if len(sys.argv) == 3: - repeat = int(sys.argv[2]) - else: - repeat = 1 - s = "instances_train2017" - for version in range(repeat): - print( - "Start sampling {} points per instance for annotations {}.".format( - num_points_per_instance, s - ) - ) - get_point_annotations( - os.path.join(dataset_dir, "{}.json".format(s)), - os.path.join( - dataset_dir, - "{}_n{}_v{}_without_masks.json".format(s, num_points_per_instance, version + 1), - ), - num_points_per_instance, - ) diff --git a/detectron2/projects/PointSup/train_net.py b/detectron2/projects/PointSup/train_net.py deleted file mode 100644 index 68f86c9cf4d43db68b7733f3fcacff285057fdec..0000000000000000000000000000000000000000 --- a/detectron2/projects/PointSup/train_net.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Point supervision Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results -from detectron2.projects.point_rend import add_pointrend_config -from detectron2.utils.logger import setup_logger - -from point_sup import PointSupDatasetMapper, add_point_sup_config - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains pre-defined default logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can write your - own training loop. You can use "tools/plain_train_net.py" as an example. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. 
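As the docstring above suggests, a custom dataset can skip the `evaluator_type` dispatch and construct its evaluator directly; a minimal sketch assuming a hypothetical registered dataset named `my_points_val`:

```python
from detectron2.evaluation import COCOEvaluator

evaluator = COCOEvaluator("my_points_val", output_dir="./output/inference")
```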
- """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "coco": - evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if cfg.INPUT.POINT_SUP: - mapper = PointSupDatasetMapper(cfg, is_train=True) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_pointrend_config(cfg) - add_point_sup_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - # Setup logger for "point_sup" module - setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="point_sup") - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - """ - If you'd like to do anything fancier than the standard training logic, - consider writing your own training loop (see plain_train_net.py) or - subclassing the trainer. - """ - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/README.md b/detectron2/projects/README.md deleted file mode 100644 index 7fb29afcf239797ffe5061aabfef3000d820e38f..0000000000000000000000000000000000000000 --- a/detectron2/projects/README.md +++ /dev/null @@ -1,50 +0,0 @@ - -Here are a few projects that are built on detectron2. -They are examples of how to use detectron2 as a library, to make your projects more -maintainable. - -## Projects by Facebook - -Note that these are research projects, and therefore may not have the same level -of support or stability as detectron2. 
- -+ [DensePose: Dense Human Pose Estimation In The Wild](DensePose) -+ [Scale-Aware Trident Networks for Object Detection](TridentNet) -+ [TensorMask: A Foundation for Dense Object Segmentation](TensorMask) -+ [Mesh R-CNN](https://github.com/facebookresearch/meshrcnn) -+ [PointRend: Image Segmentation as Rendering](PointRend) -+ [Momentum Contrast for Unsupervised Visual Representation Learning](https://github.com/facebookresearch/moco/tree/master/detection) -+ [DETR: End-to-End Object Detection with Transformers](https://github.com/facebookresearch/detr/tree/master/d2) -+ [Panoptic-DeepLab: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation](Panoptic-DeepLab) -+ [D2Go (Detectron2Go)](https://github.com/facebookresearch/d2go), an end-to-end production system for training and deployment for mobile platforms. -+ [Pointly-Supervised Instance Segmentation](PointSup) -+ [Unbiased Teacher for Semi-Supervised Object Detection](https://github.com/facebookresearch/unbiased-teacher) -+ [Rethinking "Batch" in BatchNorm](Rethinking-BatchNorm/) -+ [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://github.com/facebookresearch/MaskFormer) -+ [Exploring Plain Vision Transformer Backbones for Object Detection](ViTDet/) -+ [MViTv2: Improved Multiscale Vision Transformers for Classification and Detection](MViTv2/) - - -## External Projects - -External projects in the community that use detectron2: - - - -+ [AdelaiDet](https://github.com/aim-uofa/adet), a detection toolbox including FCOS, BlendMask, etc. -+ [CenterMask](https://github.com/youngwanLEE/centermask2) -+ [Res2Net backbones](https://github.com/Res2Net/Res2Net-detectron2) -+ [VoVNet backbones](https://github.com/youngwanLEE/vovnet-detectron2) -+ [FsDet](https://github.com/ucbdrive/few-shot-object-detection), Few-Shot Object Detection. -+ [Sparse R-CNN](https://github.com/PeizeSun/SparseR-CNN) -+ [BCNet](https://github.com/lkeab/BCNet), a bilayer decoupling instance segmentation method. -+ [DD3D](https://github.com/TRI-ML/dd3d), A fully convolutional 3D detector. -+ [detrex](https://github.com/IDEA-Research/detrex), a detection toolbox for transformer-based detection algorithms including Deformable-DETR, DAB-DETR, DN-DETR, DINO, etc. diff --git a/detectron2/projects/Rethinking-BatchNorm/README.md b/detectron2/projects/Rethinking-BatchNorm/README.md deleted file mode 100644 index 42c5c68fb4837043df62ff398f15fe0326f96e1c..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# Rethinking "Batch" in BatchNorm - -We provide configs that reproduce detection experiments in the paper [Rethinking "Batch" in BatchNorm](https://arxiv.org/abs/2105.07576). - -All configs can be trained with: - -``` -../../tools/lazyconfig_train_net.py --config-file configs/X.py --num-gpus 8 -``` - -## Mask R-CNN - -* `mask_rcnn_BNhead.py`, `mask_rcnn_BNhead_batch_stats.py`: - Mask R-CNN with BatchNorm in the head. See Table 3 in the paper. - -* `mask_rcnn_BNhead_shuffle.py`: Mask R-CNN with cross-GPU shuffling of head inputs. - See Figure 9 and Table 6 in the paper. - -* `mask_rcnn_SyncBNhead.py`: Mask R-CNN with cross-GPU SyncBatchNorm in the head. - It matches Table 6 in the paper. - -## RetinaNet - -* `retinanet_SyncBNhead.py`: RetinaNet with SyncBN in head, a straightforward implementation - which matches row 3 of Table 5. - -* `retinanet_SyncBNhead_SharedTraining.py`: RetinaNet with SyncBN in head, normalizing - all 5 feature levels together. 
Match row 1 of Table 5. - -The script `retinanet-eval-domain-specific.py` evaluates a checkpoint after recomputing -domain-specific statistics. Running it with -``` -./retinanet-eval-domain-specific.py checkpoint.pth -``` -on a model produced by the above two configs, can produce results that match row 4 and -row 2 of Table 5. diff --git a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead.py b/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead.py deleted file mode 100644 index 336c133e0e34ee82674d595ef98d1844f801fa4f..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead.py +++ /dev/null @@ -1,18 +0,0 @@ -from detectron2.model_zoo import get_config - -model = get_config("common/models/mask_rcnn_fpn.py").model - -model.backbone.bottom_up.freeze_at = 2 - -model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "BN" -# 4conv1fc head -model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] -model.roi_heads.box_head.fc_dims = [1024] - -dataloader = get_config("common/data/coco.py").dataloader -lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_3x -optimizer = get_config("common/optim.py").SGD -train = get_config("common/train.py").train - -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -train.max_iter = 270000 # 3x for batchsize = 16 diff --git a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead_batch_stats.py b/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead_batch_stats.py deleted file mode 100644 index 872e17c8a9aa000250a0a61613ddb3e3886f9991..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead_batch_stats.py +++ /dev/null @@ -1,20 +0,0 @@ -from torch.nn import BatchNorm2d -from torch.nn import functional as F - - -class BatchNormBatchStat(BatchNorm2d): - """ - BN that uses batch stat in inference - """ - - def forward(self, input): - if self.training: - return super().forward(input) - return F.batch_norm(input, None, None, self.weight, self.bias, True, 1.0, self.eps) - - -# After training with the base config, it's sufficient to load its model with -# this config only for inference -- because the training-time behavior is identical. 
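The `BatchNormBatchStat` module above simply calls `F.batch_norm` with `training=True` and no running buffers, so inference normalizes by the statistics of the current batch. A quick sanity check of that behavior (values made up):

```python
import torch
from torch.nn import functional as F

x = torch.randn(4, 8, 7, 7) * 3.0 + 5.0          # deliberately shifted/scaled input
weight, bias = torch.ones(8), torch.zeros(8)
y = F.batch_norm(x, None, None, weight, bias, True, 1.0, 1e-5)
print(round(y.mean().item(), 3), round(y.std().item(), 3))   # ~0.0 and ~1.0
```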
-from .mask_rcnn_BNhead import model, dataloader, lr_multiplier, optimizer, train - -model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = BatchNormBatchStat diff --git a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead_shuffle.py b/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead_shuffle.py deleted file mode 100644 index 5117a7dad0f952af02580e5373a7be52b749ee86..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_BNhead_shuffle.py +++ /dev/null @@ -1,74 +0,0 @@ -import math -import torch -import torch.distributed as dist - -from detectron2.modeling.roi_heads import FastRCNNConvFCHead, MaskRCNNConvUpsampleHead -from detectron2.utils import comm -from fvcore.nn.distributed import differentiable_all_gather - - -def concat_all_gather(input): - bs_int = input.shape[0] - size_list = comm.all_gather(bs_int) - max_size = max(size_list) - max_shape = (max_size,) + input.shape[1:] - - padded_input = input.new_zeros(max_shape) - padded_input[:bs_int] = input - all_inputs = differentiable_all_gather(padded_input) - inputs = [x[:sz] for sz, x in zip(size_list, all_inputs)] - return inputs, size_list - - -def batch_shuffle(x): - # gather from all gpus - batch_size_this = x.shape[0] - all_xs, batch_size_all = concat_all_gather(x) - all_xs_concat = torch.cat(all_xs, dim=0) - total_bs = sum(batch_size_all) - - rank = dist.get_rank() - assert batch_size_all[rank] == batch_size_this - - idx_range = (sum(batch_size_all[:rank]), sum(batch_size_all[: rank + 1])) - - # random shuffle index - idx_shuffle = torch.randperm(total_bs, device=x.device) - # broadcast to all gpus - dist.broadcast(idx_shuffle, src=0) - - # index for restoring - idx_unshuffle = torch.argsort(idx_shuffle) - - # shuffled index for this gpu - splits = torch.split(idx_shuffle, math.ceil(total_bs / dist.get_world_size())) - if len(splits) > rank: - idx_this = splits[rank] - else: - idx_this = idx_shuffle.new_zeros([0]) - return all_xs_concat[idx_this], idx_unshuffle[idx_range[0] : idx_range[1]] - - -def batch_unshuffle(x, idx_unshuffle): - all_x, _ = concat_all_gather(x) - x_gather = torch.cat(all_x, dim=0) - return x_gather[idx_unshuffle] - - -def wrap_shuffle(module_type, method): - def new_method(self, x): - if self.training: - x, idx = batch_shuffle(x) - x = getattr(module_type, method)(self, x) - if self.training: - x = batch_unshuffle(x, idx) - return x - - return type(module_type.__name__ + "WithShuffle", (module_type,), {method: new_method}) - - -from .mask_rcnn_BNhead import model, dataloader, lr_multiplier, optimizer, train - - -model.roi_heads.box_head._target_ = wrap_shuffle(FastRCNNConvFCHead, "forward") -model.roi_heads.mask_head._target_ = wrap_shuffle(MaskRCNNConvUpsampleHead, "layers") diff --git a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_SyncBNhead.py b/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_SyncBNhead.py deleted file mode 100644 index 5f05da03514a4ee6aa37d6bc3e678873ead73c61..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/configs/mask_rcnn_SyncBNhead.py +++ /dev/null @@ -1,3 +0,0 @@ -from .mask_rcnn_BNhead import model, dataloader, lr_multiplier, optimizer, train - -model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "SyncBN" diff --git a/detectron2/projects/Rethinking-BatchNorm/configs/retinanet_SyncBNhead.py b/detectron2/projects/Rethinking-BatchNorm/configs/retinanet_SyncBNhead.py deleted file mode 100644 index 
222dfddffb1f9bedf87f4c345534045b29e2d8ee..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/configs/retinanet_SyncBNhead.py +++ /dev/null @@ -1,19 +0,0 @@ -from detectron2.model_zoo import get_config -from torch import nn - -model = get_config("common/models/retinanet.py").model -model.backbone.bottom_up.freeze_at = 2 - -# The head will overwrite string "SyncBN" to use domain-specific BN, so we -# provide a class here to use shared BN in training. -model.head.norm = nn.SyncBatchNorm - -dataloader = get_config("common/data/coco.py").dataloader -lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_3x -optimizer = get_config("common/optim.py").SGD -train = get_config("common/train.py").train - -optimizer.lr = 0.01 - -train.init_checkpoint = "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -train.max_iter = 270000 # 3x for batchsize = 16 diff --git a/detectron2/projects/Rethinking-BatchNorm/configs/retinanet_SyncBNhead_SharedTraining.py b/detectron2/projects/Rethinking-BatchNorm/configs/retinanet_SyncBNhead_SharedTraining.py deleted file mode 100644 index 3f146009d04aad2fca08d970569a4d76d46c9bd2..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/configs/retinanet_SyncBNhead_SharedTraining.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import List -import torch -from torch import Tensor, nn - -from detectron2.modeling.meta_arch.retinanet import RetinaNetHead - - -def apply_sequential(inputs, modules): - for mod in modules: - if isinstance(mod, (nn.BatchNorm2d, nn.SyncBatchNorm)): - # for BN layer, normalize all inputs together - shapes = [i.shape for i in inputs] - spatial_sizes = [s[2] * s[3] for s in shapes] - x = [i.flatten(2) for i in inputs] - x = torch.cat(x, dim=2).unsqueeze(3) - x = mod(x).split(spatial_sizes, dim=2) - inputs = [i.view(s) for s, i in zip(shapes, x)] - else: - inputs = [mod(i) for i in inputs] - return inputs - - -class RetinaNetHead_SharedTrainingBN(RetinaNetHead): - def forward(self, features: List[Tensor]): - logits = apply_sequential(features, list(self.cls_subnet) + [self.cls_score]) - bbox_reg = apply_sequential(features, list(self.bbox_subnet) + [self.bbox_pred]) - return logits, bbox_reg - - -from .retinanet_SyncBNhead import model, dataloader, lr_multiplier, optimizer, train - -model.head._target_ = RetinaNetHead_SharedTrainingBN diff --git a/detectron2/projects/Rethinking-BatchNorm/retinanet-eval-domain-specific.py b/detectron2/projects/Rethinking-BatchNorm/retinanet-eval-domain-specific.py deleted file mode 100644 index 49a74adf1f286135c5551d9b31e722169f23b8f0..0000000000000000000000000000000000000000 --- a/detectron2/projects/Rethinking-BatchNorm/retinanet-eval-domain-specific.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates.
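For intuition, the shared-training-BN trick in `apply_sequential` above flattens every feature level, concatenates them along the spatial axis, normalizes once, and splits back. A shape-level sketch with made-up FPN sizes:

```python
import torch
from torch import nn

feats = [torch.randn(2, 16, s, s) for s in (32, 16, 8)]   # 3 levels, N=2, C=16
bn = nn.BatchNorm2d(16)

shapes = [f.shape for f in feats]
spatial = [s[2] * s[3] for s in shapes]
x = torch.cat([f.flatten(2) for f in feats], dim=2).unsqueeze(3)   # (2, 16, sum(HW), 1)
out = [xi.view(s) for s, xi in zip(shapes, bn(x).split(spatial, dim=2))]
print([tuple(o.shape) for o in out])   # same shapes as the inputs
```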
-import sys -import torch -from fvcore.nn.precise_bn import update_bn_stats - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import LazyConfig, instantiate -from detectron2.evaluation import inference_on_dataset -from detectron2.layers import CycleBatchNormList -from detectron2.utils.events import EventStorage -from detectron2.utils.logger import setup_logger - -logger = setup_logger() -setup_logger(name="fvcore") - - -if __name__ == "__main__": - checkpoint = sys.argv[1] - cfg = LazyConfig.load_rel("./configs/retinanet_SyncBNhead.py") - model = cfg.model - model.head.norm = lambda c: CycleBatchNormList(len(model.head_in_features), num_features=c) - model = instantiate(model) - model.cuda() - DetectionCheckpointer(model).load(checkpoint) - - cfg.dataloader.train.total_batch_size = 8 - logger.info("Running PreciseBN ...") - with EventStorage(), torch.no_grad(): - update_bn_stats(model, instantiate(cfg.dataloader.train), 500) - - logger.info("Running evaluation ...") - inference_on_dataset( - model, instantiate(cfg.dataloader.test), instantiate(cfg.dataloader.evaluator) - ) diff --git a/detectron2/projects/TensorMask/README.md b/detectron2/projects/TensorMask/README.md deleted file mode 100644 index e81307c4c9be8d1cb2fd27b716531f4ebcd9ae5c..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/README.md +++ /dev/null @@ -1,63 +0,0 @@ - -# TensorMask in Detectron2 -**A Foundation for Dense Object Segmentation** - -Xinlei Chen, Ross Girshick, Kaiming He, Piotr Dollár - -[[`arXiv`](https://arxiv.org/abs/1903.12174)] [[`BibTeX`](#CitingTensorMask)] - -<!-- figure: TensorMask illustration (image markup removed) -->
- -
- -In this repository, we release code for TensorMask in Detectron2. -TensorMask is a dense sliding-window instance segmentation framework that, for the first time, achieves results close to the well-developed Mask R-CNN framework -- both qualitatively and quantitatively. It establishes a conceptually complementary direction for object instance segmentation research. - -## Installation -First install Detectron2 following the [documentation](https://detectron2.readthedocs.io/tutorials/install.html) and -[setup the dataset](../../datasets). Then compile the TensorMask-specific op (`swap_align2nat`): -```bash -pip install -e /path/to/detectron2/projects/TensorMask -``` - -## Training - -To train a model, run: -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file -``` - -For example, to launch TensorMask BiPyramid training (1x schedule) with ResNet-50 backbone on 8 GPUs, -one should execute: -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file configs/tensormask_R_50_FPN_1x.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly (6x schedule with scale augmentation): -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file configs/tensormask_R_50_FPN_6x.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -# Pretrained Models - -| Backbone | lr sched | AP box | AP mask | download | -| -------- | -------- | -- | --- | -------- | -| R50 | 1x | 37.6 | 32.4 | model \|  metrics | -| R50 | 6x | 41.4 | 35.8 | model \|  metrics | - - -## Citing TensorMask - -If you use TensorMask, please use the following BibTeX entry. - -``` -@InProceedings{chen2019tensormask, - title={Tensormask: A Foundation for Dense Object Segmentation}, - author={Chen, Xinlei and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, - journal={The International Conference on Computer Vision (ICCV)}, - year={2019} -} -``` - diff --git a/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml b/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml deleted file mode 100644 index a7245349b4aa9cfa00f20074cc7cb5cdb02607f9..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml +++ /dev/null @@ -1,25 +0,0 @@ -MODEL: - META_ARCHITECTURE: "TensorMask" - MASK_ON: True - BACKBONE: - NAME: "build_retinanet_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[44, 60], [88, 120], [176, 240], [352, 480], [704, 960], [1408, 1920]] - ASPECT_RATIOS: [[1.0]] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - FUSE_TYPE: "avg" - TENSOR_MASK: - ALIGNED_ON: True - BIPYRAMID_ON: True -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -VERSION: 2 diff --git a/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml b/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml deleted file mode 100644 index 5d5eee135a93149a0c4b2148a47cee02e8aed8eb..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-TensorMask.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml b/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml deleted file mode 100644 index 
366a965c4adfdbba2482593c0c81f3e6af50dfd2..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "Base-TensorMask.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (480000, 520000) - MAX_ITER: 540000 -INPUT: - MIN_SIZE_TRAIN_SAMPLING: "range" - MIN_SIZE_TRAIN: (640, 800) diff --git a/detectron2/projects/TensorMask/setup.py b/detectron2/projects/TensorMask/setup.py deleted file mode 100644 index f6980e0dd2d2d239faed11e1474e1a8394c9b843..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/setup.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. - -import glob -import os -from setuptools import find_packages, setup -import torch -from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension - - -def get_extensions(): - this_dir = os.path.dirname(os.path.abspath(__file__)) - extensions_dir = os.path.join(this_dir, "tensormask", "layers", "csrc") - - main_source = os.path.join(extensions_dir, "vision.cpp") - sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) - source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( - os.path.join(extensions_dir, "*.cu") - ) - - sources = [main_source] + sources - - extension = CppExtension - - extra_compile_args = {"cxx": []} - define_macros = [] - - if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": - extension = CUDAExtension - sources += source_cuda - define_macros += [("WITH_CUDA", None)] - extra_compile_args["nvcc"] = [ - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - - # It's better if pytorch can do this by default .. - CC = os.environ.get("CC", None) - if CC is not None: - extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) - - sources = [os.path.join(extensions_dir, s) for s in sources] - - include_dirs = [extensions_dir] - - ext_modules = [ - extension( - "tensormask._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -setup( - name="tensormask", - version="0.1", - author="FAIR", - packages=find_packages(exclude=("configs", "tests")), - python_requires=">=3.7", - ext_modules=get_extensions(), - cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, -) diff --git a/detectron2/projects/TensorMask/tensormask/__init__.py b/detectron2/projects/TensorMask/tensormask/__init__.py deleted file mode 100644 index eec7978ac3c5204b1e51dac03ba3d45efc5b379d..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .config import add_tensormask_config -from .arch import TensorMask diff --git a/detectron2/projects/TensorMask/tensormask/arch.py b/detectron2/projects/TensorMask/tensormask/arch.py deleted file mode 100644 index d395beae6f81970cd96bc27331493a5f877024ec..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/arch.py +++ /dev/null @@ -1,913 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import copy -import math -from typing import List -import torch -import torch.nn.functional as F -from fvcore.nn import sigmoid_focal_loss_star_jit, smooth_l1_loss -from torch import nn - -from detectron2.layers import ShapeSpec, batched_nms, cat, paste_masks_in_image -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY -from detectron2.modeling.meta_arch.retinanet import permute_to_N_HWA_K -from detectron2.structures import Boxes, ImageList, Instances - -from tensormask.layers import SwapAlign2Nat - -__all__ = ["TensorMask"] - - -def permute_all_cls_and_box_to_N_HWA_K_and_concat(pred_logits, pred_anchor_deltas, num_classes=80): - """ - Rearrange the tensor layout from the network output, i.e.: - list[Tensor]: #lvl tensors of shape (N, A x K, Hi, Wi) - to per-image predictions, i.e.: - Tensor: of shape (N x sum(Hi x Wi x A), K) - """ - # for each feature level, permute the outputs to make them be in the - # same format as the labels. - pred_logits_flattened = [permute_to_N_HWA_K(x, num_classes) for x in pred_logits] - pred_anchor_deltas_flattened = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas] - # concatenate on the first dimension (representing the feature levels), to - # take into account the way the labels were generated (with all feature maps - # being concatenated as well) - pred_logits = cat(pred_logits_flattened, dim=1).view(-1, num_classes) - pred_anchor_deltas = cat(pred_anchor_deltas_flattened, dim=1).view(-1, 4) - return pred_logits, pred_anchor_deltas - - -def _assignment_rule( - gt_boxes, - anchor_boxes, - unit_lengths, - min_anchor_size, - scale_thresh=2.0, - spatial_thresh=1.0, - uniqueness_on=True, -): - """ - Given two lists of boxes of N ground truth boxes and M anchor boxes, - compute the assignment between the two, following the assignment rules in - https://arxiv.org/abs/1903.12174. - The box order must be (xmin, ymin, xmax, ymax), so please make sure to convert - to BoxMode.XYXY_ABS before calling this function. - - Args: - gt_boxes, anchor_boxes (Boxes): two Boxes. Contains N & M boxes/anchors, respectively. - unit_lengths (Tensor): Contains the unit lengths of M anchor boxes. - min_anchor_size (float): Minimum size of the anchor, in pixels - scale_thresh (float): The `scale` threshold: the maximum size of the anchor - should not be greater than scale_thresh x max(h, w) of - the ground truth box. - spatial_thresh (float): The `spatial` threshold: the l2 distance between the - center of the anchor and the ground truth box should not - be greater than spatial_thresh x u where u is the unit length. 
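A numeric sketch of the containment rule described above: the union of a ground-truth box and an anchor equals the anchor exactly when the anchor fully contains the ground truth (boxes below are made up, in XYXY order):

```python
import torch

gt = torch.tensor([[10.0, 10.0, 20.0, 20.0]])        # N = 1
anchors = torch.tensor([[0.0, 0.0, 32.0, 32.0],      # contains gt
                        [15.0, 15.0, 47.0, 47.0]])   # does not
lt = torch.min(gt[:, None, :2], anchors[:, :2])
rb = torch.max(gt[:, None, 2:], anchors[:, 2:])
union = torch.cat([lt, rb], dim=2)                   # (N, M, 4)
print(torch.all(union == anchors[None], dim=2))      # tensor([[ True, False]])
```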
- - Returns: - matches (Tensor[int64]): a vector of length M, where matches[i] is a matched - ground-truth index in [0, N) - match_labels (Tensor[int8]): a vector of length M, where pred_labels[i] indicates - whether a prediction is a true or false positive or ignored - """ - gt_boxes, anchor_boxes = gt_boxes.tensor, anchor_boxes.tensor - N = gt_boxes.shape[0] - M = anchor_boxes.shape[0] - if N == 0 or M == 0: - return ( - gt_boxes.new_full((N,), 0, dtype=torch.int64), - gt_boxes.new_full((N,), -1, dtype=torch.int8), - ) - - # Containment rule - lt = torch.min(gt_boxes[:, None, :2], anchor_boxes[:, :2]) # [N,M,2] - rb = torch.max(gt_boxes[:, None, 2:], anchor_boxes[:, 2:]) # [N,M,2] - union = cat([lt, rb], dim=2) # [N,M,4] - - dummy_gt_boxes = torch.zeros_like(gt_boxes) - anchor = dummy_gt_boxes[:, None, :] + anchor_boxes[:, :] # [N,M,4] - - contain_matrix = torch.all(union == anchor, dim=2) # [N,M] - - # Centrality rule, scale - gt_size_lower = torch.max(gt_boxes[:, 2:] - gt_boxes[:, :2], dim=1)[0] # [N] - gt_size_upper = gt_size_lower * scale_thresh # [N] - # Fall back for small objects - gt_size_upper[gt_size_upper < min_anchor_size] = min_anchor_size - # Due to sampling of locations, the anchor sizes are deducted with sampling strides - anchor_size = ( - torch.max(anchor_boxes[:, 2:] - anchor_boxes[:, :2], dim=1)[0] - unit_lengths - ) # [M] - - size_diff_upper = gt_size_upper[:, None] - anchor_size # [N,M] - scale_matrix = size_diff_upper >= 0 # [N,M] - - # Centrality rule, spatial - gt_center = (gt_boxes[:, 2:] + gt_boxes[:, :2]) / 2 # [N,2] - anchor_center = (anchor_boxes[:, 2:] + anchor_boxes[:, :2]) / 2 # [M,2] - offset_center = gt_center[:, None, :] - anchor_center[:, :] # [N,M,2] - offset_center /= unit_lengths[:, None] # [N,M,2] - spatial_square = spatial_thresh * spatial_thresh - spatial_matrix = torch.sum(offset_center * offset_center, dim=2) <= spatial_square - - assign_matrix = (contain_matrix & scale_matrix & spatial_matrix).int() - - # assign_matrix is N (gt) x M (predicted) - # Max over gt elements (dim 0) to find best gt candidate for each prediction - matched_vals, matches = assign_matrix.max(dim=0) - match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - - match_labels[matched_vals == 0] = 0 - match_labels[matched_vals == 1] = 1 - - # find all the elements that match to ground truths multiple times - not_unique_idxs = assign_matrix.sum(dim=0) > 1 - if uniqueness_on: - match_labels[not_unique_idxs] = 0 - else: - match_labels[not_unique_idxs] = -1 - - return matches, match_labels - - -# TODO make the paste_mask function in d2 core support mask list -def _paste_mask_lists_in_image(masks, boxes, image_shape, threshold=0.5): - """ - Paste a list of masks that are of various resolutions (e.g., 28 x 28) into an image. - The location, height, and width for pasting each mask is determined by their - corresponding bounding boxes in boxes. - - Args: - masks (list(Tensor)): A list of Tensor of shape (1, Hmask_i, Wmask_i). - Values are in [0, 1]. The list length, Bimg, is the - number of detected object instances in the image. - boxes (Boxes): A Boxes of length Bimg. boxes.tensor[i] and masks[i] correspond - to the same object instance. - image_shape (tuple): height, width - threshold (float): A threshold in [0, 1] for converting the (soft) masks to - binary masks. - - Returns: - img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the - number of detected object instances and Himage, Wimage are the image width - and height. 
img_masks[i] is a binary mask for object instance i. - """ - if len(masks) == 0: - return torch.empty((0, 1) + image_shape, dtype=torch.uint8) - - # Loop over masks groups. Each group has the same mask prediction size. - img_masks = [] - ind_masks = [] - mask_sizes = torch.tensor([m.shape[-1] for m in masks]) - unique_sizes = torch.unique(mask_sizes) - for msize in unique_sizes.tolist(): - cur_ind = torch.where(mask_sizes == msize)[0] - ind_masks.append(cur_ind) - - cur_masks = cat([masks[i] for i in cur_ind]) - cur_boxes = boxes[cur_ind] - img_masks.append(paste_masks_in_image(cur_masks, cur_boxes, image_shape, threshold)) - - img_masks = cat(img_masks) - ind_masks = cat(ind_masks) - - img_masks_out = torch.empty_like(img_masks) - img_masks_out[ind_masks, :, :] = img_masks - - return img_masks_out - - -def _postprocess(results, result_mask_info, output_height, output_width, mask_threshold=0.5): - """ - Post-process the output boxes for TensorMask. - The input images are often resized when entering an object detector. - As a result, we often need the outputs of the detector in a different - resolution from its inputs. - - This function will postprocess the raw outputs of TensorMask - to produce outputs according to the desired output resolution. - - Args: - results (Instances): the raw outputs from the detector. - `results.image_size` contains the input image resolution the detector sees. - This object might be modified in-place. Note that it does not contain the field - `pred_masks`, which is provided by another input `result_masks`. - result_mask_info (list[Tensor], Boxes): a pair of two items for mask related results. - The first item is a list of #detection tensors, each is the predicted masks. - The second item is the anchors corresponding to the predicted masks. - output_height, output_width: the desired output resolution. - - Returns: - Instances: the postprocessed output from the model, based on the output resolution - """ - scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) - results = Instances((output_height, output_width), **results.get_fields()) - - output_boxes = results.pred_boxes - output_boxes.tensor[:, 0::2] *= scale_x - output_boxes.tensor[:, 1::2] *= scale_y - output_boxes.clip(results.image_size) - - inds_nonempty = output_boxes.nonempty() - results = results[inds_nonempty] - result_masks, result_anchors = result_mask_info - if result_masks: - result_anchors.tensor[:, 0::2] *= scale_x - result_anchors.tensor[:, 1::2] *= scale_y - result_masks = [x for (i, x) in zip(inds_nonempty.tolist(), result_masks) if i] - results.pred_masks = _paste_mask_lists_in_image( - result_masks, - result_anchors[inds_nonempty], - results.image_size, - threshold=mask_threshold, - ) - return results - - -class TensorMaskAnchorGenerator(DefaultAnchorGenerator): - """ - For a set of image sizes and feature maps, computes a set of anchors for TensorMask. - It also computes the unit lengths and indexes for each anchor box. 
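The anchor layout computed by this generator follows the usual grid construction: one set of cell anchors shifted across every feature-map location. A small sketch with made-up sizes (using the modern `indexing="ij"` argument of `torch.meshgrid`):

```python
import torch

stride, gh, gw = 8, 2, 3
base = torch.tensor([[-16.0, -16.0, 16.0, 16.0]])    # one cell anchor (A = 1)
sx = torch.arange(0, gw * stride, step=stride, dtype=torch.float32)
sy = torch.arange(0, gh * stride, step=stride, dtype=torch.float32)
yy, xx = torch.meshgrid(sy, sx, indexing="ij")
shifts = torch.stack((xx, yy, xx, yy), dim=2)        # (gh, gw, 4)
anchors = (shifts[:, :, None, :] + base.view(1, 1, -1, 4)).view(-1, 4)
print(anchors.shape)                                  # torch.Size([6, 4]) == gh * gw * A
```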
- """ - - def grid_anchors_with_unit_lengths_and_indexes(self, grid_sizes): - anchors = [] - unit_lengths = [] - indexes = [] - for lvl, (size, stride, base_anchors) in enumerate( - zip(grid_sizes, self.strides, self.cell_anchors) - ): - grid_height, grid_width = size - device = base_anchors.device - shifts_x = torch.arange( - 0, grid_width * stride, step=stride, dtype=torch.float32, device=device - ) - shifts_y = torch.arange( - 0, grid_height * stride, step=stride, dtype=torch.float32, device=device - ) - shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) - shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=2) - # Stack anchors in shapes of (HWA, 4) - cur_anchor = (shifts[:, :, None, :] + base_anchors.view(1, 1, -1, 4)).view(-1, 4) - anchors.append(cur_anchor) - unit_lengths.append( - torch.full((cur_anchor.shape[0],), stride, dtype=torch.float32, device=device) - ) - # create mask indexes using mesh grid - shifts_l = torch.full((1,), lvl, dtype=torch.int64, device=device) - shifts_i = torch.zeros((1,), dtype=torch.int64, device=device) - shifts_h = torch.arange(0, grid_height, dtype=torch.int64, device=device) - shifts_w = torch.arange(0, grid_width, dtype=torch.int64, device=device) - shifts_a = torch.arange(0, base_anchors.shape[0], dtype=torch.int64, device=device) - grids = torch.meshgrid(shifts_l, shifts_i, shifts_h, shifts_w, shifts_a) - - indexes.append(torch.stack(grids, dim=5).view(-1, 5)) - - return anchors, unit_lengths, indexes - - def forward(self, features): - """ - Returns: - list[list[Boxes]]: a list of #image elements. Each is a list of #feature level Boxes. - The Boxes contains anchors of this image on the specific feature level. - list[list[Tensor]]: a list of #image elements. Each is a list of #feature level tensors. - The tensor contains strides, or unit lengths for the anchors. - list[list[Tensor]]: a list of #image elements. Each is a list of #feature level tensors. - The Tensor contains indexes for the anchors, with the last dimension meaning - (L, N, H, W, A), where L is level, I is image (not set yet), H is height, - W is width, and A is anchor. - """ - num_images = len(features[0]) - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_list, lengths_list, indexes_list = self.grid_anchors_with_unit_lengths_and_indexes( - grid_sizes - ) - - # Convert anchors from Tensor to Boxes - anchors_per_im = [Boxes(x) for x in anchors_list] - - # TODO it can be simplified to not return duplicated information for - # each image, just like detectron2's own AnchorGenerator - anchors = [copy.deepcopy(anchors_per_im) for _ in range(num_images)] - unit_lengths = [copy.deepcopy(lengths_list) for _ in range(num_images)] - indexes = [copy.deepcopy(indexes_list) for _ in range(num_images)] - - return anchors, unit_lengths, indexes - - -@META_ARCH_REGISTRY.register() -class TensorMask(nn.Module): - """ - TensorMask model. Creates FPN backbone, anchors and a head for classification - and box regression. Calculates and applies proper losses to class, box, and - masks. 
- """ - - def __init__(self, cfg): - super().__init__() - - # fmt: off - self.num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES - self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES - self.anchor_sizes = cfg.MODEL.ANCHOR_GENERATOR.SIZES - self.num_levels = len(cfg.MODEL.ANCHOR_GENERATOR.SIZES) - # Loss parameters: - self.focal_loss_alpha = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA - self.focal_loss_gamma = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA - # Inference parameters: - self.score_threshold = cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST - self.topk_candidates = cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST - self.nms_threshold = cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST - self.detections_im = cfg.TEST.DETECTIONS_PER_IMAGE - # Mask parameters: - self.mask_on = cfg.MODEL.MASK_ON - self.mask_loss_weight = cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT - self.mask_pos_weight = torch.tensor(cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT, - dtype=torch.float32) - self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON - # fmt: on - - # build the backbone - self.backbone = build_backbone(cfg) - - backbone_shape = self.backbone.output_shape() - feature_shapes = [backbone_shape[f] for f in self.in_features] - feature_strides = [x.stride for x in feature_shapes] - # build anchors - self.anchor_generator = TensorMaskAnchorGenerator(cfg, feature_shapes) - self.num_anchors = self.anchor_generator.num_cell_anchors[0] - anchors_min_level = cfg.MODEL.ANCHOR_GENERATOR.SIZES[0] - self.mask_sizes = [size // feature_strides[0] for size in anchors_min_level] - self.min_anchor_size = min(anchors_min_level) - feature_strides[0] - - # head of the TensorMask - self.head = TensorMaskHead( - cfg, self.num_levels, self.num_anchors, self.mask_sizes, feature_shapes - ) - # box transform - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS) - self.register_buffer("pixel_mean", torch.tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1), False) - self.register_buffer("pixel_std", torch.tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1), False) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DetectionTransform` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - image: Tensor, image in (C, H, W) format. - instances: Instances - Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - Returns: - losses (dict[str: Tensor]): mapping from a named loss to a tensor - storing the loss. Used during training only. - """ - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - features = [features[f] for f in self.in_features] - # apply the TensorMask head - pred_logits, pred_deltas, pred_masks = self.head(features) - # generate anchors based on features, is it image specific? 
- anchors, unit_lengths, indexes = self.anchor_generator(features) - - if self.training: - # get ground truths for class labels and box targets, it will label each anchor - gt_class_info, gt_delta_info, gt_mask_info, num_fg = self.get_ground_truth( - anchors, unit_lengths, indexes, gt_instances - ) - # compute the loss - return self.losses( - gt_class_info, - gt_delta_info, - gt_mask_info, - num_fg, - pred_logits, - pred_deltas, - pred_masks, - ) - else: - # do inference to get the output - results = self.inference(pred_logits, pred_deltas, pred_masks, anchors, indexes, images) - processed_results = [] - for results_im, input_im, image_size in zip( - results, batched_inputs, images.image_sizes - ): - height = input_im.get("height", image_size[0]) - width = input_im.get("width", image_size[1]) - # this is to do post-processing with the image size - result_box, result_mask = results_im - r = _postprocess(result_box, result_mask, height, width) - processed_results.append({"instances": r}) - return processed_results - - def losses( - self, - gt_class_info, - gt_delta_info, - gt_mask_info, - num_fg, - pred_logits, - pred_deltas, - pred_masks, - ): - """ - Args: - For `gt_class_info`, `gt_delta_info`, `gt_mask_info` and `num_fg` parameters, see - :meth:`TensorMask.get_ground_truth`. - For `pred_logits`, `pred_deltas` and `pred_masks`, see - :meth:`TensorMaskHead.forward`. - - Returns: - losses (dict[str: Tensor]): mapping from a named loss to a scalar tensor - storing the loss. Used during training only. The potential dict keys are: - "loss_cls", "loss_box_reg" and "loss_mask". - """ - gt_classes_target, gt_valid_inds = gt_class_info - gt_deltas, gt_fg_inds = gt_delta_info - gt_masks, gt_mask_inds = gt_mask_info - loss_normalizer = torch.tensor(max(1, num_fg), dtype=torch.float32, device=self.device) - - # classification and regression - pred_logits, pred_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat( - pred_logits, pred_deltas, self.num_classes - ) - loss_cls = ( - sigmoid_focal_loss_star_jit( - pred_logits[gt_valid_inds], - gt_classes_target[gt_valid_inds], - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) - / loss_normalizer - ) - - if num_fg == 0: - loss_box_reg = pred_deltas.sum() * 0 - else: - loss_box_reg = ( - smooth_l1_loss(pred_deltas[gt_fg_inds], gt_deltas, beta=0.0, reduction="sum") - / loss_normalizer - ) - losses = {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg} - - # mask prediction - if self.mask_on: - loss_mask = 0 - for lvl in range(self.num_levels): - cur_level_factor = 2**lvl if self.bipyramid_on else 1 - for anc in range(self.num_anchors): - cur_gt_mask_inds = gt_mask_inds[lvl][anc] - if cur_gt_mask_inds is None: - loss_mask += pred_masks[lvl][anc][0, 0, 0, 0] * 0 - else: - cur_mask_size = self.mask_sizes[anc] * cur_level_factor - # TODO maybe there are numerical issues when mask sizes are large - cur_size_divider = torch.tensor( - self.mask_loss_weight / (cur_mask_size**2), - dtype=torch.float32, - device=self.device, - ) - - cur_pred_masks = pred_masks[lvl][anc][ - cur_gt_mask_inds[:, 0], # N - :, # V x U - cur_gt_mask_inds[:, 1], # H - cur_gt_mask_inds[:, 2], # W - ] - - loss_mask += F.binary_cross_entropy_with_logits( - cur_pred_masks.view(-1, cur_mask_size, cur_mask_size), # V, U - gt_masks[lvl][anc].to(dtype=torch.float32), - reduction="sum", - weight=cur_size_divider, - pos_weight=self.mask_pos_weight, - ) - losses["loss_mask"] = loss_mask / loss_normalizer - return losses - - @torch.no_grad() - def 
get_ground_truth(self, anchors, unit_lengths, indexes, targets): - """ - Args: - anchors (list[list[Boxes]]): a list of N=#image elements. Each is a - list of #feature level Boxes. The Boxes contains anchors of - this image on the specific feature level. - unit_lengths (list[list[Tensor]]): a list of N=#image elements. Each is a - list of #feature level Tensor. The tensor contains unit lengths for anchors of - this image on the specific feature level. - indexes (list[list[Tensor]]): a list of N=#image elements. Each is a - list of #feature level Tensor. The tensor contains the 5D index of - each anchor, the second dimension means (L, N, H, W, A), where L - is level, I is image, H is height, W is width, and A is anchor. - targets (list[Instances]): a list of N `Instances`s. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - - Returns: - gt_class_info (Tensor, Tensor): A pair of two tensors for classification. - The first one is an integer tensor of shape (R, #classes) storing ground-truth - labels for each anchor. R is the total number of anchors in the batch. - The second one is an integer tensor of shape (R,), to indicate which - anchors are valid for loss computation, which anchors are not. - gt_delta_info (Tensor, Tensor): A pair of two tensors for boxes. - The first one, of shape (F, 4). F=#foreground anchors. - The last dimension represents ground-truth box2box transform - targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. - Only foreground anchors have values in this tensor. Could be `None` if F=0. - The second one, of shape (R,), is an integer tensor indicating which anchors - are foreground ones used for box regression. Could be `None` if F=0. - gt_mask_info (list[list[Tensor]], list[list[Tensor]]): A pair of two lists for masks. - The first one is a list of P=#feature level elements. Each is a - list of A=#anchor tensors. Each tensor contains the ground truth - masks of the same size and for the same feature level. Could be `None`. - The second one is a list of P=#feature level elements. Each is a - list of A=#anchor tensors. Each tensor contains the location of the ground truth - masks of the same size and for the same feature level. The second dimension means - (N, H, W), where N is image, H is height, and W is width. Could be `None`. - num_fg (int): F=#foreground anchors, used later for loss normalization. 
- """ - gt_classes = [] - gt_deltas = [] - gt_masks = [[[] for _ in range(self.num_anchors)] for _ in range(self.num_levels)] - gt_mask_inds = [[[] for _ in range(self.num_anchors)] for _ in range(self.num_levels)] - - anchors = [Boxes.cat(anchors_i) for anchors_i in anchors] - unit_lengths = [cat(unit_lengths_i) for unit_lengths_i in unit_lengths] - indexes = [cat(indexes_i) for indexes_i in indexes] - - num_fg = 0 - for i, (anchors_im, unit_lengths_im, indexes_im, targets_im) in enumerate( - zip(anchors, unit_lengths, indexes, targets) - ): - # Initialize all - gt_classes_i = torch.full_like( - unit_lengths_im, self.num_classes, dtype=torch.int64, device=self.device - ) - # Ground truth classes - has_gt = len(targets_im) > 0 - if has_gt: - # Compute the pairwise matrix - gt_matched_inds, anchor_labels = _assignment_rule( - targets_im.gt_boxes, anchors_im, unit_lengths_im, self.min_anchor_size - ) - # Find the foreground instances - fg_inds = anchor_labels == 1 - fg_anchors = anchors_im[fg_inds] - num_fg += len(fg_anchors) - # Find the ground truths for foreground instances - gt_fg_matched_inds = gt_matched_inds[fg_inds] - # Assign labels for foreground instances - gt_classes_i[fg_inds] = targets_im.gt_classes[gt_fg_matched_inds] - # Anchors with label -1 are ignored, others are left as negative - gt_classes_i[anchor_labels == -1] = -1 - - # Boxes - # Ground truth box regression, only for foregrounds - matched_gt_boxes = targets_im[gt_fg_matched_inds].gt_boxes - # Compute box regression offsets for foregrounds only - gt_deltas_i = self.box2box_transform.get_deltas( - fg_anchors.tensor, matched_gt_boxes.tensor - ) - gt_deltas.append(gt_deltas_i) - - # Masks - if self.mask_on: - # Compute masks for each level and each anchor - matched_indexes = indexes_im[fg_inds, :] - for lvl in range(self.num_levels): - ids_lvl = matched_indexes[:, 0] == lvl - if torch.any(ids_lvl): - cur_level_factor = 2**lvl if self.bipyramid_on else 1 - for anc in range(self.num_anchors): - ids_lvl_anchor = ids_lvl & (matched_indexes[:, 4] == anc) - if torch.any(ids_lvl_anchor): - gt_masks[lvl][anc].append( - targets_im[ - gt_fg_matched_inds[ids_lvl_anchor] - ].gt_masks.crop_and_resize( - fg_anchors[ids_lvl_anchor].tensor, - self.mask_sizes[anc] * cur_level_factor, - ) - ) - # Select (N, H, W) dimensions - gt_mask_inds_lvl_anc = matched_indexes[ids_lvl_anchor, 1:4] - # Set the image index to the current image - gt_mask_inds_lvl_anc[:, 0] = i - gt_mask_inds[lvl][anc].append(gt_mask_inds_lvl_anc) - gt_classes.append(gt_classes_i) - - # Classes and boxes - gt_classes = cat(gt_classes) - gt_valid_inds = gt_classes >= 0 - gt_fg_inds = gt_valid_inds & (gt_classes < self.num_classes) - gt_classes_target = torch.zeros( - (gt_classes.shape[0], self.num_classes), dtype=torch.float32, device=self.device - ) - gt_classes_target[gt_fg_inds, gt_classes[gt_fg_inds]] = 1 - gt_deltas = cat(gt_deltas) if gt_deltas else None - - # Masks - gt_masks = [[cat(mla) if mla else None for mla in ml] for ml in gt_masks] - gt_mask_inds = [[cat(ila) if ila else None for ila in il] for il in gt_mask_inds] - return ( - (gt_classes_target, gt_valid_inds), - (gt_deltas, gt_fg_inds), - (gt_masks, gt_mask_inds), - num_fg, - ) - - def inference(self, pred_logits, pred_deltas, pred_masks, anchors, indexes, images): - """ - Arguments: - pred_logits, pred_deltas, pred_masks: Same as the output of: - meth:`TensorMaskHead.forward` - anchors, indexes: Same as the input of meth:`TensorMask.get_ground_truth` - images (ImageList): the input images - - Returns: - 
results (List[Instances]): a list of #images elements. - """ - assert len(anchors) == len(images) - results = [] - - pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] - pred_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_deltas] - - pred_logits = cat(pred_logits, dim=1) - pred_deltas = cat(pred_deltas, dim=1) - - for img_idx, (anchors_im, indexes_im) in enumerate(zip(anchors, indexes)): - # Get the size of the current image - image_size = images.image_sizes[img_idx] - - logits_im = pred_logits[img_idx] - deltas_im = pred_deltas[img_idx] - - if self.mask_on: - masks_im = [[mla[img_idx] for mla in ml] for ml in pred_masks] - else: - masks_im = [None] * self.num_levels - results_im = self.inference_single_image( - logits_im, - deltas_im, - masks_im, - Boxes.cat(anchors_im), - cat(indexes_im), - tuple(image_size), - ) - results.append(results_im) - return results - - def inference_single_image( - self, pred_logits, pred_deltas, pred_masks, anchors, indexes, image_size - ): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Arguments: - pred_logits (list[Tensor]): list of #feature levels. Each entry contains - tensor of size (AxHxW, K) - pred_deltas (list[Tensor]): Same shape as 'pred_logits' except that K becomes 4. - pred_masks (list[list[Tensor]]): List of #feature levels, each is a list of #anchors. - Each entry contains tensor of size (M_i*M_i, H, W). `None` if mask_on=False. - anchors (list[Boxes]): list of #feature levels. Each entry contains - a Boxes object, which contains all the anchors for that - image in that feature level. - image_size (tuple(H, W)): a tuple of the image height and width. - - Returns: - Same as `inference`, but for only one image. - """ - pred_logits = pred_logits.flatten().sigmoid_() - # We get top locations across all levels to accelerate the inference speed, - # which does not seem to affect the accuracy. 
- # First select values above the threshold - logits_top_idxs = torch.where(pred_logits > self.score_threshold)[0] - # Then get the top values - num_topk = min(self.topk_candidates, logits_top_idxs.shape[0]) - pred_prob, topk_idxs = pred_logits[logits_top_idxs].sort(descending=True) - # Keep top k scoring values - pred_prob = pred_prob[:num_topk] - # Keep top k values - top_idxs = logits_top_idxs[topk_idxs[:num_topk]] - - # class index - cls_idxs = top_idxs % self.num_classes - # HWA index - top_idxs //= self.num_classes - # predict boxes - pred_boxes = self.box2box_transform.apply_deltas( - pred_deltas[top_idxs], anchors[top_idxs].tensor - ) - # apply nms - keep = batched_nms(pred_boxes, pred_prob, cls_idxs, self.nms_threshold) - # pick the top ones - keep = keep[: self.detections_im] - - results = Instances(image_size) - results.pred_boxes = Boxes(pred_boxes[keep]) - results.scores = pred_prob[keep] - results.pred_classes = cls_idxs[keep] - - # deal with masks - result_masks, result_anchors = [], None - if self.mask_on: - # index and anchors, useful for masks - top_indexes = indexes[top_idxs] - top_anchors = anchors[top_idxs] - result_indexes = top_indexes[keep] - result_anchors = top_anchors[keep] - # Get masks and do sigmoid - for lvl, _, h, w, anc in result_indexes.tolist(): - cur_size = self.mask_sizes[anc] * (2**lvl if self.bipyramid_on else 1) - result_masks.append( - torch.sigmoid(pred_masks[lvl][anc][:, h, w].view(1, cur_size, cur_size)) - ) - - return results, (result_masks, result_anchors) - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - -class TensorMaskHead(nn.Module): - def __init__(self, cfg, num_levels, num_anchors, mask_sizes, input_shape: List[ShapeSpec]): - """ - TensorMask head. 
- """ - super().__init__() - # fmt: off - self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES - in_channels = input_shape[0].channels - num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES - cls_channels = cfg.MODEL.TENSOR_MASK.CLS_CHANNELS - num_convs = cfg.MODEL.TENSOR_MASK.NUM_CONVS - # box parameters - bbox_channels = cfg.MODEL.TENSOR_MASK.BBOX_CHANNELS - # mask parameters - self.mask_on = cfg.MODEL.MASK_ON - self.mask_sizes = mask_sizes - mask_channels = cfg.MODEL.TENSOR_MASK.MASK_CHANNELS - self.align_on = cfg.MODEL.TENSOR_MASK.ALIGNED_ON - self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON - # fmt: on - - # class subnet - cls_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - cls_subnet.append( - nn.Conv2d(cur_channels, cls_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = cls_channels - cls_subnet.append(nn.ReLU()) - - self.cls_subnet = nn.Sequential(*cls_subnet) - self.cls_score = nn.Conv2d( - cur_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 - ) - modules_list = [self.cls_subnet, self.cls_score] - - # box subnet - bbox_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - bbox_subnet.append( - nn.Conv2d(cur_channels, bbox_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = bbox_channels - bbox_subnet.append(nn.ReLU()) - - self.bbox_subnet = nn.Sequential(*bbox_subnet) - self.bbox_pred = nn.Conv2d( - cur_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1 - ) - modules_list.extend([self.bbox_subnet, self.bbox_pred]) - - # mask subnet - if self.mask_on: - mask_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - mask_subnet.append( - nn.Conv2d(cur_channels, mask_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = mask_channels - mask_subnet.append(nn.ReLU()) - - self.mask_subnet = nn.Sequential(*mask_subnet) - modules_list.append(self.mask_subnet) - for mask_size in self.mask_sizes: - cur_mask_module = "mask_pred_%02d" % mask_size - self.add_module( - cur_mask_module, - nn.Conv2d( - cur_channels, mask_size * mask_size, kernel_size=1, stride=1, padding=0 - ), - ) - modules_list.append(getattr(self, cur_mask_module)) - if self.align_on: - if self.bipyramid_on: - for lvl in range(num_levels): - cur_mask_module = "align2nat_%02d" % lvl - lambda_val = 2**lvl - setattr(self, cur_mask_module, SwapAlign2Nat(lambda_val)) - # Also the fusing layer, stay at the same channel size - mask_fuse = [ - nn.Conv2d(cur_channels, cur_channels, kernel_size=3, stride=1, padding=1), - nn.ReLU(), - ] - self.mask_fuse = nn.Sequential(*mask_fuse) - modules_list.append(self.mask_fuse) - else: - self.align2nat = SwapAlign2Nat(1) - - # Initialization - for modules in modules_list: - for layer in modules.modules(): - if isinstance(layer, nn.Conv2d): - torch.nn.init.normal_(layer.weight, mean=0, std=0.01) - torch.nn.init.constant_(layer.bias, 0) - - # Use prior in model initialization to improve stability - bias_value = -(math.log((1 - 0.01) / 0.01)) - torch.nn.init.constant_(self.cls_score.bias, bias_value) - - def forward(self, features): - """ - Arguments: - features (list[Tensor]): FPN feature map tensors in high to low resolution. - Each tensor in the list correspond to different feature levels. - - Returns: - pred_logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). - The tensor predicts the classification probability - at each spatial position for each of the A anchors and K object - classes. 
- pred_deltas (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). - The tensor predicts 4-vector (dx,dy,dw,dh) box - regression values for every anchor. These values are the - relative offset between the anchor and the ground truth box. - pred_masks (list(list[Tensor])): #lvl list of tensors, each is a list of - A tensors of shape (N, M_{i,a}, Hi, Wi). - The tensor predicts a dense set of M_ixM_i masks at every location. - """ - pred_logits = [self.cls_score(self.cls_subnet(x)) for x in features] - pred_deltas = [self.bbox_pred(self.bbox_subnet(x)) for x in features] - - pred_masks = None - if self.mask_on: - mask_feats = [self.mask_subnet(x) for x in features] - - if self.bipyramid_on: - mask_feat_high_res = mask_feats[0] - H, W = mask_feat_high_res.shape[-2:] - mask_feats_up = [] - for lvl, mask_feat in enumerate(mask_feats): - lambda_val = 2.0**lvl - mask_feat_up = mask_feat - if lvl > 0: - mask_feat_up = F.interpolate( - mask_feat, scale_factor=lambda_val, mode="bilinear", align_corners=False - ) - mask_feats_up.append( - self.mask_fuse(mask_feat_up[:, :, :H, :W] + mask_feat_high_res) - ) - mask_feats = mask_feats_up - - pred_masks = [] - for lvl, mask_feat in enumerate(mask_feats): - cur_masks = [] - for mask_size in self.mask_sizes: - cur_mask_module = getattr(self, "mask_pred_%02d" % mask_size) - cur_mask = cur_mask_module(mask_feat) - if self.align_on: - if self.bipyramid_on: - cur_mask_module = getattr(self, "align2nat_%02d" % lvl) - cur_mask = cur_mask_module(cur_mask) - else: - cur_mask = self.align2nat(cur_mask) - cur_masks.append(cur_mask) - pred_masks.append(cur_masks) - return pred_logits, pred_deltas, pred_masks diff --git a/detectron2/projects/TensorMask/tensormask/config.py b/detectron2/projects/TensorMask/tensormask/config.py deleted file mode 100644 index cf62d7aea23a9bdf637c9dc80b810e2413c9c0ae..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/config.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -from detectron2.config import CfgNode as CN - - -def add_tensormask_config(cfg): - """ - Add config for TensorMask. - """ - cfg.MODEL.TENSOR_MASK = CN() - - # Anchor parameters - cfg.MODEL.TENSOR_MASK.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6", "p7"] - - # Convolutions to use in the towers - cfg.MODEL.TENSOR_MASK.NUM_CONVS = 4 - - # Number of foreground classes. 
- cfg.MODEL.TENSOR_MASK.NUM_CLASSES = 80 - # Channel size for the classification tower - cfg.MODEL.TENSOR_MASK.CLS_CHANNELS = 256 - - cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST = 0.05 - # Only the top (1000 * #levels) candidate boxes across all levels are - # considered jointly during test (to improve speed) - cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST = 6000 - cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST = 0.5 - - # Box parameters - # Channel size for the box tower - cfg.MODEL.TENSOR_MASK.BBOX_CHANNELS = 128 - # Weights on (dx, dy, dw, dh) - cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS = (1.5, 1.5, 0.75, 0.75) - - # Loss parameters - cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA = 3.0 - cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA = 0.3 - - # Mask parameters - # Channel size for the mask tower - cfg.MODEL.TENSOR_MASK.MASK_CHANNELS = 128 - # Mask loss weight - cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT = 2.0 - # weight on positive pixels within the mask - cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT = 1.5 - # Whether to predict in the aligned representation - cfg.MODEL.TENSOR_MASK.ALIGNED_ON = False - # Whether to use the bipyramid architecture - cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON = False diff --git a/detectron2/projects/TensorMask/tensormask/layers/__init__.py b/detectron2/projects/TensorMask/tensormask/layers/__init__.py deleted file mode 100644 index 8b8e178445ebb67b84e9c9d547dba9108a30e3d9..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/layers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .swap_align2nat import SwapAlign2Nat, swap_align2nat - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h b/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h deleted file mode 100644 index 75c21785fd60cf05d705707e8a0e04e2b619a85b..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
-#pragma once -#include - -namespace tensormask { - -#if defined(WITH_CUDA) || defined(WITH_HIP) -at::Tensor SwapAlign2Nat_forward_cuda( - const at::Tensor& X, - const int lambda_val, - const float pad_val); - -at::Tensor SwapAlign2Nat_backward_cuda( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width); -#endif - -inline at::Tensor SwapAlign2Nat_forward( - const at::Tensor& X, - const int lambda_val, - const float pad_val) { - if (X.type().is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline at::Tensor SwapAlign2Nat_backward( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width) { - if (gY.type().is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return SwapAlign2Nat_backward_cuda( - gY, lambda_val, batch_size, channel, height, width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -} // namespace tensormask diff --git a/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu b/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu deleted file mode 100644 index 1398d70491bbbd86127a69f348e210e71a937305..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu +++ /dev/null @@ -1,526 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__device__ inline T get_pixel_val( - const T* tensor, - const int idx, - const int H, - const int W, - const int y, - const int x, - const int V, - const int U, - const int v, - const int u, - const T pad_val) { - if ((y < 0) || (y >= H) || (x < 0) || (x >= W) || (v < 0) || (v >= V) || - (u < 0) || (u >= U)) { - return pad_val; - } else { - return tensor[(((idx * V + v) * U + u) * H + y) * W + x]; - } -} - -template -__device__ inline void add_pixel_val( - T* tensor, - const T val, - const int idx, - const int H, - const int W, - const int y, - const int x, - const int V, - const int U, - const int v, - const int u) { - if ((val == 0.) || (y < 0) || (y >= H) || (x < 0) || (x >= W) || (v < 0) || - (v >= V) || (u < 0) || (u >= U)) { - return; - } else { - atomicAdd(tensor + ((((idx * V + v) * U + u) * H + y) * W + x), val); - } -} - -template -__global__ void SwapAlign2NatForwardFeat( - const int nthreads, - const T* bottom_data, - const int Vout, - const int Uout, - const float hVout, - const float hUout, - const int Vin, - const int Uin, - const float lambda, - const int Hin, - const int Win, - const int Hout, - const int Wout, - const T pad_val, - T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int idx = index; - const int x = idx % Wout; - idx /= Wout; - const int y = idx % Hout; - idx /= Hout; - const int u = idx % Uout; - idx /= Uout; - const int v = idx % Vout; - idx /= Vout; - - const float ox = x * lambda + u - hUout + 0.5; - const int xf = static_cast(floor(ox)); - const int xc = static_cast(ceil(ox)); - const float xwc = ox - xf; - const float xwf = 1. 
- xwc; - - const float oy = y * lambda + v - hVout + 0.5; - const int yf = static_cast(floor(oy)); - const int yc = static_cast(ceil(oy)); - const float ywc = oy - yf; - const float ywf = 1. - ywc; - - const float ou = (u + 0.5) / lambda - 0.5; - const int uf = static_cast(floor(ou)); - const int uc = static_cast(ceil(ou)); - const float uwc = ou - uf; - const float uwf = 1. - uwc; - - const float ov = (v + 0.5) / lambda - 0.5; - const int vf = static_cast(floor(ov)); - const int vc = static_cast(ceil(ov)); - const float vwc = ov - vf; - const float vwf = 1. - vwc; - - T val = ywf * xwf * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vf, uf, pad_val) + - ywf * xwf * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vf, uc, pad_val) + - ywf * xwf * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vc, uf, pad_val) + - ywf * xwf * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vc, uc, pad_val) + - ywf * xwc * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vf, uf, pad_val) + - ywf * xwc * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vf, uc, pad_val) + - ywf * xwc * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vc, uf, pad_val) + - ywf * xwc * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vc, uc, pad_val) + - ywc * xwf * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vf, uf, pad_val) + - ywc * xwf * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vf, uc, pad_val) + - ywc * xwf * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vc, uf, pad_val) + - ywc * xwf * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vc, uc, pad_val) + - ywc * xwc * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vf, uf, pad_val) + - ywc * xwc * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vf, uc, pad_val) + - ywc * xwc * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vc, uf, pad_val) + - ywc * xwc * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vc, uc, pad_val); - - top_data[index] = val; - } -} - -template -__global__ void SwapAlign2NatBackwardFeat( - const int nthreads, - const T* top_diff, - const int Vout, - const int Uout, - const float hVout, - const float hUout, - const int Vin, - const int Uin, - const float lambda, - const int Hin, - const int Win, - const int Hout, - const int Wout, - T* bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int idx = index; - const int x = idx % Wout; - idx /= Wout; - const int y = idx % Hout; - idx /= Hout; - const int u = idx % Uout; - idx /= Uout; - const int v = idx % Vout; - idx /= Vout; - - const float ox = x * lambda + u - hUout + 0.5; - const int xf = static_cast(floor(ox)); - const int xc = static_cast(ceil(ox)); - const float xwc = ox - xf; - const float xwf = 1. - xwc; - - const float oy = y * lambda + v - hVout + 0.5; - const int yf = static_cast(floor(oy)); - const int yc = static_cast(ceil(oy)); - const float ywc = oy - yf; - const float ywf = 1. - ywc; - - const float ou = (u + 0.5) / lambda - 0.5; - const int uf = static_cast(floor(ou)); - const int uc = static_cast(ceil(ou)); - const float uwc = ou - uf; - const float uwf = 1. 
- uwc; - - const float ov = (v + 0.5) / lambda - 0.5; - const int vf = static_cast(floor(ov)); - const int vc = static_cast(ceil(ov)); - const float vwc = ov - vf; - const float vwf = 1. - vwc; - - const T grad = top_diff[index]; - - add_pixel_val( - bottom_diff, - ywf * xwf * vwf * uwf * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywf * xwf * vwf * uwc * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywf * xwf * vwc * uwf * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywf * xwf * vwc * uwc * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywf * xwc * vwf * uwf * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywf * xwc * vwf * uwc * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywf * xwc * vwc * uwf * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywf * xwc * vwc * uwc * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywc * xwf * vwf * uwf * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywc * xwf * vwf * uwc * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywc * xwf * vwc * uwf * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywc * xwf * vwc * uwc * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywc * xwc * vwf * uwf * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywc * xwc * vwf * uwc * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywc * xwc * vwc * uwf * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywc * xwc * vwc * uwc * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vc, - uc); - } -} - -namespace tensormask { - -at::Tensor SwapAlign2Nat_forward_cuda( - const at::Tensor& X, - const int lambda_val, - const float pad_val) { - AT_ASSERTM(X.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(X.ndimension() == 4, "input must be a 4D tensor"); - AT_ASSERTM(lambda_val >= 1, "lambda should be greater or equal to 1"); - const int N = X.size(0); - const int C = X.size(1); - const int Vin = static_cast(sqrt(static_cast(C))); - const int Uin = C / Vin; - AT_ASSERTM( - C == Vin * Uin && Vin == Uin, "#channels should be a square number"); - const int Vout = lambda_val * Vin; - const int Uout = lambda_val * Uin; - const int Hin = X.size(2); - const int Win = X.size(3); - const float lambda = static_cast(lambda_val); - const int Hout = static_cast(ceil(Hin / lambda)); - const int Wout = static_cast(ceil(Win / lambda)); - const float hVout = Vout / 2.; - const float hUout = Uout / 2.; - - at::cuda::CUDAGuard device_guard(X.device()); - - at::Tensor Y = at::empty({N, Vout * Uout, Hout, Wout}, X.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(Y.numel(), 512L), 4096L)); - dim3 block(512); - - if (Y.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return Y; - } - - auto X_ = X.contiguous(); - 
AT_DISPATCH_FLOATING_TYPES(X.scalar_type(), "SwapAlign2Nat_forward", [&] { - SwapAlign2NatForwardFeat<<>>( - Y.numel(), - X_.data_ptr(), - Vout, - Uout, - hVout, - hUout, - Vin, - Uin, - lambda, - Hin, - Win, - Hout, - Wout, - pad_val, - Y.data_ptr()); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return Y; -} - -at::Tensor SwapAlign2Nat_backward_cuda( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width) { - AT_ASSERTM(gY.device().is_cuda(), "input gradient must be a CUDA tensor"); - AT_ASSERTM(gY.ndimension() == 4, "input gradient must be a 4D tensor"); - AT_ASSERTM(lambda_val >= 1, "lambda should be greater or equal to 1"); - const int Vin = static_cast(sqrt(static_cast(channel))); - const int Uin = channel / Vin; - const int Vout = lambda_val * Vin; - const int Uout = lambda_val * Uin; - const float hVout = Vout / 2.; - const float hUout = Uout / 2.; - const int Hout = gY.size(2); - const int Wout = gY.size(3); - - at::cuda::CUDAGuard device_guard(gY.device()); - - at::Tensor gX = at::zeros({batch_size, channel, height, width}, gY.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(gY.numel(), 512L), 4096L)); - dim3 block(512); - - // handle possibly empty gradients - if (gY.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return gX; - } - - auto gY_ = gY.contiguous(); - AT_DISPATCH_FLOATING_TYPES(gY.scalar_type(), "SwapAlign2Nat_backward", [&] { - SwapAlign2NatBackwardFeat<<>>( - gY.numel(), - gY_.data_ptr(), - Vout, - Uout, - hVout, - hUout, - Vin, - Uin, - static_cast(lambda_val), - height, - width, - Hout, - Wout, - gX.data_ptr()); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return gX; -} - -} // namespace tensormask diff --git a/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp b/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp deleted file mode 100644 index ed1ed0b3d5911021bf7b4a03126b5140b5286970..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. - -#include -#include "SwapAlign2Nat/SwapAlign2Nat.h" - -namespace tensormask { - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def( - "swap_align2nat_forward", - &SwapAlign2Nat_forward, - "SwapAlign2Nat_forward"); - m.def( - "swap_align2nat_backward", - &SwapAlign2Nat_backward, - "SwapAlign2Nat_backward"); -} - -} // namespace tensormask diff --git a/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py b/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py deleted file mode 100644 index 2b5e45013c2112187c82a95fe056a0b0a3d43489..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from tensormask import _C - - -class _SwapAlign2Nat(Function): - @staticmethod - def forward(ctx, X, lambda_val, pad_val): - ctx.lambda_val = lambda_val - ctx.input_shape = X.size() - - Y = _C.swap_align2nat_forward(X, lambda_val, pad_val) - return Y - - @staticmethod - @once_differentiable - def backward(ctx, gY): - lambda_val = ctx.lambda_val - bs, ch, h, w = ctx.input_shape - - gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w) - - return gX, None, None - - -swap_align2nat = _SwapAlign2Nat.apply - - -class SwapAlign2Nat(nn.Module): - """ - The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174. - Given an input tensor that predicts masks of shape (N, C=VxU, H, W), - apply the op, it will return masks of shape (N, V'xU', H', W') where - the unit lengths of (V, U) and (H, W) are swapped, and the mask representation - is transformed from aligned to natural. - Args: - lambda_val (int): the relative unit length ratio between (V, U) and (H, W), - as we always have larger unit lengths for (V, U) than (H, W), - lambda_val is always >= 1. - pad_val (float): padding value for the values falling outside of the input - tensor, default set to -6 as sigmoid(-6) is ~0, indicating - that is no masks outside of the tensor. - """ - - def __init__(self, lambda_val, pad_val=-6.0): - super(SwapAlign2Nat, self).__init__() - self.lambda_val = lambda_val - self.pad_val = pad_val - - def forward(self, X): - return swap_align2nat(X, self.lambda_val, self.pad_val) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "lambda_val=" + str(self.lambda_val) - tmpstr += ", pad_val=" + str(self.pad_val) - tmpstr += ")" - return tmpstr diff --git a/detectron2/projects/TensorMask/tests/__init__.py b/detectron2/projects/TensorMask/tests/__init__.py deleted file mode 100644 index 9020c2df23e2af280b7bb168b996ae9eaf312eb8..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/detectron2/projects/TensorMask/tests/test_swap_align2nat.py b/detectron2/projects/TensorMask/tests/test_swap_align2nat.py deleted file mode 100644 index d9ee273de06cf881b89696ee4ee13a0953d6aa25..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/tests/test_swap_align2nat.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import unittest -import torch -from torch.autograd import gradcheck - -from tensormask.layers.swap_align2nat import SwapAlign2Nat - - -class SwapAlign2NatTest(unittest.TestCase): - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_swap_align2nat_gradcheck_cuda(self): - dtype = torch.float64 - device = torch.device("cuda") - m = SwapAlign2Nat(2).to(dtype=dtype, device=device) - x = torch.rand(2, 4, 10, 10, dtype=dtype, device=device, requires_grad=True) - - self.assertTrue(gradcheck(m, x), "gradcheck failed for SwapAlign2Nat CUDA") - - def _swap_align2nat(self, tensor, lambda_val): - """ - The basic setup for testing Swap_Align - """ - op = SwapAlign2Nat(lambda_val, pad_val=0.0) - input = torch.from_numpy(tensor[None, :, :, :].astype("float32")) - output = op.forward(input.cuda()).cpu().numpy() - return output[0] - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/projects/TensorMask/train_net.py b/detectron2/projects/TensorMask/train_net.py deleted file mode 100644 index aeb2cd2fc2093edc48bbe11c68e800b07c76d950..0000000000000000000000000000000000000000 --- a/detectron2/projects/TensorMask/train_net.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -TensorMask Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator, verify_results - -from tensormask import add_tensormask_config - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - return COCOEvaluator(dataset_name, output_dir=output_folder) - - -def setup(args): - """ - Create configs and perform basic setups. 
- """ - cfg = get_cfg() - add_tensormask_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/TridentNet/README.md b/detectron2/projects/TridentNet/README.md deleted file mode 100644 index 4b7a90102d008a498e93dff595a09206be5269e7..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/README.md +++ /dev/null @@ -1,60 +0,0 @@ - -# TridentNet in Detectron2 -**Scale-Aware Trident Networks for Object Detection** - -Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang - -[[`TridentNet`](https://github.com/TuSimple/simpledet/tree/master/models/tridentnet)] [[`arXiv`](https://arxiv.org/abs/1901.01892)] [[`BibTeX`](#CitingTridentNet)] - -
- -In this repository, we implement TridentNet-Fast in Detectron2. -Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. TridentNet-Fast is a fast approximation version of TridentNet that could achieve significant improvements without any additional parameters and computational cost. - -## Training - -To train a model, run -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file -``` - -For example, to launch end-to-end TridentNet training with ResNet-50 backbone on 8 GPUs, -one should execute: -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --eval-only MODEL.WEIGHTS model.pth -``` - -## Results on MS-COCO in Detectron2 - -|Model|Backbone|Head|lr sched|AP|AP50|AP75|APs|APm|APl|download| -|-----|--------|----|--------|--|----|----|---|---|---|--------| -|Faster|R50-C4|C5-512ROI|1X|35.7|56.1|38.0|19.2|40.9|48.7|model \| metrics| -|TridentFast|R50-C4|C5-128ROI|1X|38.0|58.1|40.8|19.5|42.2|54.6|model \| metrics| -|Faster|R50-C4|C5-512ROI|3X|38.4|58.7|41.3|20.7|42.7|53.1|model \| metrics| -|TridentFast|R50-C4|C5-128ROI|3X|40.6|60.8|43.6|23.4|44.7|57.1|model \| metrics| -|Faster|R101-C4|C5-512ROI|3X|41.1|61.4|44.0|22.2|45.5|55.9|model \| metrics| -|TridentFast|R101-C4|C5-128ROI|3X|43.6|63.4|47.0|24.3|47.8|60.0|model \| metrics| - - -## Citing TridentNet - -If you use TridentNet, please use the following BibTeX entry. 
- -``` -@InProceedings{li2019scale, - title={Scale-Aware Trident Networks for Object Detection}, - author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, - journal={The International Conference on Computer Vision (ICCV)}, - year={2019} -} -``` - diff --git a/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml b/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml deleted file mode 100644 index 8c3d80797ba9ae63a5669ccbd74a0d2006fee3b7..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml +++ /dev/null @@ -1,29 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_trident_resnet_backbone" - ROI_HEADS: - NAME: "TridentRes5ROIHeads" - POSITIVE_FRACTION: 0.5 - BATCH_SIZE_PER_IMAGE: 128 - PROPOSAL_APPEND_GT: False - PROPOSAL_GENERATOR: - NAME: "TridentRPN" - RPN: - POST_NMS_TOPK_TRAIN: 500 - TRIDENT: - NUM_BRANCH: 3 - BRANCH_DILATIONS: [1, 2, 3] - TEST_BRANCH_IDX: 1 - TRIDENT_STAGE: "res4" -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml b/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml deleted file mode 100644 index bc83c2f9e7b7653c8982e657b5f116abe6ad6e1f..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml b/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml deleted file mode 100644 index fda2cb6622d732c0f70d74d567c26182a9a41c44..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml b/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml deleted file mode 100644 index ebf89d03ea043810b02e71ecc2c1711c250e161c..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/detectron2/projects/TridentNet/train_net.py b/detectron2/projects/TridentNet/train_net.py deleted file mode 100644 index 5e5c5fe43a6a888e45dbd9ab7f67cbecc13c9a79..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/train_net.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. - -""" -TridentNet Training Script. - -This script is a simplified version of the training script in detectron2/tools. 
-""" - -import os - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator - -from tridentnet import add_tridentnet_config - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - return COCOEvaluator(dataset_name, output_dir=output_folder) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_tridentnet_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/projects/TridentNet/tridentnet/__init__.py b/detectron2/projects/TridentNet/tridentnet/__init__.py deleted file mode 100644 index abaa9579051e7ef5ee7f388b9d59b5962440155c..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/tridentnet/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from .config import add_tridentnet_config -from .trident_backbone import ( - TridentBottleneckBlock, - build_trident_resnet_backbone, - make_trident_stage, -) -from .trident_rpn import TridentRPN -from .trident_rcnn import TridentRes5ROIHeads, TridentStandardROIHeads diff --git a/detectron2/projects/TridentNet/tridentnet/config.py b/detectron2/projects/TridentNet/tridentnet/config.py deleted file mode 100644 index 4b8732a43f6974ec60168652bf08e382ddc9c941..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/tridentnet/config.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -from detectron2.config import CfgNode as CN - - -def add_tridentnet_config(cfg): - """ - Add config for tridentnet. - """ - _C = cfg - - _C.MODEL.TRIDENT = CN() - - # Number of branches for TridentNet. - _C.MODEL.TRIDENT.NUM_BRANCH = 3 - # Specify the dilations for each branch. - _C.MODEL.TRIDENT.BRANCH_DILATIONS = [1, 2, 3] - # Specify the stage for applying trident blocks. Default stage is Res4 according to the - # TridentNet paper. - _C.MODEL.TRIDENT.TRIDENT_STAGE = "res4" - # Specify the test branch index TridentNet Fast inference: - # - use -1 to aggregate results of all branches during inference. - # - otherwise, only using specified branch for fast inference. Recommended setting is - # to use the middle branch. 
- _C.MODEL.TRIDENT.TEST_BRANCH_IDX = 1 diff --git a/detectron2/projects/TridentNet/tridentnet/trident_backbone.py b/detectron2/projects/TridentNet/tridentnet/trident_backbone.py deleted file mode 100644 index 7789bd219b01d452e876ad2ad7f811502719465c..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/tridentnet/trident_backbone.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F - -from detectron2.layers import Conv2d, FrozenBatchNorm2d, get_norm -from detectron2.modeling import BACKBONE_REGISTRY, ResNet, ResNetBlockBase -from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock, DeformBottleneckBlock - -from .trident_conv import TridentConv - -__all__ = ["TridentBottleneckBlock", "make_trident_stage", "build_trident_resnet_backbone"] - - -class TridentBottleneckBlock(ResNetBlockBase): - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - num_branch=3, - dilations=(1, 2, 3), - concat_output=False, - test_branch_idx=-1, - ): - """ - Args: - num_branch (int): the number of branches in TridentNet. - dilations (tuple): the dilations of multiple branches in TridentNet. - concat_output (bool): if concatenate outputs of multiple branches in TridentNet. - Use 'True' for the last trident block. - """ - super().__init__(in_channels, out_channels, stride) - - assert num_branch == len(dilations) - - self.num_branch = num_branch - self.concat_output = concat_output - self.test_branch_idx = test_branch_idx - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv2 = TridentConv( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - paddings=dilations, - bias=False, - groups=num_groups, - dilations=dilations, - num_branch=num_branch, - test_branch_idx=test_branch_idx, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - def forward(self, x): - num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 - if not isinstance(x, list): - x = [x] * num_branch - out = [self.conv1(b) for b in x] - out = [F.relu_(b) for b in out] - - out = self.conv2(out) - out = [F.relu_(b) for b in out] - - out = [self.conv3(b) for b in out] - - if self.shortcut is not None: - shortcut = [self.shortcut(b) for b in x] - else: - shortcut = x - - out = [out_b + shortcut_b for out_b, shortcut_b in zip(out, shortcut)] - out = [F.relu_(b) for b in out] - if self.concat_output: - out = torch.cat(out) - return out - - -def make_trident_stage(block_class, num_blocks, **kwargs): - """ - Create a resnet stage by creating many blocks for TridentNet. 
- """ - concat_output = [False] * (num_blocks - 1) + [True] - kwargs["concat_output_per_block"] = concat_output - return ResNet.make_stage(block_class, num_blocks, **kwargs) - - -@BACKBONE_REGISTRY.register() -def build_trident_resnet_backbone(cfg, input_shape): - """ - Create a ResNet instance from config for TridentNet. - - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? - norm = cfg.MODEL.RESNETS.NORM - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - - if freeze_at >= 1: - for p in stem.parameters(): - p.requires_grad = False - stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem) - - # fmt: off - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - branch_dilations = cfg.MODEL.TRIDENT.BRANCH_DILATIONS - trident_stage = cfg.MODEL.TRIDENT.TRIDENT_STAGE - test_branch_idx = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX - # fmt: on - assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) - - num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] - - stages = [] - - res_stage_idx = {"res2": 2, "res3": 3, "res4": 4, "res5": 5} - out_stage_idx = [res_stage_idx[f] for f in out_features] - trident_stage_idx = res_stage_idx[trident_stage] - max_stage_idx = max(out_stage_idx) - for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): - dilation = res5_dilation if stage_idx == 5 else 1 - first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "stride_per_block": [first_stride] + [1] * (num_blocks_per_stage[idx] - 1), - "in_channels": in_channels, - "bottleneck_channels": bottleneck_channels, - "out_channels": out_channels, - "num_groups": num_groups, - "norm": norm, - "stride_in_1x1": stride_in_1x1, - "dilation": dilation, - } - if stage_idx == trident_stage_idx: - assert not deform_on_per_stage[ - idx - ], "Not support deformable conv in Trident blocks yet." 
- stage_kargs["block_class"] = TridentBottleneckBlock - stage_kargs["num_branch"] = num_branch - stage_kargs["dilations"] = branch_dilations - stage_kargs["test_branch_idx"] = test_branch_idx - stage_kargs.pop("dilation") - elif deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - blocks = ( - make_trident_stage(**stage_kargs) - if stage_idx == trident_stage_idx - else ResNet.make_stage(**stage_kargs) - ) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - - if freeze_at >= stage_idx: - for block in blocks: - block.freeze() - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features) diff --git a/detectron2/projects/TridentNet/tridentnet/trident_conv.py b/detectron2/projects/TridentNet/tridentnet/trident_conv.py deleted file mode 100644 index 18d5b0b9d73f2da263e7e026a82c62231a88d279..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/tridentnet/trident_conv.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch -from torch import nn -from torch.nn import functional as F -from torch.nn.modules.utils import _pair - -from detectron2.layers.wrappers import _NewEmptyTensorOp - - -class TridentConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - paddings=0, - dilations=1, - groups=1, - num_branch=1, - test_branch_idx=-1, - bias=False, - norm=None, - activation=None, - ): - super(TridentConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.num_branch = num_branch - self.stride = _pair(stride) - self.groups = groups - self.with_bias = bias - if isinstance(paddings, int): - paddings = [paddings] * self.num_branch - if isinstance(dilations, int): - dilations = [dilations] * self.num_branch - self.paddings = [_pair(padding) for padding in paddings] - self.dilations = [_pair(dilation) for dilation in dilations] - self.test_branch_idx = test_branch_idx - self.norm = norm - self.activation = activation - - assert len({self.num_branch, len(self.paddings), len(self.dilations)}) == 1 - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) - ) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - if self.bias is not None: - nn.init.constant_(self.bias, 0) - - def forward(self, inputs): - num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 - assert len(inputs) == num_branch - - if inputs[0].numel() == 0: - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - inputs[0].shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [input[0].shape[0], self.weight.shape[0]] + output_shape - return [_NewEmptyTensorOp.apply(input, output_shape) for input in inputs] - - if self.training or self.test_branch_idx == -1: - outputs = [ - F.conv2d(input, self.weight, self.bias, self.stride, padding, dilation, self.groups) - for input, dilation, padding in zip(inputs, self.dilations, self.paddings) - ] - else: - outputs = [ - F.conv2d( - inputs[0], - self.weight, - self.bias, - self.stride, - self.paddings[self.test_branch_idx], - 
self.dilations[self.test_branch_idx], - self.groups, - ) - ] - - if self.norm is not None: - outputs = [self.norm(x) for x in outputs] - if self.activation is not None: - outputs = [self.activation(x) for x in outputs] - return outputs - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", num_branch=" + str(self.num_branch) - tmpstr += ", test_branch_idx=" + str(self.test_branch_idx) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", paddings=" + str(self.paddings) - tmpstr += ", dilations=" + str(self.dilations) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", bias=" + str(self.with_bias) - return tmpstr diff --git a/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py b/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py deleted file mode 100644 index fc22c712c84f96813fb275931ad4e350ee1f3bfd..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from detectron2.layers import batched_nms -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.roi_heads.roi_heads import Res5ROIHeads -from detectron2.structures import Instances - - -def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image): - """ - Merge detection results from different branches of TridentNet. - Return detection results by applying non-maximum suppression (NMS) on bounding boxes - and keep the unsuppressed boxes and other instances (e.g mask) if any. - - Args: - instances (list[Instances]): A list of N * num_branch instances that store detection - results. Contain N images and each image has num_branch instances. - num_branch (int): Number of branches used for merging detection results for each image. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - results: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections after merging results from multiple - branches. - """ - if num_branch == 1: - return instances - - batch_size = len(instances) // num_branch - results = [] - for i in range(batch_size): - instance = Instances.cat([instances[i + batch_size * j] for j in range(num_branch)]) - - # Apply per-class NMS - keep = batched_nms( - instance.pred_boxes.tensor, instance.scores, instance.pred_classes, nms_thresh - ) - keep = keep[:topk_per_image] - result = instance[keep] - - results.append(result) - - return results - - -@ROI_HEADS_REGISTRY.register() -class TridentRes5ROIHeads(Res5ROIHeads): - """ - The TridentNet ROIHeads in a typical "C4" R-CNN model. - See :class:`Res5ROIHeads`. - """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, proposals, targets=None): - """ - See :class:`Res5ROIHeads.forward`. 
- """ - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - all_targets = targets * num_branch if targets is not None else None - pred_instances, losses = super().forward(images, features, proposals, all_targets) - del images, all_targets, targets - - if self.training: - return pred_instances, losses - else: - pred_instances = merge_branch_instances( - pred_instances, - num_branch, - self.box_predictor.test_nms_thresh, - self.box_predictor.test_topk_per_image, - ) - - return pred_instances, {} - - -@ROI_HEADS_REGISTRY.register() -class TridentStandardROIHeads(StandardROIHeads): - """ - The `StandardROIHeads` for TridentNet. - See :class:`StandardROIHeads`. - """ - - def __init__(self, cfg, input_shape): - super(TridentStandardROIHeads, self).__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, proposals, targets=None): - """ - See :class:`Res5ROIHeads.forward`. - """ - # Use 1 branch if using trident_fast during inference. - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - # Duplicate targets for all branches in TridentNet. - all_targets = targets * num_branch if targets is not None else None - pred_instances, losses = super().forward(images, features, proposals, all_targets) - del images, all_targets, targets - - if self.training: - return pred_instances, losses - else: - pred_instances = merge_branch_instances( - pred_instances, - num_branch, - self.box_predictor.test_nms_thresh, - self.box_predictor.test_topk_per_image, - ) - - return pred_instances, {} diff --git a/detectron2/projects/TridentNet/tridentnet/trident_rpn.py b/detectron2/projects/TridentNet/tridentnet/trident_rpn.py deleted file mode 100644 index f95fbbf8ea59ad014f3337c47d41b5410f2c9d45..0000000000000000000000000000000000000000 --- a/detectron2/projects/TridentNet/tridentnet/trident_rpn.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import torch - -from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY -from detectron2.modeling.proposal_generator.rpn import RPN -from detectron2.structures import ImageList - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class TridentRPN(RPN): - """ - Trident RPN subnetwork. - """ - - def __init__(self, cfg, input_shape): - super(TridentRPN, self).__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, gt_instances=None): - """ - See :class:`RPN.forward`. - """ - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - # Duplicate images and gt_instances for all branches in TridentNet. 
- all_images = ImageList( - torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch - ) - all_gt_instances = gt_instances * num_branch if gt_instances is not None else None - - return super(TridentRPN, self).forward(all_images, features, all_gt_instances) diff --git a/detectron2/projects/ViTDet/README.md b/detectron2/projects/ViTDet/README.md deleted file mode 100644 index 0a525e00e643017fc971566931936f1573d9b47c..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/README.md +++ /dev/null @@ -1,364 +0,0 @@ -# ViTDet: Exploring Plain Vision Transformer Backbones for Object Detection - -Yanghao Li, Hanzi Mao, Ross Girshick†, Kaiming He† - -[[`arXiv`](https://arxiv.org/abs/2203.16527)] [[`BibTeX`](#CitingViTDet)] - -In this repository, we provide configs and models in Detectron2 for ViTDet as well as MViTv2 and Swin backbones with our implementation and settings as described in [ViTDet](https://arxiv.org/abs/2203.16527) paper. - - -## Pretrained Models - -### COCO - -#### Mask R-CNN - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Name | pre-train | train time (s/im) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| ViTDet, ViT-B | IN1K, MAE | 0.314 | 0.079 | 10.9 | 51.6 | 45.9 | 325346929 | model |
-| ViTDet, ViT-L | IN1K, MAE | 0.603 | 0.125 | 20.9 | 55.5 | 49.2 | 325599698 | model |
-| ViTDet, ViT-H | IN1K, MAE | 1.098 | 0.178 | 31.5 | 56.7 | 50.2 | 329145471 | model |
-
-#### Cascade Mask R-CNN
-
-| Name | pre-train | train time (s/im) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| Swin-B | IN21K, sup | 0.389 | 0.077 | 8.7 | 53.9 | 46.2 | 342979038 | model |
-| Swin-L | IN21K, sup | 0.508 | 0.097 | 12.6 | 55.0 | 47.2 | 342979186 | model |
-| MViTv2-B | IN21K, sup | 0.475 | 0.090 | 8.9 | 55.6 | 48.1 | 325820315 | model |
-| MViTv2-L | IN21K, sup | 0.844 | 0.157 | 19.7 | 55.7 | 48.3 | 325607715 | model |
-| MViTv2-H | IN21K, sup | 1.655 | 0.285 | 18.4* | 55.9 | 48.3 | 326187358 | model |
-| ViTDet, ViT-B | IN1K, MAE | 0.362 | 0.089 | 12.3 | 54.0 | 46.7 | 325358525 | model |
-| ViTDet, ViT-L | IN1K, MAE | 0.643 | 0.142 | 22.3 | 57.6 | 50.0 | 328021305 | model |
-| ViTDet, ViT-H | IN1K, MAE | 1.137 | 0.196 | 32.9 | 58.7 | 51.0 | 328730692 | model |
-
-
-### LVIS
-
-#### Mask R-CNN
-
-| Name | pre-train | train time (s/im) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| ViTDet, ViT-B | IN1K, MAE | 0.317 | 0.085 | 14.4 | 40.2 | 38.2 | 329225748 | model |
-| ViTDet, ViT-L | IN1K, MAE | 0.576 | 0.137 | 24.7 | 46.1 | 43.6 | 329211570 | model |
-| ViTDet, ViT-H | IN1K, MAE | 1.059 | 0.186 | 35.3 | 49.1 | 46.0 | 332434656 | model |
-
-#### Cascade Mask R-CNN
-
-| Name | pre-train | train time (s/im) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| Swin-B | IN21K, sup | 0.368 | 0.090 | 11.5 | 44.0 | 39.6 | 329222304 | model |
-| Swin-L | IN21K, sup | 0.486 | 0.105 | 13.8 | 46.0 | 41.4 | 329222724 | model |
-| MViTv2-B | IN21K, sup | 0.475 | 0.100 | 11.8 | 46.3 | 42.0 | 329477206 | model |
-| MViTv2-L | IN21K, sup | 0.844 | 0.172 | 21.0 | 49.4 | 44.2 | 329661552 | model |
-| MViTv2-H | IN21K, sup | 1.661 | 0.290 | 21.3* | 49.5 | 44.1 | 330445165 | model |
-| ViTDet, ViT-B | IN1K, MAE | 0.356 | 0.099 | 15.2 | 43.0 | 38.9 | 329226874 | model |
-| ViTDet, ViT-L | IN1K, MAE | 0.629 | 0.150 | 24.9 | 49.2 | 44.5 | 329042206 | model |
-| ViTDet, ViT-H | IN1K, MAE | 1.100 | 0.204 | 35.5 | 51.5 | 46.6 | 332552778 | model |
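The tables above list the model ids and checkpoints consumed by the Training and Evaluation commands further down in this README. A minimal sketch of loading one of these models programmatically, assuming a local detectron2 installation with its `LazyConfig`, `instantiate`, and `DetectionCheckpointer` APIs; the config path and checkpoint path here are placeholders, not canonical locations:

```python
# Minimal sketch: load a ViTDet lazy config and a pretrained checkpoint for inference.
# The config path and checkpoint path below are placeholders.
from detectron2.config import LazyConfig, instantiate
from detectron2.checkpoint import DetectionCheckpointer

cfg = LazyConfig.load("projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py")

# The command-line override `train.init_checkpoint=...` used with --eval-only
# corresponds to applying the same override on the loaded config.
cfg = LazyConfig.apply_overrides(cfg, ["train.init_checkpoint=/path/to/model_checkpoint"])

model = instantiate(cfg.model)  # build the detector described by the config
DetectionCheckpointer(model).load(cfg.train.init_checkpoint)  # load pretrained weights
model.eval()
```

The same checkpoints can be evaluated end to end with the `--eval-only` command shown in the Evaluation section below.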
- -Note: Unlike the system-level comparisons in the paper, these models use a lower resolution (1024 instead of 1280) and standard NMS (instead of soft NMS). As a result, they have slightly lower box and mask AP. - -We observed higher variance on LVIS evalution results compared to COCO. For example, the standard deviations of box AP and mask AP were 0.30% (compared to 0.10% on COCO) when we trained ViTDet, ViT-B five times with varying random seeds. - -The above models were trained and measured on 8-node with 64 NVIDIA A100 GPUs in total. *: Activation checkpointing is used. - - -## Training -All configs can be trained with: - -``` -../../tools/lazyconfig_train_net.py --config-file configs/path/to/config.py -``` -By default, we use 64 GPUs with batch size as 64 for training. - -## Evaluation -Model evaluation can be done similarly: -``` -../../tools/lazyconfig_train_net.py --config-file configs/path/to/config.py --eval-only train.init_checkpoint=/path/to/model_checkpoint -``` - - -## Citing ViTDet - -If you use ViTDet, please use the following BibTeX entry. - -```BibTeX -@article{li2022exploring, - title={Exploring plain vision transformer backbones for object detection}, - author={Li, Yanghao and Mao, Hanzi and Girshick, Ross and He, Kaiming}, - journal={arXiv preprint arXiv:2203.16527}, - year={2022} -} -``` diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py deleted file mode 100644 index 9dba203086f8b34221ea9bed9f5fc280579f97df..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py +++ /dev/null @@ -1,95 +0,0 @@ -from functools import partial -import torch.nn as nn -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2 import model_zoo -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler -from detectron2.modeling import MViT -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.roi_heads import ( - FastRCNNOutputLayers, - FastRCNNConvFCHead, - CascadeROIHeads, -) - -from ..common.coco_loader_lsj import dataloader - -model = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model -constants = model_zoo.get_config("common/data/constants.py").constants -model.pixel_mean = constants.imagenet_rgb256_mean -model.pixel_std = constants.imagenet_rgb256_std -model.input_format = "RGB" -model.backbone.bottom_up = L(MViT)( - embed_dim=96, - depth=24, - num_heads=1, - last_block_indexes=(1, 4, 20, 23), - residual_pooling=True, - drop_path_rate=0.4, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - out_features=("scale2", "scale3", "scale4", "scale5"), -) -model.backbone.in_features = "${.bottom_up.out_features}" -model.backbone.square_pad = 1024 - -# New heads and LN -model.backbone.norm = "LN" # Use LN in FPN -model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN" - -# 2conv in RPN: -model.proposal_generator.head.conv_dims = [-1, -1] - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] -model.roi_heads.update( - _target_=CascadeROIHeads, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[256, 256, 256, 256], - fc_dims=[1024], - 
conv_norm="LN", - ) - for _ in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - test_score_thresh=0.05, - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - cls_agnostic_bbox_reg=True, - num_classes="${...num_classes}", - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) - -# Initialization and trainer settings -train = model_zoo.get_config("common/train.py").train -train.amp.enabled = True -train.ddp.fp16_compression = True -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_B_in21k.pyth" - -# Schedule -# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep -train.max_iter = 184375 -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[163889, 177546], - num_updates=train.max_iter, - ), - warmup_length=250 / train.max_iter, - warmup_factor=0.001, -) - -optimizer = model_zoo.get_config("common/optim.py").AdamW -optimizer.params.overrides = {"pos_embed": {"weight_decay": 0.0}} -optimizer.lr = 8e-5 diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_h_in21k_36ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_h_in21k_36ep.py deleted file mode 100644 index 577045043b960384953a00eac4dc45ee43c1045e..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_h_in21k_36ep.py +++ /dev/null @@ -1,39 +0,0 @@ -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler - -from .cascade_mask_rcnn_mvitv2_b_in21k_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -model.backbone.bottom_up.embed_dim = 192 -model.backbone.bottom_up.depth = 80 -model.backbone.bottom_up.num_heads = 3 -model.backbone.bottom_up.last_block_indexes = (3, 11, 71, 79) -model.backbone.bottom_up.drop_path_rate = 0.6 -model.backbone.bottom_up.use_act_checkpoint = True - - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_H_in21k.pyth" - - -# 36 epochs -train.max_iter = 67500 -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[ - 52500, - 62500, - 67500, - ], - ), - warmup_length=250 / train.max_iter, - warmup_factor=0.001, -) -optimizer.lr = 1.6e-4 diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_l_in21k_50ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_l_in21k_50ep.py deleted file mode 100644 index c64f0c18aea5dfe49fef028a6300ab1dc9f2537a..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_mvitv2_l_in21k_50ep.py +++ /dev/null @@ -1,22 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_b_in21k_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -model.backbone.bottom_up.embed_dim = 144 -model.backbone.bottom_up.depth = 48 -model.backbone.bottom_up.num_heads = 2 -model.backbone.bottom_up.last_block_indexes = (1, 7, 43, 47) -model.backbone.bottom_up.drop_path_rate = 0.5 - - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_L_in21k.pyth" - -train.max_iter = train.max_iter // 2 # 100ep -> 50ep -lr_multiplier.scheduler.milestones = [ - milestone // 2 for milestone in 
lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py deleted file mode 100644 index b2aad98526e39240ff82cbf96cb005ce75e5c577..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_swin_b_in21k_50ep.py +++ /dev/null @@ -1,50 +0,0 @@ -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2 import model_zoo -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler -from detectron2.modeling import SwinTransformer - -from ..common.coco_loader_lsj import dataloader -from .cascade_mask_rcnn_mvitv2_b_in21k_100ep import model - -model.backbone.bottom_up = L(SwinTransformer)( - depths=[2, 2, 18, 2], - drop_path_rate=0.4, - embed_dim=128, - num_heads=[4, 8, 16, 32], -) -model.backbone.in_features = ("p0", "p1", "p2", "p3") -model.backbone.square_pad = 1024 - -# Initialization and trainer settings -train = model_zoo.get_config("common/train.py").train -train.amp.enabled = True -train.ddp.fp16_compression = True -train.init_checkpoint = "detectron2://ImageNetPretrained/swin/swin_base_patch4_window7_224_22k.pth" - -# Schedule -# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep -train.max_iter = 184375 -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[163889, 177546], - num_updates=train.max_iter, - ), - warmup_length=250 / train.max_iter, - warmup_factor=0.001, -) - -# Rescale schedule -train.max_iter = train.max_iter // 2 # 100ep -> 50ep -lr_multiplier.scheduler.milestones = [ - milestone // 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter - - -optimizer = model_zoo.get_config("common/optim.py").AdamW -optimizer.lr = 4e-5 -optimizer.weight_decay = 0.05 -optimizer.params.overrides = {"relative_position_bias_table": {"weight_decay": 0.0}} diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_swin_l_in21k_50ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_swin_l_in21k_50ep.py deleted file mode 100644 index 60bc917b5938338f87c96b17041432d1fb637ce3..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_swin_l_in21k_50ep.py +++ /dev/null @@ -1,15 +0,0 @@ -from .cascade_mask_rcnn_swin_b_in21k_50ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -model.backbone.bottom_up.depths = [2, 2, 18, 2] -model.backbone.bottom_up.drop_path_rate = 0.4 -model.backbone.bottom_up.embed_dim = 192 -model.backbone.bottom_up.num_heads = [6, 12, 24, 48] - - -train.init_checkpoint = "detectron2://ImageNetPretrained/swin/swin_large_patch4_window7_224_22k.pth" diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_b_100ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_b_100ep.py deleted file mode 100644 index 95823ef4fbfa0745713ab6a7df4716056367f8b2..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_b_100ep.py +++ /dev/null @@ -1,48 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from 
detectron2.modeling.roi_heads import ( - FastRCNNOutputLayers, - FastRCNNConvFCHead, - CascadeROIHeads, -) - -from .mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, - get_vit_lr_decay_rate, -) - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] - -model.roi_heads.update( - _target_=CascadeROIHeads, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[256, 256, 256, 256], - fc_dims=[1024], - conv_norm="LN", - ) - for _ in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - test_score_thresh=0.05, - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - cls_agnostic_bbox_reg=True, - num_classes="${...num_classes}", - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_h_75ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_h_75ep.py deleted file mode 100644 index e508a68f5cebbf0960f3c307819dc2f5ef900057..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_h_75ep.py +++ /dev/null @@ -1,33 +0,0 @@ -from functools import partial - -from .cascade_mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, - get_vit_lr_decay_rate, -) - -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_huge_p14to16.pth?matching_heuristics=True" -) - -model.backbone.net.embed_dim = 1280 -model.backbone.net.depth = 32 -model.backbone.net.num_heads = 16 -model.backbone.net.drop_path_rate = 0.5 -# 7, 15, 23, 31 for global attention -model.backbone.net.window_block_indexes = ( - list(range(0, 7)) + list(range(8, 15)) + list(range(16, 23)) + list(range(24, 31)) -) - -optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.9, num_layers=32) -optimizer.params.overrides = {} -optimizer.params.weight_decay_norm = None - -train.max_iter = train.max_iter * 3 // 4 # 100ep -> 75ep -lr_multiplier.scheduler.milestones = [ - milestone * 3 // 4 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_l_100ep.py b/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_l_100ep.py deleted file mode 100644 index 2743603ad2b6cc3f99aa0600c715887f7550d1cd..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/cascade_mask_rcnn_vitdet_l_100ep.py +++ /dev/null @@ -1,25 +0,0 @@ -from functools import partial - -from .cascade_mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, - get_vit_lr_decay_rate, -) - -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth?matching_heuristics=True" -) - -model.backbone.net.embed_dim = 1024 -model.backbone.net.depth = 24 -model.backbone.net.num_heads = 16 -model.backbone.net.drop_path_rate = 0.4 -# 5, 11, 17, 23 for global attention -model.backbone.net.window_block_indexes = ( - list(range(0, 5)) + list(range(6, 11)) + list(range(12, 17)) + list(range(18, 23)) -) - -optimizer.params.lr_factor_func = 
partial(get_vit_lr_decay_rate, lr_decay_rate=0.8, num_layers=24) diff --git a/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py b/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py deleted file mode 100644 index 8fd36e92da0137df8aae5935e71b7af419ac1016..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_b_100ep.py +++ /dev/null @@ -1,40 +0,0 @@ -from functools import partial -from fvcore.common.param_scheduler import MultiStepParamScheduler - -from detectron2 import model_zoo -from detectron2.config import LazyCall as L -from detectron2.solver import WarmupParamScheduler -from detectron2.modeling.backbone.vit import get_vit_lr_decay_rate - -from ..common.coco_loader_lsj import dataloader - - -model = model_zoo.get_config("common/models/mask_rcnn_vitdet.py").model - -# Initialization and trainer settings -train = model_zoo.get_config("common/train.py").train -train.amp.enabled = True -train.ddp.fp16_compression = True -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth?matching_heuristics=True" -) - - -# Schedule -# 100 ep = 184375 iters * 64 images/iter / 118000 images/ep -train.max_iter = 184375 - -lr_multiplier = L(WarmupParamScheduler)( - scheduler=L(MultiStepParamScheduler)( - values=[1.0, 0.1, 0.01], - milestones=[163889, 177546], - num_updates=train.max_iter, - ), - warmup_length=250 / train.max_iter, - warmup_factor=0.001, -) - -# Optimizer -optimizer = model_zoo.get_config("common/optim.py").AdamW -optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, num_layers=12, lr_decay_rate=0.7) -optimizer.params.overrides = {"pos_embed": {"weight_decay": 0.0}} diff --git a/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_h_75ep.py b/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_h_75ep.py deleted file mode 100644 index 7de96f0a6c760ac41152726ac1e4faeb1fb9a818..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_h_75ep.py +++ /dev/null @@ -1,33 +0,0 @@ -from functools import partial - -from .mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, - get_vit_lr_decay_rate, -) - -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_huge_p14to16.pth?matching_heuristics=True" -) - -model.backbone.net.embed_dim = 1280 -model.backbone.net.depth = 32 -model.backbone.net.num_heads = 16 -model.backbone.net.drop_path_rate = 0.5 -# 7, 15, 23, 31 for global attention -model.backbone.net.window_block_indexes = ( - list(range(0, 7)) + list(range(8, 15)) + list(range(16, 23)) + list(range(24, 31)) -) - -optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.9, num_layers=32) -optimizer.params.overrides = {} -optimizer.params.weight_decay_norm = None - -train.max_iter = train.max_iter * 3 // 4 # 100ep -> 75ep -lr_multiplier.scheduler.milestones = [ - milestone * 3 // 4 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter diff --git a/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_l_100ep.py b/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_l_100ep.py deleted file mode 100644 index 0d193cbb1e09943812c23fc16f0cde66f6a59fce..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/COCO/mask_rcnn_vitdet_l_100ep.py +++ /dev/null @@ -1,25 +0,0 @@ -from functools import partial - -from .mask_rcnn_vitdet_b_100ep import ( 
- dataloader, - lr_multiplier, - model, - train, - optimizer, - get_vit_lr_decay_rate, -) - -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth?matching_heuristics=True" -) - -model.backbone.net.embed_dim = 1024 -model.backbone.net.depth = 24 -model.backbone.net.num_heads = 16 -model.backbone.net.drop_path_rate = 0.4 -# 5, 11, 17, 23 for global attention -model.backbone.net.window_block_indexes = ( - list(range(0, 5)) + list(range(6, 11)) + list(range(12, 17)) + list(range(18, 23)) -) - -optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.8, num_layers=24) diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py deleted file mode 100644 index 1cf9c3ea7a962bd890fc3b22e0449323f8dc0dfa..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_b_in21k_100ep.py +++ /dev/null @@ -1,48 +0,0 @@ -from functools import partial -import torch.nn as nn - -from detectron2.config import LazyCall as L -from detectron2.data.detection_utils import get_fed_loss_cls_weights -from detectron2.data.samplers import RepeatFactorTrainingSampler -from detectron2.evaluation.lvis_evaluation import LVISEvaluator - -from ..COCO.cascade_mask_rcnn_mvitv2_b_in21k_100ep import ( - dataloader, - model, - train, - lr_multiplier, - optimizer, -) - -dataloader.train.dataset.names = "lvis_v1_train" -dataloader.train.sampler = L(RepeatFactorTrainingSampler)( - repeat_factors=L(RepeatFactorTrainingSampler.repeat_factors_from_category_frequency)( - dataset_dicts="${dataloader.train.dataset}", repeat_thresh=0.001 - ) -) -dataloader.test.dataset.names = "lvis_v1_val" -dataloader.evaluator = L(LVISEvaluator)( - dataset_name="${..test.dataset.names}", - max_dets_per_image=300, -) - -model.roi_heads.num_classes = 1203 -for i in range(3): - model.roi_heads.box_predictors[i].test_score_thresh = 0.02 - model.roi_heads.box_predictors[i].test_topk_per_image = 300 - model.roi_heads.box_predictors[i].use_sigmoid_ce = True - model.roi_heads.box_predictors[i].use_fed_loss = True - model.roi_heads.box_predictors[i].get_fed_loss_cls_weights = lambda: get_fed_loss_cls_weights( - dataloader.train.dataset.names, 0.5 - ) - -# Schedule -# 100 ep = 156250 iters * 64 images/iter / 100000 images/ep -train.max_iter = 156250 -train.eval_period = 30000 - -lr_multiplier.scheduler.milestones = [138889, 150463] -lr_multiplier.scheduler.num_updates = train.max_iter -lr_multiplier.warmup_length = 250 / train.max_iter - -optimizer.lr = 1e-4 diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_h_in21k_50ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_h_in21k_50ep.py deleted file mode 100644 index 084444bf0338d1bab2ee426ae226a0f8004dd0f5..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_h_in21k_50ep.py +++ /dev/null @@ -1,25 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_b_in21k_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -model.backbone.bottom_up.embed_dim = 192 -model.backbone.bottom_up.depth = 80 -model.backbone.bottom_up.num_heads = 3 -model.backbone.bottom_up.last_block_indexes = (3, 11, 71, 79) -model.backbone.bottom_up.drop_path_rate = 0.6 -model.backbone.bottom_up.use_act_checkpoint = True - -train.init_checkpoint = 
"detectron2://ImageNetPretrained/mvitv2/MViTv2_H_in21k.pyth" - -train.max_iter = train.max_iter // 2 # 100ep -> 50ep -lr_multiplier.scheduler.milestones = [ - milestone // 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter -lr_multiplier.warmup_length = 250 / train.max_iter - -optimizer.lr = 2e-5 diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_l_in21k_50ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_l_in21k_50ep.py deleted file mode 100644 index 779442c60fa32f1d36e823e86c62979f8e48ec2c..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_mvitv2_l_in21k_50ep.py +++ /dev/null @@ -1,24 +0,0 @@ -from .cascade_mask_rcnn_mvitv2_b_in21k_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -model.backbone.bottom_up.embed_dim = 144 -model.backbone.bottom_up.depth = 48 -model.backbone.bottom_up.num_heads = 2 -model.backbone.bottom_up.last_block_indexes = (1, 7, 43, 47) -model.backbone.bottom_up.drop_path_rate = 0.5 - -train.init_checkpoint = "detectron2://ImageNetPretrained/mvitv2/MViTv2_L_in21k.pyth" - -train.max_iter = train.max_iter // 2 # 100ep -> 50ep -lr_multiplier.scheduler.milestones = [ - milestone // 2 for milestone in lr_multiplier.scheduler.milestones -] -lr_multiplier.scheduler.num_updates = train.max_iter -lr_multiplier.warmup_length = 250 / train.max_iter - -optimizer.lr = 4e-5 diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_swin_b_in21k_50ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_swin_b_in21k_50ep.py deleted file mode 100644 index d18c925f7349b42e52adb9c7b4e5461e1a25657f..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_swin_b_in21k_50ep.py +++ /dev/null @@ -1,49 +0,0 @@ -from detectron2.config.lazy import LazyCall as L -from detectron2.data.detection_utils import get_fed_loss_cls_weights -from detectron2.data.samplers import RepeatFactorTrainingSampler -from detectron2.evaluation.lvis_evaluation import LVISEvaluator - -from ..COCO.cascade_mask_rcnn_swin_b_in21k_50ep import ( - dataloader, - model, - train, - lr_multiplier, - optimizer, -) - -dataloader.train.dataset.names = "lvis_v1_train" -dataloader.train.sampler = L(RepeatFactorTrainingSampler)( - repeat_factors=L(RepeatFactorTrainingSampler.repeat_factors_from_category_frequency)( - dataset_dicts="${dataloader.train.dataset}", repeat_thresh=0.001 - ) -) -dataloader.test.dataset.names = "lvis_v1_val" -dataloader.evaluator = L(LVISEvaluator)( - dataset_name="${..test.dataset.names}", - max_dets_per_image=300, -) - -model.backbone.bottom_up.drop_path_rate = 0.3 - -model.roi_heads.num_classes = 1203 -for i in range(3): - model.roi_heads.box_predictors[i].test_score_thresh = 0.02 - model.roi_heads.box_predictors[i].test_topk_per_image = 300 - model.roi_heads.box_predictors[i].use_sigmoid_ce = True - model.roi_heads.box_predictors[i].use_fed_loss = True - model.roi_heads.box_predictors[i].get_fed_loss_cls_weights = lambda: get_fed_loss_cls_weights( - dataloader.train.dataset.names, 0.5 - ) - -# Schedule -# 100 ep = 156250 iters * 64 images/iter / 100000 images/ep -# 100 ep -> 50 ep as the model achieves better performance with 50 epochs -train.max_iter = 156250 // 2 -train.eval_period = 30000 - -lr_multiplier.scheduler.milestones = [milestone // 2 for milestone in [138889, 150463]] -lr_multiplier.scheduler.num_updates = train.max_iter 
-lr_multiplier.warmup_length = 250 / train.max_iter - -# Optimized hyperparams -optimizer.lr = 1e-4 diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_swin_l_in21k_50ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_swin_l_in21k_50ep.py deleted file mode 100644 index 9e22e3b28777003776774f61273c04bbb2abea1e..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_swin_l_in21k_50ep.py +++ /dev/null @@ -1,12 +0,0 @@ -from .cascade_mask_rcnn_swin_b_in21k_50ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -model.backbone.bottom_up.embed_dim = 192 -model.backbone.bottom_up.num_heads = [6, 12, 24, 48] - -train.init_checkpoint = "detectron2://ImageNetPretrained/swin/swin_large_patch4_window7_224_22k.pth" diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_b_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_b_100ep.py deleted file mode 100644 index 8115224ca85b71e772302e97bda676cca3acfbd8..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_b_100ep.py +++ /dev/null @@ -1,51 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.data.detection_utils import get_fed_loss_cls_weights -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads - -from .mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] - -model.roi_heads.update( - _target_=CascadeROIHeads, - num_classes=1203, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[256, 256, 256, 256], - fc_dims=[1024], - conv_norm="LN", - ) - for _ in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - num_classes="${...num_classes}", - test_score_thresh=0.02, - test_topk_per_image=300, - cls_agnostic_bbox_reg=True, - use_sigmoid_ce=True, - use_fed_loss=True, - get_fed_loss_cls_weights=lambda: get_fed_loss_cls_weights( - dataloader.train.dataset.names, 0.5 - ), - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_h_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_h_100ep.py deleted file mode 100644 index 68bec5734456c9bbc813becd5da83bc2a0f90932..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_h_100ep.py +++ /dev/null @@ -1,51 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.data.detection_utils import get_fed_loss_cls_weights -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads - -from .mask_rcnn_vitdet_h_100ep import ( - dataloader, - lr_multiplier, - 
model, - optimizer, - train, -) - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] - -model.roi_heads.update( - _target_=CascadeROIHeads, - num_classes=1203, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[256, 256, 256, 256], - fc_dims=[1024], - conv_norm="LN", - ) - for _ in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - num_classes="${...num_classes}", - test_score_thresh=0.02, - test_topk_per_image=300, - cls_agnostic_bbox_reg=True, - use_sigmoid_ce=True, - use_fed_loss=True, - get_fed_loss_cls_weights=lambda: get_fed_loss_cls_weights( - dataloader.train.dataset.names, 0.5 - ), - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) diff --git a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_l_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_l_100ep.py deleted file mode 100644 index ebaf526ab7735309d5f50527136ad6207ce9d58b..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/cascade_mask_rcnn_vitdet_l_100ep.py +++ /dev/null @@ -1,51 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.data.detection_utils import get_fed_loss_cls_weights -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.matcher import Matcher -from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads - -from .mask_rcnn_vitdet_l_100ep import ( - dataloader, - lr_multiplier, - model, - optimizer, - train, -) - -# arguments that don't exist for Cascade R-CNN -[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]] - -model.roi_heads.update( - _target_=CascadeROIHeads, - num_classes=1203, - box_heads=[ - L(FastRCNNConvFCHead)( - input_shape=ShapeSpec(channels=256, height=7, width=7), - conv_dims=[256, 256, 256, 256], - fc_dims=[1024], - conv_norm="LN", - ) - for _ in range(3) - ], - box_predictors=[ - L(FastRCNNOutputLayers)( - input_shape=ShapeSpec(channels=1024), - box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)), - num_classes="${...num_classes}", - test_score_thresh=0.02, - test_topk_per_image=300, - cls_agnostic_bbox_reg=True, - use_sigmoid_ce=True, - use_fed_loss=True, - get_fed_loss_cls_weights=lambda: get_fed_loss_cls_weights( - dataloader.train.dataset.names, 0.5 - ), - ) - for (w1, w2) in [(10, 5), (20, 10), (30, 15)] - ], - proposal_matchers=[ - L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False) - for th in [0.5, 0.6, 0.7] - ], -) diff --git a/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_b_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_b_100ep.py deleted file mode 100644 index ef905457ba8813f9f293beda4da20f49efca73db..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_b_100ep.py +++ /dev/null @@ -1,44 +0,0 @@ -from detectron2.config import LazyCall as L -from detectron2.data.samplers import RepeatFactorTrainingSampler -from detectron2.evaluation.lvis_evaluation import LVISEvaluator -from detectron2.data.detection_utils import 
get_fed_loss_cls_weights - -from ..COCO.mask_rcnn_vitdet_b_100ep import ( - dataloader, - model, - train, - lr_multiplier, - optimizer, -) - -dataloader.train.dataset.names = "lvis_v1_train" -dataloader.train.sampler = L(RepeatFactorTrainingSampler)( - repeat_factors=L(RepeatFactorTrainingSampler.repeat_factors_from_category_frequency)( - dataset_dicts="${dataloader.train.dataset}", repeat_thresh=0.001 - ) -) -dataloader.test.dataset.names = "lvis_v1_val" -dataloader.evaluator = L(LVISEvaluator)( - dataset_name="${..test.dataset.names}", - max_dets_per_image=300, -) - -model.roi_heads.num_classes = 1203 -model.roi_heads.box_predictor.test_score_thresh = 0.02 -model.roi_heads.box_predictor.test_topk_per_image = 300 -model.roi_heads.box_predictor.use_sigmoid_ce = True -model.roi_heads.box_predictor.use_fed_loss = True -model.roi_heads.box_predictor.get_fed_loss_cls_weights = lambda: get_fed_loss_cls_weights( - dataloader.train.dataset.names, 0.5 -) - -# Schedule -# 100 ep = 156250 iters * 64 images/iter / 100000 images/ep -train.max_iter = 156250 -train.eval_period = 30000 - -lr_multiplier.scheduler.milestones = [138889, 150463] -lr_multiplier.scheduler.num_updates = train.max_iter -lr_multiplier.warmup_length = 250 / train.max_iter - -optimizer.lr = 2e-4 diff --git a/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_h_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_h_100ep.py deleted file mode 100644 index 0f99bad24e6702e91abe226446e7d7b00ef14df2..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_h_100ep.py +++ /dev/null @@ -1,30 +0,0 @@ -from functools import partial - -from detectron2.modeling.backbone.vit import get_vit_lr_decay_rate - -from .mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_huge_p14to16.pth?matching_heuristics=True" -) - -model.backbone.net.embed_dim = 1280 -model.backbone.net.depth = 32 -model.backbone.net.num_heads = 16 -model.backbone.net.drop_path_rate = 0.4 -# 7, 15, 23, 31 for global attention -model.backbone.net.window_block_indexes = ( - list(range(0, 7)) + list(range(8, 15)) + list(range(16, 23)) + list(range(24, 31)) -) - - -optimizer.lr = 1e-4 -optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.9, num_layers=32) -optimizer.params.overrides = {} -optimizer.params.weight_decay_norm = None diff --git a/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_l_100ep.py b/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_l_100ep.py deleted file mode 100644 index 15d879230fb3b8e4e0cb4bd6c8c07de8e2dda268..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/LVIS/mask_rcnn_vitdet_l_100ep.py +++ /dev/null @@ -1,26 +0,0 @@ -from functools import partial - -from detectron2.modeling.backbone.vit import get_vit_lr_decay_rate - -from .mask_rcnn_vitdet_b_100ep import ( - dataloader, - lr_multiplier, - model, - train, - optimizer, -) - -train.init_checkpoint = ( - "detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_large.pth?matching_heuristics=True" -) - -model.backbone.net.embed_dim = 1024 -model.backbone.net.depth = 24 -model.backbone.net.num_heads = 16 -model.backbone.net.drop_path_rate = 0.4 -# 5, 11, 17, 23 for global attention -model.backbone.net.window_block_indexes = ( - list(range(0, 5)) + list(range(6, 11)) + list(range(12, 17)) + list(range(18, 23)) -) - 
-optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.8, num_layers=24) diff --git a/detectron2/projects/ViTDet/configs/common/coco_loader_lsj.py b/detectron2/projects/ViTDet/configs/common/coco_loader_lsj.py deleted file mode 100644 index e6c2f1e913a9f629290ce345fc4ffd4db4037e14..0000000000000000000000000000000000000000 --- a/detectron2/projects/ViTDet/configs/common/coco_loader_lsj.py +++ /dev/null @@ -1,22 +0,0 @@ -import detectron2.data.transforms as T -from detectron2 import model_zoo -from detectron2.config import LazyCall as L - -# Data using LSJ -image_size = 1024 -dataloader = model_zoo.get_config("common/data/coco.py").dataloader -dataloader.train.mapper.augmentations = [ - L(T.RandomFlip)(horizontal=True), # flip first - L(T.ResizeScale)( - min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size - ), - L(T.FixedSizeCrop)(crop_size=(image_size, image_size), pad=False), -] -dataloader.train.mapper.image_format = "RGB" -dataloader.train.total_batch_size = 64 -# recompute boxes due to cropping -dataloader.train.mapper.recompute_boxes = True - -dataloader.test.mapper.augmentations = [ - L(T.ResizeShortestEdge)(short_edge_length=image_size, max_size=image_size), -] diff --git a/detectron2/setup.cfg b/detectron2/setup.cfg deleted file mode 100644 index f127d7ba0575e80cc08d35e71272756aff840ee2..0000000000000000000000000000000000000000 --- a/detectron2/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[isort] -line_length=100 -multi_line_output=3 -include_trailing_comma=True -known_standard_library=numpy,setuptools,mock -skip=./datasets,docs -skip_glob=*/__init__.py,**/configs/**,**/tests/config/** -known_myself=detectron2 -known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx,panopticapi,black,isort,av,iopath,omegaconf,hydra,yaml,pydoc,submitit,cloudpickle,packaging,timm,pandas,fairscale,pytorch3d,pytorch_lightning -no_lines_before=STDLIB,THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER -default_section=FIRSTPARTY - -[mypy] -python_version=3.7 -ignore_missing_imports = True -warn_unused_configs = True -disallow_untyped_defs = True -check_untyped_defs = True -warn_unused_ignores = True -warn_redundant_casts = True -show_column_numbers = True -follow_imports = silent -allow_redefinition = True -; Require all functions to be annotated -disallow_incomplete_defs = True diff --git a/detectron2/setup.py b/detectron2/setup.py deleted file mode 100644 index b51307a625e7102d19cb18fb0cd555f9bc3ba36f..0000000000000000000000000000000000000000 --- a/detectron2/setup.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. - -import glob -import os -import shutil -from os import path -from setuptools import find_packages, setup -from typing import List -import torch -from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension - -torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 8], "Requires PyTorch >= 1.8" - - -def get_version(): - init_py_path = path.join(path.abspath(path.dirname(__file__)), "detectron2", "__init__.py") - init_py = open(init_py_path, "r").readlines() - version_line = [l.strip() for l in init_py if l.startswith("__version__")][0] - version = version_line.split("=")[-1].strip().strip("'\"") - - # The following is used to build release packages. - # Users should never use it. 
- suffix = os.getenv("D2_VERSION_SUFFIX", "") - version = version + suffix - if os.getenv("BUILD_NIGHTLY", "0") == "1": - from datetime import datetime - - date_str = datetime.today().strftime("%y%m%d") - version = version + ".dev" + date_str - - new_init_py = [l for l in init_py if not l.startswith("__version__")] - new_init_py.append('__version__ = "{}"\n'.format(version)) - with open(init_py_path, "w") as f: - f.write("".join(new_init_py)) - return version - - -def get_extensions(): - this_dir = path.dirname(path.abspath(__file__)) - extensions_dir = path.join(this_dir, "detectron2", "layers", "csrc") - - main_source = path.join(extensions_dir, "vision.cpp") - sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) - - from torch.utils.cpp_extension import ROCM_HOME - - is_rocm_pytorch = ( - True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False - ) - if is_rocm_pytorch: - assert torch_ver >= [1, 8], "ROCM support requires PyTorch >= 1.8!" - - # common code between cuda and rocm platforms, for hipify version [1,0,0] and later. - source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu")) + glob.glob( - path.join(extensions_dir, "*.cu") - ) - sources = [main_source] + sources - - extension = CppExtension - - extra_compile_args = {"cxx": []} - define_macros = [] - - if (torch.cuda.is_available() and ((CUDA_HOME is not None) or is_rocm_pytorch)) or os.getenv( - "FORCE_CUDA", "0" - ) == "1": - extension = CUDAExtension - sources += source_cuda - - if not is_rocm_pytorch: - define_macros += [("WITH_CUDA", None)] - extra_compile_args["nvcc"] = [ - "-O3", - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - else: - define_macros += [("WITH_HIP", None)] - extra_compile_args["nvcc"] = [] - - nvcc_flags_env = os.getenv("NVCC_FLAGS", "") - if nvcc_flags_env != "": - extra_compile_args["nvcc"].extend(nvcc_flags_env.split(" ")) - - if torch_ver < [1, 7]: - # supported by https://github.com/pytorch/pytorch/pull/43931 - CC = os.environ.get("CC", None) - if CC is not None: - extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) - - include_dirs = [extensions_dir] - - ext_modules = [ - extension( - "detectron2._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -def get_model_zoo_configs() -> List[str]: - """ - Return a list of configs to include in package for model zoo. Copy over these configs inside - detectron2/model_zoo. - """ - - # Use absolute paths while symlinking. - source_configs_dir = path.join(path.dirname(path.realpath(__file__)), "configs") - destination = path.join( - path.dirname(path.realpath(__file__)), "detectron2", "model_zoo", "configs" - ) - # Symlink the config directory inside package to have a cleaner pip install. - - # Remove stale symlink/directory from a previous build. - if path.exists(source_configs_dir): - if path.islink(destination): - os.unlink(destination) - elif path.isdir(destination): - shutil.rmtree(destination) - - if not path.exists(destination): - try: - os.symlink(source_configs_dir, destination) - except OSError: - # Fall back to copying if symlink fails: ex. on Windows. 
- shutil.copytree(source_configs_dir, destination) - - config_paths = glob.glob("configs/**/*.yaml", recursive=True) + glob.glob( - "configs/**/*.py", recursive=True - ) - return config_paths - - -# For projects that are relative small and provide features that are very close -# to detectron2's core functionalities, we install them under detectron2.projects -PROJECTS = { - "detectron2.projects.point_rend": "projects/PointRend/point_rend", - "detectron2.projects.deeplab": "projects/DeepLab/deeplab", - "detectron2.projects.panoptic_deeplab": "projects/Panoptic-DeepLab/panoptic_deeplab", -} - -setup( - name="detectron2", - version=get_version(), - author="FAIR", - url="https://github.com/facebookresearch/detectron2", - description="Detectron2 is FAIR's next-generation research " - "platform for object detection and segmentation.", - packages=find_packages(exclude=("configs", "tests*")) + list(PROJECTS.keys()), - package_dir=PROJECTS, - package_data={"detectron2.model_zoo": get_model_zoo_configs()}, - python_requires=">=3.7", - install_requires=[ - # These dependencies are not pure-python. - # In general, avoid adding dependencies that are not pure-python because they are not - # guaranteed to be installable by `pip install` on all platforms. - "Pillow>=7.1", # or use pillow-simd for better performance - "matplotlib", # TODO move it to optional after we add opencv visualization - "pycocotools>=2.0.2", # corresponds to https://github.com/ppwwyyxx/cocoapi - # Do not add opencv here. Just like pytorch, user should install - # opencv themselves, preferrably by OS's package manager, or by - # choosing the proper pypi package name at https://github.com/skvark/opencv-python - # Also, avoid adding dependencies that transitively depend on pytorch or opencv. - # ------------------------------------------------------------ - # The following are pure-python dependencies that should be easily installable. - # But still be careful when adding more: fewer people are able to use the software - # with every new dependency added. - "termcolor>=1.1", - "yacs>=0.1.8", - "tabulate", - "cloudpickle", - "tqdm>4.29.0", - "tensorboard", - # Lock version of fvcore/iopath because they may have breaking changes - # NOTE: when updating fvcore/iopath version, make sure fvcore depends - # on compatible version of iopath. - "fvcore>=0.1.5,<0.1.6", # required like this to make it pip installable - "iopath>=0.1.7,<0.1.10", - "dataclasses; python_version<'3.7'", - "omegaconf>=2.1,<2.4", - "hydra-core>=1.1", - "black", - "packaging", - # NOTE: When adding new dependencies, if it is required at import time (in addition - # to runtime), it probably needs to appear in docs/requirements.txt, or as a mock - # in docs/conf.py - ], - extras_require={ - # optional dependencies, required by some features - "all": [ - "fairscale", - "timm", # Used by a few ViT models. - "scipy>1.5.1", - "shapely", - "pygments>=2.2", - "psutil", - "panopticapi @ https://github.com/cocodataset/panopticapi/archive/master.zip", - ], - # dev dependencies. 
Install them by `pip install 'detectron2[dev]'` - "dev": [ - "flake8==3.8.1", - "isort==4.3.21", - "flake8-bugbear", - "flake8-comprehensions", - "black==22.3.0", - ], - }, - ext_modules=get_extensions(), - cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, -) diff --git a/detectron2/tests/README.md b/detectron2/tests/README.md deleted file mode 100644 index f560384045ab4f6bc2beabef1170308fca117eb3..0000000000000000000000000000000000000000 --- a/detectron2/tests/README.md +++ /dev/null @@ -1,9 +0,0 @@ -## Unit Tests - -To run the unittests, do: -``` -cd detectron2 -python -m unittest discover -v -s ./tests -``` - -There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev). diff --git a/detectron2/tests/__init__.py b/detectron2/tests/__init__.py deleted file mode 100644 index 9020c2df23e2af280b7bb168b996ae9eaf312eb8..0000000000000000000000000000000000000000 --- a/detectron2/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. diff --git a/detectron2/tests/config/dir1/bad_import.py b/detectron2/tests/config/dir1/bad_import.py deleted file mode 100644 index d7452c4dfc211223c946f22df7a2eb6bdc2cd829..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/dir1/bad_import.py +++ /dev/null @@ -1,2 +0,0 @@ -# import from directory is not allowed -from . import dir1a diff --git a/detectron2/tests/config/dir1/bad_import2.py b/detectron2/tests/config/dir1/bad_import2.py deleted file mode 100644 index 085a4dfa84a28b92f7d515e1911ac2cc12cbbf7d..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/dir1/bad_import2.py +++ /dev/null @@ -1 +0,0 @@ -from .does_not_exist import x diff --git a/detectron2/tests/config/dir1/dir1_a.py b/detectron2/tests/config/dir1/dir1_a.py deleted file mode 100644 index a939955124556355524f48c0f0c16abb07cfc4c4..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/dir1/dir1_a.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -dir1a_str = "base_a_1" -dir1a_dict = {"a": 1, "b": 2} diff --git a/detectron2/tests/config/dir1/dir1_b.py b/detectron2/tests/config/dir1/dir1_b.py deleted file mode 100644 index 2dcb54cb1054c5d80ccc823af21f13b9ebbcf1a3..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/dir1/dir1_b.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from detectron2.config import LazyConfig - -# equivalent to relative import -dir1a_str, dir1a_dict = LazyConfig.load_rel("dir1_a.py", ("dir1a_str", "dir1a_dict")) - -dir1b_str = dir1a_str + "_from_b" -dir1b_dict = dir1a_dict - -# Every import is a reload: not modified by other config files -assert dir1a_dict.a == 1 diff --git a/detectron2/tests/config/dir1/load_rel.py b/detectron2/tests/config/dir1/load_rel.py deleted file mode 100644 index 22d10db7fe28ad66819aeb8e991f129301095ea1..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/dir1/load_rel.py +++ /dev/null @@ -1,5 +0,0 @@ -# test that load_rel can work -from detectron2.config import LazyConfig - -x = LazyConfig.load_rel("dir1_a.py", "dir1a_dict") -assert x["a"] == 1 diff --git a/detectron2/tests/config/root_cfg.py b/detectron2/tests/config/root_cfg.py deleted file mode 100644 index 33d1d4bd2d9ddf31d55c655c49d13a8b7ac7b376..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/root_cfg.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from itertools import count - -from detectron2.config import LazyCall as L - -from .dir1.dir1_a import dir1a_dict, dir1a_str - -dir1a_dict.a = "modified" - -# modification above won't affect future imports -from .dir1.dir1_b import dir1b_dict, dir1b_str - - -lazyobj = L(count)(x=dir1a_str, y=dir1b_str) diff --git a/detectron2/tests/config/test_instantiate_config.py b/detectron2/tests/config/test_instantiate_config.py deleted file mode 100644 index 6b728943ada9bc20af5a60fbe2b3ea58d804a362..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/test_instantiate_config.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import os -import tempfile -import unittest -import yaml -from omegaconf import OmegaConf -from omegaconf import __version__ as oc_version -from dataclasses import dataclass - -from detectron2.config import LazyConfig, instantiate, LazyCall as L -from detectron2.layers import ShapeSpec -from detectron2.utils.testing import reload_lazy_config - -OC_VERSION = tuple(int(x) for x in oc_version.split(".")[:2]) - - -class TestClass: - def __init__(self, int_arg, list_arg=None, dict_arg=None, extra_arg=None): - self.int_arg = int_arg - self.list_arg = list_arg - self.dict_arg = dict_arg - self.extra_arg = extra_arg - - def __call__(self, call_arg): - return call_arg + self.int_arg - - -@unittest.skipIf(OC_VERSION < (2, 1), "omegaconf version too old") -class TestConstruction(unittest.TestCase): - def test_basic_construct(self): - cfg = L(TestClass)( - int_arg=3, - list_arg=[10], - dict_arg={}, - extra_arg=L(TestClass)(int_arg=4, list_arg="${..list_arg}"), - ) - - for x in [cfg, reload_lazy_config(cfg)]: - obj = instantiate(x) - self.assertIsInstance(obj, TestClass) - self.assertEqual(obj.int_arg, 3) - self.assertEqual(obj.extra_arg.int_arg, 4) - self.assertEqual(obj.extra_arg.list_arg, obj.list_arg) - - # Test interpolation - x.extra_arg.list_arg = [5] - obj = instantiate(x) - self.assertIsInstance(obj, TestClass) - self.assertEqual(obj.extra_arg.list_arg, [5]) - - def test_instantiate_other_obj(self): - # do nothing for other obj - self.assertEqual(instantiate(5), 5) - x = [3, 4, 5] - self.assertEqual(instantiate(x), x) - x = TestClass(1) - self.assertIs(instantiate(x), x) - x = {"xx": "yy"} - self.assertIs(instantiate(x), x) - - def test_instantiate_lazy_target(self): - # _target_ is result of instantiate - objconf = L(L(len)(int_arg=3))(call_arg=4) - objconf._target_._target_ = TestClass - self.assertEqual(instantiate(objconf), 7) - - def test_instantiate_list(self): - lst = [1, 2, L(TestClass)(int_arg=1)] - x = L(TestClass)(int_arg=lst) # list as an argument should be recursively instantiated - x = instantiate(x).int_arg - self.assertEqual(x[:2], [1, 2]) - self.assertIsInstance(x[2], TestClass) - self.assertEqual(x[2].int_arg, 1) - - def test_instantiate_dataclass(self): - cfg = L(ShapeSpec)(channels=1, width=3) - # Test original cfg as well as serialization - for x in [cfg, reload_lazy_config(cfg)]: - obj = instantiate(x) - self.assertIsInstance(obj, ShapeSpec) - self.assertEqual(obj.channels, 1) - self.assertEqual(obj.height, None) - - def test_instantiate_dataclass_as_subconfig(self): - cfg = L(TestClass)(int_arg=1, extra_arg=ShapeSpec(channels=1, width=3)) - # Test original cfg as well as serialization - for x in [cfg, reload_lazy_config(cfg)]: - obj = instantiate(x) - self.assertIsInstance(obj.extra_arg, ShapeSpec) - self.assertEqual(obj.extra_arg.channels, 1) - self.assertEqual(obj.extra_arg.height, None) - - def 
test_bad_lazycall(self): - with self.assertRaises(Exception): - L(3) - - def test_interpolation(self): - cfg = L(TestClass)(int_arg=3, extra_arg="${int_arg}") - - cfg.int_arg = 4 - obj = instantiate(cfg) - self.assertEqual(obj.extra_arg, 4) - - # Test that interpolation still works after serialization - cfg = reload_lazy_config(cfg) - cfg.int_arg = 5 - obj = instantiate(cfg) - self.assertEqual(obj.extra_arg, 5) diff --git a/detectron2/tests/config/test_lazy_config.py b/detectron2/tests/config/test_lazy_config.py deleted file mode 100644 index cdab59c13f5b5bce1839957cf74b0bfbc3790258..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/test_lazy_config.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import os -import unittest -import tempfile -from itertools import count - -from detectron2.config import LazyConfig, LazyCall as L -from omegaconf import DictConfig - - -class TestLazyPythonConfig(unittest.TestCase): - def setUp(self): - self.curr_dir = os.path.dirname(__file__) - self.root_filename = os.path.join(self.curr_dir, "root_cfg.py") - - def test_load(self): - cfg = LazyConfig.load(self.root_filename) - - self.assertEqual(cfg.dir1a_dict.a, "modified") - self.assertEqual(cfg.dir1b_dict.a, 1) - self.assertEqual(cfg.lazyobj.x, "base_a_1") - - cfg.lazyobj.x = "new_x" - # reload - cfg = LazyConfig.load(self.root_filename) - self.assertEqual(cfg.lazyobj.x, "base_a_1") - - def test_save_load(self): - cfg = LazyConfig.load(self.root_filename) - with tempfile.TemporaryDirectory(prefix="detectron2") as d: - fname = os.path.join(d, "test_config.yaml") - LazyConfig.save(cfg, fname) - cfg2 = LazyConfig.load(fname) - - self.assertEqual(cfg2.lazyobj._target_, "itertools.count") - self.assertEqual(cfg.lazyobj._target_, count) - cfg2.lazyobj.pop("_target_") - cfg.lazyobj.pop("_target_") - # the rest are equal - self.assertEqual(cfg, cfg2) - - def test_failed_save(self): - cfg = DictConfig({"x": lambda: 3}, flags={"allow_objects": True}) - with tempfile.TemporaryDirectory(prefix="detectron2") as d: - fname = os.path.join(d, "test_config.yaml") - LazyConfig.save(cfg, fname) - self.assertTrue(os.path.exists(fname)) - self.assertTrue(os.path.exists(fname + ".pkl")) - - def test_overrides(self): - cfg = LazyConfig.load(self.root_filename) - LazyConfig.apply_overrides(cfg, ["lazyobj.x=123", 'dir1b_dict.a="123"']) - self.assertEqual(cfg.dir1b_dict.a, "123") - self.assertEqual(cfg.lazyobj.x, 123) - - LazyConfig.apply_overrides(cfg, ["dir1b_dict.a='abc'"]) - self.assertEqual(cfg.dir1b_dict.a, "abc") - - def test_invalid_overrides(self): - cfg = LazyConfig.load(self.root_filename) - with self.assertRaises(KeyError): - LazyConfig.apply_overrides(cfg, ["lazyobj.x.xxx=123"]) - - def test_to_py(self): - cfg = LazyConfig.load(self.root_filename) - cfg.lazyobj.x = {"a": 1, "b": 2, "c": L(count)(x={"r": "a", "s": 2.4, "t": [1, 2, 3, "z"]})} - cfg.list = ["a", 1, "b", 3.2] - py_str = LazyConfig.to_py(cfg) - expected = """cfg.dir1a_dict.a = "modified" -cfg.dir1a_dict.b = 2 -cfg.dir1b_dict.a = 1 -cfg.dir1b_dict.b = 2 -cfg.lazyobj = itertools.count( - x={ - "a": 1, - "b": 2, - "c": itertools.count(x={"r": "a", "s": 2.4, "t": [1, 2, 3, "z"]}), - }, - y="base_a_1_from_b", -) -cfg.list = ["a", 1, "b", 3.2] -""" - self.assertEqual(py_str, expected) - - def test_bad_import(self): - file = os.path.join(self.curr_dir, "dir1", "bad_import.py") - with self.assertRaisesRegex(ImportError, "relative import"): - LazyConfig.load(file) - - def test_bad_import2(self): - file = 
os.path.join(self.curr_dir, "dir1", "bad_import2.py") - with self.assertRaisesRegex(ImportError, "not exist"): - LazyConfig.load(file) - - def test_load_rel(self): - file = os.path.join(self.curr_dir, "dir1", "load_rel.py") - cfg = LazyConfig.load(file) - self.assertIn("x", cfg) diff --git a/detectron2/tests/config/test_yacs_config.py b/detectron2/tests/config/test_yacs_config.py deleted file mode 100644 index 01dd6955f78e2700ffc10ed723ab1c95df0e5a18..0000000000000000000000000000000000000000 --- a/detectron2/tests/config/test_yacs_config.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. - - -import os -import tempfile -import unittest -import torch -from omegaconf import OmegaConf - -from detectron2 import model_zoo -from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config -from detectron2.layers import ShapeSpec -from detectron2.modeling import build_model - -_V0_CFG = """ -MODEL: - RPN_HEAD: - NAME: "TEST" -VERSION: 0 -""" - -_V1_CFG = """ -MODEL: - WEIGHT: "/path/to/weight" -""" - - -class TestConfigVersioning(unittest.TestCase): - def test_upgrade_downgrade_consistency(self): - cfg = get_cfg() - # check that custom is preserved - cfg.USER_CUSTOM = 1 - - down = downgrade_config(cfg, to_version=0) - up = upgrade_config(down) - self.assertTrue(up == cfg) - - def _merge_cfg_str(self, cfg, merge_str): - f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) - try: - f.write(merge_str) - f.close() - cfg.merge_from_file(f.name) - finally: - os.remove(f.name) - return cfg - - def test_auto_upgrade(self): - cfg = get_cfg() - latest_ver = cfg.VERSION - cfg.USER_CUSTOM = 1 - - self._merge_cfg_str(cfg, _V0_CFG) - - self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") - self.assertEqual(cfg.VERSION, latest_ver) - - def test_guess_v1(self): - cfg = get_cfg() - latest_ver = cfg.VERSION - self._merge_cfg_str(cfg, _V1_CFG) - self.assertEqual(cfg.VERSION, latest_ver) - - -class _TestClassA(torch.nn.Module): - @configurable - def __init__(self, arg1, arg2, arg3=3): - super().__init__() - self.arg1 = arg1 - self.arg2 = arg2 - self.arg3 = arg3 - assert arg1 == 1 - assert arg2 == 2 - assert arg3 == 3 - - @classmethod - def from_config(cls, cfg): - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - return args - - -class _TestClassB(_TestClassA): - @configurable - def __init__(self, input_shape, arg1, arg2, arg3=3): - """ - Doc of _TestClassB - """ - assert input_shape == "shape" - super().__init__(arg1, arg2, arg3) - - @classmethod - def from_config(cls, cfg, input_shape): # test extra positional arg in from_config - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - args["input_shape"] = input_shape - return args - - -class _LegacySubClass(_TestClassB): - # an old subclass written in cfg style - def __init__(self, cfg, input_shape, arg4=4): - super().__init__(cfg, input_shape) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _NewSubClassNewInit(_TestClassB): - # test new subclass with a new __init__ - @configurable - def __init__(self, input_shape, arg4=4, **kwargs): - super().__init__(input_shape, **kwargs) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _LegacySubClassNotCfg(_TestClassB): - # an old subclass written in cfg style, but argument is not called "cfg" - def __init__(self, config, input_shape): - super().__init__(config, input_shape) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class 
_TestClassC(_TestClassB): - @classmethod - def from_config(cls, cfg, input_shape, **kwargs): # test extra kwarg overwrite - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - args["input_shape"] = input_shape - args.update(kwargs) - return args - - -class _TestClassD(_TestClassA): - @configurable - def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3): - assert input_shape == "shape" - super().__init__(arg1, arg2, arg3) - - # _TestClassA.from_config does not have input_shape args. - # Test whether input_shape will be forwarded to __init__ - - -@configurable(from_config=lambda cfg, arg2: {"arg1": cfg.ARG1, "arg2": arg2, "arg3": cfg.ARG3}) -def _test_func(arg1, arg2=2, arg3=3, arg4=4): - return arg1, arg2, arg3, arg4 - - -class TestConfigurable(unittest.TestCase): - def testInitWithArgs(self): - _ = _TestClassA(arg1=1, arg2=2, arg3=3) - _ = _TestClassB("shape", arg1=1, arg2=2) - _ = _TestClassC("shape", arg1=1, arg2=2) - _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3) - - def testPatchedAttr(self): - self.assertTrue("Doc" in _TestClassB.__init__.__doc__) - self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int) - - def testInitWithCfg(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 2 - cfg.ARG3 = 3 - _ = _TestClassA(cfg) - _ = _TestClassB(cfg, input_shape="shape") - _ = _TestClassC(cfg, input_shape="shape") - _ = _TestClassD(cfg, input_shape="shape") - _ = _LegacySubClass(cfg, input_shape="shape") - _ = _NewSubClassNewInit(cfg, input_shape="shape") - _ = _LegacySubClassNotCfg(cfg, input_shape="shape") - with self.assertRaises(TypeError): - # disallow forwarding positional args to __init__ since it's prone to errors - _ = _TestClassD(cfg, "shape") - - # call with kwargs instead - _ = _TestClassA(cfg=cfg) - _ = _TestClassB(cfg=cfg, input_shape="shape") - _ = _TestClassC(cfg=cfg, input_shape="shape") - _ = _TestClassD(cfg=cfg, input_shape="shape") - _ = _LegacySubClass(cfg=cfg, input_shape="shape") - _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape") - _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape") - - def testInitWithCfgOverwrite(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 999 # wrong config - with self.assertRaises(AssertionError): - _ = _TestClassA(cfg, arg3=3) - - # overwrite arg2 with correct config later: - _ = _TestClassA(cfg, arg2=2, arg3=3) - _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3) - - # call with kwargs cfg=cfg instead - _ = _TestClassA(cfg=cfg, arg2=2, arg3=3) - _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - - def testInitWithCfgWrongArgs(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 2 - with self.assertRaises(TypeError): - _ = _TestClassB(cfg, "shape", not_exist=1) - with self.assertRaises(TypeError): - _ = _TestClassC(cfg, "shape", not_exist=1) - with self.assertRaises(TypeError): - _ = _TestClassD(cfg, "shape", not_exist=1) - - def testBadClass(self): - class _BadClass1: - @configurable - def __init__(self, a=1, b=2): - pass - - class _BadClass2: - @configurable - def __init__(self, a=1, b=2): - pass - - def from_config(self, cfg): # noqa - pass - - class _BadClass3: - @configurable - def __init__(self, a=1, b=2): - pass - - # bad name: must be cfg - @classmethod - def from_config(cls, config): # noqa - pass - - with 
self.assertRaises(AttributeError): - _ = _BadClass1(a=1) - - with self.assertRaises(TypeError): - _ = _BadClass2(a=1) - - with self.assertRaises(TypeError): - _ = _BadClass3(get_cfg()) - - def testFuncWithCfg(self): - cfg = get_cfg() - cfg.ARG1 = 10 - cfg.ARG3 = 30 - - self.assertEqual(_test_func(1), (1, 2, 3, 4)) - with self.assertRaises(TypeError): - _test_func(cfg) - self.assertEqual(_test_func(cfg, arg2=2), (10, 2, 30, 4)) - self.assertEqual(_test_func(cfg, arg1=100, arg2=20), (100, 20, 30, 4)) - self.assertEqual(_test_func(cfg, arg1=100, arg2=20, arg4=40), (100, 20, 30, 40)) - - self.assertTrue(callable(_test_func.from_config)) - - def testOmegaConf(self): - cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml") - cfg = OmegaConf.create(cfg.dump()) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - # test that a model can be built with omegaconf config as well - build_model(cfg) diff --git a/detectron2/tests/data/__init__.py b/detectron2/tests/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/tests/data/test_coco.py b/detectron2/tests/data/test_coco.py deleted file mode 100644 index caabead5527639056daeef71027a69c47ee2ebf7..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_coco.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import json -import numpy as np -import os -import tempfile -import unittest -import pycocotools.mask as mask_util - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.data.datasets.coco import convert_to_coco_dict, load_coco_json -from detectron2.structures import BoxMode - - -def make_mask(): - """ - Makes a donut shaped binary mask. - """ - H = 100 - W = 100 - mask = np.zeros([H, W], dtype=np.uint8) - for x in range(W): - for y in range(H): - d = np.linalg.norm(np.array([W, H]) / 2 - np.array([x, y])) - if d > 10 and d < 20: - mask[y, x] = 1 - return mask - - -def uncompressed_rle(mask): - l = mask.flatten(order="F").tolist() - counts = [] - p = False - cnt = 0 - for i in l: - if i == p: - cnt += 1 - else: - counts.append(cnt) - p = i - cnt = 1 - counts.append(cnt) - return {"counts": counts, "size": [mask.shape[0], mask.shape[1]]} - - -def make_dataset_dicts(mask, compressed: bool = True): - """ - Returns a list of dicts that represents a single COCO data point for - object detection. The single instance given by `mask` is represented by - RLE, either compressed or uncompressed. - """ - record = {} - record["file_name"] = "test" - record["image_id"] = 0 - record["height"] = mask.shape[0] - record["width"] = mask.shape[1] - - y, x = np.nonzero(mask) - if compressed: - segmentation = mask_util.encode(np.asarray(mask, order="F")) - else: - segmentation = uncompressed_rle(mask) - min_x = np.min(x) - max_x = np.max(x) - min_y = np.min(y) - max_y = np.max(y) - obj = { - "bbox": [min_x, min_y, max_x, max_y], - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 0, - "iscrowd": 0, - "segmentation": segmentation, - } - record["annotations"] = [obj] - return [record] - - -class TestRLEToJson(unittest.TestCase): - def test(self): - # Make a dummy dataset. - mask = make_mask() - DatasetCatalog.register("test_dataset", lambda: make_dataset_dicts(mask)) - MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"]) - - # Dump to json. 
- json_dict = convert_to_coco_dict("test_dataset") - with tempfile.TemporaryDirectory() as tmpdir: - json_file_name = os.path.join(tmpdir, "test.json") - with open(json_file_name, "w") as f: - json.dump(json_dict, f) - # Load from json. - dicts = load_coco_json(json_file_name, "") - - # Check the loaded mask matches the original. - anno = dicts[0]["annotations"][0] - loaded_mask = mask_util.decode(anno["segmentation"]) - self.assertTrue(np.array_equal(loaded_mask, mask)) - DatasetCatalog.pop("test_dataset") - MetadataCatalog.pop("test_dataset") - - def test_uncompressed_RLE(self): - mask = make_mask() - rle = mask_util.encode(np.asarray(mask, order="F")) - uncompressed = uncompressed_rle(mask) - compressed = mask_util.frPyObjects(uncompressed, *rle["size"]) - self.assertEqual(rle, compressed) - - -class TestConvertCOCO(unittest.TestCase): - @staticmethod - def generate_data(): - record = { - "file_name": "test", - "image_id": 0, - "height": 100, - "width": 100, - "annotations": [ - { - "bbox": [10, 10, 10, 10, 5], - "bbox_mode": BoxMode.XYWHA_ABS, - "category_id": 0, - "iscrowd": 0, - }, - { - "bbox": [15, 15, 3, 3], - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 0, - "iscrowd": 0, - }, - ], - } - - return [record] - - def test_convert_to_coco(self): - DatasetCatalog.register("test_dataset", lambda: TestConvertCOCO.generate_data()) - MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"]) - convert_to_coco_dict("test_dataset") - DatasetCatalog.pop("test_dataset") - MetadataCatalog.pop("test_dataset") diff --git a/detectron2/tests/data/test_coco_evaluation.py b/detectron2/tests/data/test_coco_evaluation.py deleted file mode 100644 index 964f00284df64d3378ebfe32913c07deb5a1f819..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_coco_evaluation.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
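Editor's note on the RLE helpers deleted above: `uncompressed_rle` stores a mask as alternating run lengths over the column-major (Fortran-order) flattening, always beginning with a run of zeros. A small sketch of the inverse operation, useful for sanity-checking that encoding by hand; the function below is ours, not part of detectron2 or pycocotools:

```python
import numpy as np

def decode_uncompressed_rle(rle):
    """Rebuild a binary mask from {'counts': [...], 'size': [h, w]} as produced by
    the uncompressed_rle helper above (column-major runs, zero-valued run first)."""
    h, w = rle["size"]
    flat = np.zeros(h * w, dtype=np.uint8)
    pos, value = 0, 0  # runs alternate, starting with the 0-valued run
    for run in rle["counts"]:
        flat[pos:pos + run] = value
        pos += run
        value = 1 - value
    return flat.reshape((h, w), order="F")  # undo the Fortran-order flatten
```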
-import contextlib -import copy -import io -import json -import numpy as np -import os -import tempfile -import unittest -import torch -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -from detectron2.data import DatasetCatalog -from detectron2.evaluation import COCOEvaluator -from detectron2.evaluation.fast_eval_api import COCOeval_opt -from detectron2.structures import Boxes, Instances - - -class TestCOCOeval(unittest.TestCase): - def test_fast_eval(self): - # A small set of images/categories from COCO val - # fmt: off - detections = [{"image_id": 139, "category_id": 1, "bbox": [417.3332824707031, 159.27003479003906, 47.66064453125, 143.00193786621094], "score": 0.9949821829795837, "segmentation": {"size": [426, 640], "counts": "Tc`52W=3N0N4aNN^E7]:4XE1g:8kDMT;U100000001O1gE[Nk8h1dFiNY9Z1aFkN]9g2J3NdN`FlN`9S1cFRN07]9g1bFoM6;X9c1cFoM=8R9g1bFQN>3U9Y30O01OO1O001N2O1N1O4L4L5UNoE3V:CVF6Q:@YF9l9@ZF 0 else 0.0 - msg = "%s: comparing COCO APIs, %s differs by %f" % (name, k, abs_diff) - self.assertTrue(abs_diff < 1e-4, msg=msg) - - def test_unknown_category(self): - dataset = "coco_2017_val_100" - evaluator = COCOEvaluator(dataset) - evaluator.reset() - inputs = DatasetCatalog.get(dataset)[:2] - pred = Instances((100, 100)) - pred.pred_boxes = Boxes(torch.rand(2, 4)) - pred.scores = torch.rand(2) - pred.pred_classes = torch.tensor([10, 80]) - output = {"instances": pred} - evaluator.process(inputs, [output, output]) - with self.assertRaises(AssertionError): - evaluator.evaluate() diff --git a/detectron2/tests/data/test_dataset.py b/detectron2/tests/data/test_dataset.py deleted file mode 100644 index b412ca760ab4e47d2a93b975decedbdf8578ca3f..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_dataset.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
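Editor's note on the fast-evaluation test deleted above (its embedded RLE strings are truncated in this diff): it compares detectron2's `COCOeval_opt` against the reference `COCOeval` from pycocotools and asserts their summary statistics agree to about 1e-4. Both follow the same three-step protocol; a minimal sketch of that protocol, assuming `coco_gt` is a loaded COCO ground-truth object and `detections` is a list of result dicts like the ones at the top of the deleted file:

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from detectron2.evaluation.fast_eval_api import COCOeval_opt  # import used by the deleted test

def compare_coco_eval(coco_gt: COCO, detections, iou_type="bbox"):
    # Same evaluate -> accumulate -> summarize protocol for both implementations;
    # the deleted test asserts the resulting .stats vectors nearly coincide.
    coco_dt = coco_gt.loadRes(detections)
    results = {}
    for name, api in [("reference", COCOeval), ("opt", COCOeval_opt)]:
        ev = api(coco_gt, coco_dt, iou_type)
        ev.evaluate()
        ev.accumulate()
        ev.summarize()
        results[name] = ev.stats  # AP/AR summary vector
    return results
```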
- -import os -import pickle -import sys -import unittest -from functools import partial -import torch -from iopath.common.file_io import LazyPath - -from detectron2 import model_zoo -from detectron2.config import get_cfg, instantiate -from detectron2.data import ( - DatasetCatalog, - DatasetFromList, - MapDataset, - ToIterableDataset, - build_batch_data_loader, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.data.common import ( - AspectRatioGroupedDataset, - set_default_dataset_from_list_serialize_method, -) -from detectron2.data.samplers import InferenceSampler, TrainingSampler - - -def _a_slow_func(x): - return "path/{}".format(x) - - -class TestDatasetFromList(unittest.TestCase): - # Failing for py3.6, likely due to pickle - @unittest.skipIf(sys.version_info.minor <= 6, "Not supported in Python 3.6") - def test_using_lazy_path(self): - dataset = [] - for i in range(10): - dataset.append({"file_name": LazyPath(partial(_a_slow_func, i))}) - - dataset = DatasetFromList(dataset) - for i in range(10): - path = dataset[i]["file_name"] - self.assertTrue(isinstance(path, LazyPath)) - self.assertEqual(os.fspath(path), _a_slow_func(i)) - - def test_alternative_serialize_method(self): - dataset = [1, 2, 3] - dataset = DatasetFromList(dataset, serialize=torch.tensor) - self.assertEqual(dataset[2], torch.tensor(3)) - - def test_change_default_serialize_method(self): - dataset = [1, 2, 3] - with set_default_dataset_from_list_serialize_method(torch.tensor): - dataset_1 = DatasetFromList(dataset, serialize=True) - self.assertEqual(dataset_1[2], torch.tensor(3)) - dataset_2 = DatasetFromList(dataset, serialize=True) - self.assertEqual(dataset_2[2], 3) - - -class TestMapDataset(unittest.TestCase): - @staticmethod - def map_func(x): - if x == 2: - return None - return x * 2 - - def test_map_style(self): - ds = DatasetFromList([1, 2, 3]) - ds = MapDataset(ds, TestMapDataset.map_func) - self.assertEqual(ds[0], 2) - self.assertEqual(ds[2], 6) - self.assertIn(ds[1], [2, 6]) - - def test_iter_style(self): - class DS(torch.utils.data.IterableDataset): - def __iter__(self): - yield from [1, 2, 3] - - ds = DS() - ds = MapDataset(ds, TestMapDataset.map_func) - self.assertIsInstance(ds, torch.utils.data.IterableDataset) - - data = list(iter(ds)) - self.assertEqual(data, [2, 6]) - - def test_pickleability(self): - ds = DatasetFromList([1, 2, 3]) - ds = MapDataset(ds, lambda x: x * 2) - ds = pickle.loads(pickle.dumps(ds)) - self.assertEqual(ds[0], 2) - - -class TestAspectRatioGrouping(unittest.TestCase): - def test_reiter_leak(self): - data = [(1, 0), (0, 1), (1, 0), (0, 1)] - data = [{"width": a, "height": b} for (a, b) in data] - batchsize = 2 - dataset = AspectRatioGroupedDataset(data, batchsize) - - for _ in range(5): - for idx, __ in enumerate(dataset): - if idx == 1: - # manually break, so the iterator does not stop by itself - break - # check that bucket sizes are valid - for bucket in dataset._buckets: - self.assertLess(len(bucket), batchsize) - - -class _MyData(torch.utils.data.IterableDataset): - def __iter__(self): - while True: - yield 1 - - -class TestDataLoader(unittest.TestCase): - def _get_kwargs(self): - # get kwargs of build_detection_train_loader - cfg = model_zoo.get_config("common/data/coco.py").dataloader.train - cfg.dataset.names = "coco_2017_val_100" - cfg.pop("_target_") - kwargs = {k: instantiate(v) for k, v in cfg.items()} - return kwargs - - def test_build_dataloader_train(self): - kwargs = self._get_kwargs() - dl = build_detection_train_loader(**kwargs) 
- next(iter(dl)) - - def test_build_iterable_dataloader_train(self): - kwargs = self._get_kwargs() - ds = DatasetFromList(kwargs.pop("dataset")) - ds = ToIterableDataset(ds, TrainingSampler(len(ds))) - dl = build_detection_train_loader(dataset=ds, **kwargs) - next(iter(dl)) - - def test_build_iterable_dataloader_from_cfg(self): - cfg = get_cfg() - cfg.DATASETS.TRAIN = ["iter_data"] - DatasetCatalog.register("iter_data", lambda: _MyData()) - dl = build_detection_train_loader(cfg, mapper=lambda x: x, aspect_ratio_grouping=False) - next(iter(dl)) - - dl = build_detection_test_loader(cfg, "iter_data", mapper=lambda x: x) - next(iter(dl)) - - def _check_is_range(self, data_loader, N): - # check that data_loader produces range(N) - data = list(iter(data_loader)) - data = [x for batch in data for x in batch] # flatten the batches - self.assertEqual(len(data), N) - self.assertEqual(set(data), set(range(N))) - - def test_build_batch_dataloader_inference(self): - # Test that build_batch_data_loader can be used for inference - N = 96 - ds = DatasetFromList(list(range(N))) - sampler = InferenceSampler(len(ds)) - dl = build_batch_data_loader(ds, sampler, 8, num_workers=3) - self._check_is_range(dl, N) - - def test_build_batch_dataloader_inference_incomplete_batch(self): - # Test that build_batch_data_loader works when dataset size is not multiple of - # batch size or num_workers - def _test(N, batch_size, num_workers): - ds = DatasetFromList(list(range(N))) - sampler = InferenceSampler(len(ds)) - - dl = build_batch_data_loader(ds, sampler, batch_size, num_workers=num_workers) - data = list(iter(dl)) - self.assertEqual(len(data), len(dl)) # floor(N / batch_size) - self._check_is_range(dl, N // batch_size * batch_size) - - dl = build_batch_data_loader( - ds, sampler, batch_size, num_workers=num_workers, drop_last=False - ) - data = list(iter(dl)) - self.assertEqual(len(data), len(dl)) # ceil(N / batch_size) - self._check_is_range(dl, N) - - _test(48, batch_size=8, num_workers=3) - _test(47, batch_size=8, num_workers=3) - _test(46, batch_size=8, num_workers=3) - _test(40, batch_size=8, num_workers=3) - _test(39, batch_size=8, num_workers=3) - - def test_build_dataloader_inference(self): - N = 50 - ds = DatasetFromList(list(range(N))) - sampler = InferenceSampler(len(ds)) - # test that parallel loader works correctly - dl = build_detection_test_loader( - dataset=ds, sampler=sampler, mapper=lambda x: x, num_workers=3 - ) - self._check_is_range(dl, N) - - # test that batch_size works correctly - dl = build_detection_test_loader( - dataset=ds, sampler=sampler, mapper=lambda x: x, batch_size=4, num_workers=0 - ) - self._check_is_range(dl, N) - - def test_build_iterable_dataloader_inference(self): - # Test that build_detection_test_loader supports iterable dataset - N = 50 - ds = DatasetFromList(list(range(N))) - ds = ToIterableDataset(ds, InferenceSampler(len(ds))) - dl = build_detection_test_loader(dataset=ds, mapper=lambda x: x, num_workers=3) - self._check_is_range(dl, N) diff --git a/detectron2/tests/data/test_detection_utils.py b/detectron2/tests/data/test_detection_utils.py deleted file mode 100644 index aac56c07da2be4e181e3e95de8cee1fc2858286d..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_detection_utils.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
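Editor's note on TestMapDataset deleted above: it relies on the contract that when the mapping function returns None, MapDataset drops that element (falling back to another index in the map-style case, which is why the test only checks that `ds[1]` is one of the valid outputs, and simply skipping it in the iterable case, which is why mapping [1, 2, 3] with a mapper that rejects 2 yields [2, 6]). A tiny self-contained sketch of the iterable-style behaviour, independent of detectron2:

```python
def map_and_drop(iterable, fn):
    """Yield fn(x) for each element, silently dropping items fn maps to None --
    the behaviour TestMapDataset.test_iter_style checks for MapDataset."""
    for x in iterable:
        y = fn(x)
        if y is not None:
            yield y

# Mirrors the deleted test: 2 is rejected by the mapper, everything else is doubled.
assert list(map_and_drop([1, 2, 3], lambda x: None if x == 2 else x * 2)) == [2, 6]
```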
- -import copy -import numpy as np -import os -import unittest -import pycocotools.mask as mask_util - -from detectron2.data import MetadataCatalog, detection_utils -from detectron2.data import transforms as T -from detectron2.structures import BitMasks, BoxMode -from detectron2.utils.file_io import PathManager - - -class TestTransformAnnotations(unittest.TestCase): - def test_transform_simple_annotation(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 3, - "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]], - } - - output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400)) - self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300])) - self.assertEqual(len(output["segmentation"]), len(anno["segmentation"])) - self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10])) - - detection_utils.annotations_to_instances([output, output], (400, 400)) - - def test_transform_empty_annotation(self): - detection_utils.annotations_to_instances([], (400, 400)) - - def test_flip_keypoints(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "keypoints": np.random.rand(17, 3) * 50 + 15, - } - - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), - transforms, - (400, 400), - keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices( - ["keypoints_coco_2017_train"] - ), - ) - # The first keypoint is nose - self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0])) - # The last 16 keypoints are 8 left-right pairs - self.assertTrue( - np.allclose( - output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1], - 400 - anno["keypoints"][1:, 0].reshape(-1, 2), - ) - ) - self.assertTrue( - np.allclose( - output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, :], - anno["keypoints"][1:, 1:].reshape(-1, 2, 2), - ) - ) - - def test_crop(self): - transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)]) - keypoints = np.random.rand(17, 3) * 50 + 15 - keypoints[:, 2] = 2 - anno = { - "bbox": np.asarray([10, 10, 200, 400]), - "bbox_mode": BoxMode.XYXY_ABS, - "keypoints": keypoints, - } - - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (10, 10) - ) - # box is shifted and cropped - self.assertTrue((output["bbox"] == np.asarray([0, 0, 0, 10])).all()) - # keypoints are no longer visible - self.assertTrue((output["keypoints"][:, 2] == 0).all()) - - def test_transform_RLE(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - mask = np.zeros((300, 400), order="F").astype("uint8") - mask[:, :200] = 1 - - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "segmentation": mask_util.encode(mask[:, :, None])[0], - "category_id": 3, - } - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (300, 400) - ) - mask = output["segmentation"] - self.assertTrue((mask[:, 200:] == 1).all()) - self.assertTrue((mask[:, :200] == 0).all()) - - inst = detection_utils.annotations_to_instances( - [output, output], (400, 400), mask_format="bitmask" - ) - self.assertTrue(isinstance(inst.gt_masks, BitMasks)) - - def test_transform_RLE_resize(self): - transforms = T.TransformList( - [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")] - ) - 
mask = np.zeros((300, 400), order="F").astype("uint8") - mask[:, :200] = 1 - - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "segmentation": mask_util.encode(mask[:, :, None])[0], - "category_id": 3, - } - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (400, 400) - ) - - inst = detection_utils.annotations_to_instances( - [output, output], (400, 400), mask_format="bitmask" - ) - self.assertTrue(isinstance(inst.gt_masks, BitMasks)) - - def test_gen_crop(self): - instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} - t = detection_utils.gen_crop_transform_with_instance((10, 10), (150, 150), instance) - # the box center must fall into the cropped region - self.assertTrue(t.x0 <= 55 <= t.x0 + t.w) - - def test_gen_crop_outside_boxes(self): - instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} - with self.assertRaises(AssertionError): - detection_utils.gen_crop_transform_with_instance((10, 10), (15, 15), instance) - - def test_read_sem_seg(self): - cityscapes_dir = MetadataCatalog.get("cityscapes_fine_sem_seg_val").gt_dir - sem_seg_gt_path = os.path.join( - cityscapes_dir, "frankfurt", "frankfurt_000001_083852_gtFine_labelIds.png" - ) - if not PathManager.exists(sem_seg_gt_path): - raise unittest.SkipTest( - "Semantic segmentation ground truth {} not found.".format(sem_seg_gt_path) - ) - sem_seg = detection_utils.read_image(sem_seg_gt_path, "L") - self.assertEqual(sem_seg.ndim, 3) - self.assertEqual(sem_seg.shape[2], 1) - self.assertEqual(sem_seg.dtype, np.uint8) - self.assertEqual(sem_seg.max(), 32) - self.assertEqual(sem_seg.min(), 1) - - def test_read_exif_orientation(self): - # https://github.com/recurser/exif-orientation-examples/raw/master/Landscape_5.jpg - URL = "detectron2://assets/Landscape_5.jpg" - img = detection_utils.read_image(URL, "RGB") - self.assertEqual(img.ndim, 3) - self.assertEqual(img.dtype, np.uint8) - self.assertEqual(img.shape, (1200, 1800, 3)) # check that shape is not transposed - - def test_opencv_exif_orientation(self): - import cv2 - - URL = "detectron2://assets/Landscape_5.jpg" - with PathManager.open(URL, "rb") as f: - img = cv2.imdecode(np.frombuffer(f.read(), dtype="uint8"), cv2.IMREAD_COLOR) - self.assertEqual(img.dtype, np.uint8) - self.assertEqual(img.shape, (1200, 1800, 3)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/data/test_rotation_transform.py b/detectron2/tests/data/test_rotation_transform.py deleted file mode 100644 index 0e8299ed78a425c91fc2e43fede0b26461d1c9ff..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_rotation_transform.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
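Editor's note on test_transform_simple_annotation deleted above: it expects the XYXY box [10, 10, 200, 300] to become [200, 10, 390, 300] under a width-400 horizontal flip. The rule is that the x-coordinates are reflected and re-ordered (x0' = W - x1, x1' = W - x0) while y is untouched. A quick numpy check of that arithmetic; the helper name is ours, not detectron2's:

```python
import numpy as np

def hflip_xyxy(box, width):
    """Reflect an XYXY box across the vertical axis of an image of the given width."""
    x0, y0, x1, y1 = box
    return np.array([width - x1, y0, width - x0, y1])

# Same numbers as the deleted test expects.
assert np.allclose(hflip_xyxy([10, 10, 200, 300], 400), [200, 10, 390, 300])
```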
-import numpy as np -import unittest - -from detectron2.data.transforms.transform import RotationTransform - - -class TestRotationTransform(unittest.TestCase): - def assertEqualsArrays(self, a1, a2): - self.assertTrue(np.allclose(a1, a2)) - - def randomData(self, h=5, w=5): - image = np.random.rand(h, w) - coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float) - return image, coords, h, w - - def test180(self): - image, coords, h, w = self.randomData(6, 6) - rot = RotationTransform(h, w, 180, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1]) - rotated_coords = [[w - c[0], h - c[1]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test45_coords(self): - _, coords, h, w = self.randomData(4, 6) - rot = RotationTransform(h, w, 45, expand=False, center=None) - rotated_coords = [ - [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)] - for (x, y) in coords - ] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90(self): - image, coords, h, w = self.randomData() - rot = RotationTransform(h, w, 90, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90_expand(self): # non-square image - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test_center_expand(self): - # center has no effect if expand=True because it only affects shifting - image, coords, h, w = self.randomData(h=5, w=8) - angle = np.random.randint(360) - rot1 = RotationTransform(h, w, angle, expand=True, center=None) - rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0)) - rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w)) - rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5)) - for r1 in [rot1, rot2, rot3, rot4]: - for r2 in [rot1, rot2, rot3, rot4]: - self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image)) - self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords)) - - def test_inverse_transform(self): - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - rot_image = rot.apply_image(image) - self.assertEqualsArrays(rot.inverse().apply_image(rot_image), image) - rot = RotationTransform(h, w, 65, expand=True, center=None) - rotated_coords = rot.apply_coords(coords) - self.assertEqualsArrays(rot.inverse().apply_coords(rotated_coords), coords) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/data/test_sampler.py b/detectron2/tests/data/test_sampler.py deleted file mode 100644 index 0d2784390801314862524e1b85703535d199e41d..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_sampler.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
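Editor's note on the rotation tests deleted above: they encode the expected coordinate maps directly, with a 180-degree rotation about the image centre sending (x, y) to (w - x, h - y) and a 90-degree rotation sending (x, y) to (y, w - x). A tiny consistency check of those two maps on a square image, pure numpy and independent of detectron2:

```python
import numpy as np

h = w = 5
coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float)

# 180 degrees about the image centre: (x, y) -> (w - x, h - y)
rot180 = np.stack([w - coords[:, 0], h - coords[:, 1]], axis=1)

# 90 degrees, as RotationTransform defines it in the deleted test: (x, y) -> (y, w - x)
rot90 = np.stack([coords[:, 1], w - coords[:, 0]], axis=1)

# Applying the 90-degree map twice must agree with the 180-degree map (square image).
twice = np.stack([rot90[:, 1], w - rot90[:, 0]], axis=1)
assert np.allclose(twice, rot180)
```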
-import itertools -import math -import operator -import unittest -import torch -from torch.utils import data -from torch.utils.data.sampler import SequentialSampler - -from detectron2.data.build import worker_init_reset_seed -from detectron2.data.common import DatasetFromList, ToIterableDataset -from detectron2.data.samplers import ( - GroupedBatchSampler, - InferenceSampler, - RepeatFactorTrainingSampler, - TrainingSampler, -) -from detectron2.utils.env import seed_all_rng - - -class TestGroupedBatchSampler(unittest.TestCase): - def test_missing_group_id(self): - sampler = SequentialSampler(list(range(100))) - group_ids = [1] * 100 - samples = GroupedBatchSampler(sampler, group_ids, 2) - - for mini_batch in samples: - self.assertEqual(len(mini_batch), 2) - - def test_groups(self): - sampler = SequentialSampler(list(range(100))) - group_ids = [1, 0] * 50 - samples = GroupedBatchSampler(sampler, group_ids, 2) - - for mini_batch in samples: - self.assertEqual((mini_batch[0] + mini_batch[1]) % 2, 0) - - -class TestSamplerDeterministic(unittest.TestCase): - def test_to_iterable(self): - sampler = TrainingSampler(100, seed=10) - gt_output = list(itertools.islice(sampler, 100)) - self.assertEqual(set(gt_output), set(range(100))) - - dataset = DatasetFromList(list(range(100))) - dataset = ToIterableDataset(dataset, sampler) - data_loader = data.DataLoader(dataset, num_workers=0, collate_fn=operator.itemgetter(0)) - - output = list(itertools.islice(data_loader, 100)) - self.assertEqual(output, gt_output) - - data_loader = data.DataLoader( - dataset, - num_workers=2, - collate_fn=operator.itemgetter(0), - worker_init_fn=worker_init_reset_seed, - # reset seed should not affect behavior of TrainingSampler - ) - output = list(itertools.islice(data_loader, 100)) - # multiple workers should not lead to duplicate or different data - self.assertEqual(output, gt_output) - - def test_training_sampler_seed(self): - seed_all_rng(42) - sampler = TrainingSampler(30) - data = list(itertools.islice(sampler, 65)) - - seed_all_rng(42) - sampler = TrainingSampler(30) - seed_all_rng(999) # should be ineffective - data2 = list(itertools.islice(sampler, 65)) - self.assertEqual(data, data2) - - -class TestRepeatFactorTrainingSampler(unittest.TestCase): - def test_repeat_factors_from_category_frequency(self): - repeat_thresh = 0.5 - - dataset_dicts = [ - {"annotations": [{"category_id": 0}, {"category_id": 1}]}, - {"annotations": [{"category_id": 0}]}, - {"annotations": []}, - ] - - rep_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( - dataset_dicts, repeat_thresh - ) - - expected_rep_factors = torch.tensor([math.sqrt(3 / 2), 1.0, 1.0]) - self.assertTrue(torch.allclose(rep_factors, expected_rep_factors)) - - -class TestInferenceSampler(unittest.TestCase): - def test_local_indices(self): - sizes = [0, 16, 2, 42] - world_sizes = [5, 2, 3, 4] - - expected_results = [ - [range(0) for _ in range(5)], - [range(8), range(8, 16)], - [range(1), range(1, 2), range(0)], - [range(11), range(11, 22), range(22, 32), range(32, 42)], - ] - - for size, world_size, expected_result in zip(sizes, world_sizes, expected_results): - with self.subTest(f"size={size}, world_size={world_size}"): - local_indices = [ - InferenceSampler._get_local_indices(size, world_size, r) - for r in range(world_size) - ] - self.assertEqual(local_indices, expected_result) diff --git a/detectron2/tests/data/test_transforms.py b/detectron2/tests/data/test_transforms.py deleted file mode 100644 index 
382048e533708dec3fabf89528564ebc2ad4c83f..0000000000000000000000000000000000000000 --- a/detectron2/tests/data/test_transforms.py +++ /dev/null @@ -1,268 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import logging -import numpy as np -import unittest -from unittest import mock -import torch -from PIL import Image, ImageOps -from torch.nn import functional as F - -from detectron2.config import get_cfg -from detectron2.data import detection_utils -from detectron2.data import transforms as T -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger(__name__) - - -def polygon_allclose(poly1, poly2): - """ - Test whether two polygons are the same. - Both arguments are nx2 numpy arrays. - """ - # ABCD and CDAB are the same polygon. So it's important to check after rolling - for k in range(len(poly1)): - rolled_poly1 = np.roll(poly1, k, axis=0) - if np.allclose(rolled_poly1, poly2): - return True - return False - - -class TestTransforms(unittest.TestCase): - def setUp(self): - setup_logger() - - def test_apply_rotated_boxes(self): - np.random.seed(125) - cfg = get_cfg() - is_train = True - augs = detection_utils.build_augmentation(cfg, is_train) - image = np.random.rand(200, 300) - image, transforms = T.apply_augmentations(augs, image) - image_shape = image.shape[:2] # h, w - assert image_shape == (800, 1200) - annotation = {"bbox": [179, 97, 62, 40, -56]} - - boxes = np.array([annotation["bbox"]], dtype=np.float64) # boxes.shape = (1, 5) - transformed_bbox = transforms.apply_rotated_box(boxes)[0] - - expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64) - err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox) - assert np.allclose(transformed_bbox, expected_bbox), err_msg - - def test_resize_and_crop(self): - np.random.seed(125) - min_scale = 0.2 - max_scale = 2.0 - target_height = 1100 - target_width = 1000 - resize_aug = T.ResizeScale(min_scale, max_scale, target_height, target_width) - fixed_size_crop_aug = T.FixedSizeCrop((target_height, target_width)) - hflip_aug = T.RandomFlip() - augs = [resize_aug, fixed_size_crop_aug, hflip_aug] - original_image = np.random.rand(900, 800) - image, transforms = T.apply_augmentations(augs, original_image) - image_shape = image.shape[:2] # h, w - self.assertEqual((1100, 1000), image_shape) - - boxes = np.array( - [[91, 46, 144, 111], [523, 251, 614, 295]], - dtype=np.float64, - ) - transformed_bboxs = transforms.apply_box(boxes) - expected_bboxs = np.array( - [ - [895.42, 33.42666667, 933.91125, 80.66], - [554.0825, 182.39333333, 620.17125, 214.36666667], - ], - dtype=np.float64, - ) - err_msg = "transformed_bbox = {}, expected {}".format(transformed_bboxs, expected_bboxs) - self.assertTrue(np.allclose(transformed_bboxs, expected_bboxs), err_msg) - - polygon = np.array([[91, 46], [144, 46], [144, 111], [91, 111]]) - transformed_polygons = transforms.apply_polygons([polygon]) - expected_polygon = np.array([[934.0, 33.0], [934.0, 80.0], [896.0, 80.0], [896.0, 33.0]]) - self.assertEqual(1, len(transformed_polygons)) - err_msg = "transformed_polygon = {}, expected {}".format( - transformed_polygons[0], expected_polygon - ) - self.assertTrue(polygon_allclose(transformed_polygons[0], expected_polygon), err_msg) - - def test_apply_rotated_boxes_unequal_scaling_factor(self): - np.random.seed(125) - h, w = 400, 200 - newh, neww = 800, 800 - image = np.random.rand(h, w) - augs = [] - augs.append(T.Resize(shape=(newh, neww))) - image, transforms = 
T.apply_augmentations(augs, image) - image_shape = image.shape[:2] # h, w - assert image_shape == (newh, neww) - - boxes = np.array( - [ - [150, 100, 40, 20, 0], - [150, 100, 40, 20, 30], - [150, 100, 40, 20, 90], - [150, 100, 40, 20, -90], - ], - dtype=np.float64, - ) - transformed_boxes = transforms.apply_rotated_box(boxes) - - expected_bboxes = np.array( - [ - [600, 200, 160, 40, 0], - [600, 200, 144.22205102, 52.91502622, 49.10660535], - [600, 200, 80, 80, 90], - [600, 200, 80, 80, -90], - ], - dtype=np.float64, - ) - err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes) - assert np.allclose(transformed_boxes, expected_bboxes), err_msg - - def test_print_augmentation(self): - t = T.RandomCrop("relative", (100, 100)) - self.assertEqual(str(t), "RandomCrop(crop_type='relative', crop_size=(100, 100))") - - t0 = T.RandomFlip(prob=0.5) - self.assertEqual(str(t0), "RandomFlip(prob=0.5)") - - t1 = T.RandomFlip() - self.assertEqual(str(t1), "RandomFlip()") - - t = T.AugmentationList([t0, t1]) - self.assertEqual(str(t), f"AugmentationList[{t0}, {t1}]") - - def test_random_apply_prob_out_of_range_check(self): - test_probabilities = {0.0: True, 0.5: True, 1.0: True, -0.01: False, 1.01: False} - - for given_probability, is_valid in test_probabilities.items(): - if not is_valid: - self.assertRaises(AssertionError, T.RandomApply, None, prob=given_probability) - else: - T.RandomApply(T.NoOpTransform(), prob=given_probability) - - def test_random_apply_wrapping_aug_probability_occured_evaluation(self): - transform_mock = mock.MagicMock(name="MockTransform", spec=T.Augmentation) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): - transform = random_apply.get_transform(image_mock) - transform_mock.get_transform.assert_called_once_with(image_mock) - self.assertIsNot(transform, transform_mock) - - def test_random_apply_wrapping_std_transform_probability_occured_evaluation(self): - transform_mock = mock.MagicMock(name="MockTransform", spec=T.Transform) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): - transform = random_apply.get_transform(image_mock) - self.assertIs(transform, transform_mock) - - def test_random_apply_probability_not_occured_evaluation(self): - transform_mock = mock.MagicMock(name="MockTransform", spec=T.Augmentation) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - with mock.patch.object(random_apply, "_rand_range", return_value=0.9): - transform = random_apply.get_transform(image_mock) - transform_mock.get_transform.assert_not_called() - self.assertIsInstance(transform, T.NoOpTransform) - - def test_augmentation_input_args(self): - input_shape = (100, 100) - output_shape = (50, 50) - - # define two augmentations with different args - class TG1(T.Augmentation): - def get_transform(self, image, sem_seg): - return T.ResizeTransform( - input_shape[0], input_shape[1], output_shape[0], output_shape[1] - ) - - class TG2(T.Augmentation): - def get_transform(self, image): - assert image.shape[:2] == output_shape # check that TG1 is applied - return T.HFlipTransform(output_shape[1]) - - image = np.random.rand(*input_shape).astype("float32") - sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8") - inputs = T.AugInput(image, 
sem_seg=sem_seg) # provide two args - tfms = inputs.apply_augmentations([TG1(), TG2()]) - self.assertIsInstance(tfms[0], T.ResizeTransform) - self.assertIsInstance(tfms[1], T.HFlipTransform) - self.assertTrue(inputs.image.shape[:2] == output_shape) - self.assertTrue(inputs.sem_seg.shape[:2] == output_shape) - - class TG3(T.Augmentation): - def get_transform(self, image, nonexist): - pass - - with self.assertRaises(AttributeError): - inputs.apply_augmentations([TG3()]) - - def test_augmentation_list(self): - input_shape = (100, 100) - image = np.random.rand(*input_shape).astype("float32") - sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8") - inputs = T.AugInput(image, sem_seg=sem_seg) # provide two args - - augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)]) - _ = T.AugmentationList([augs, T.Resize(30)])(inputs) - # 3 in latest fvcore (flattened transformlist), 2 in older - # self.assertEqual(len(tfms), 3) - - def test_color_transforms(self): - rand_img = np.random.random((100, 100, 3)) * 255 - rand_img = rand_img.astype("uint8") - - # Test no-op - noop_transform = T.ColorTransform(lambda img: img) - self.assertTrue(np.array_equal(rand_img, noop_transform.apply_image(rand_img))) - - # Test a ImageOps operation - magnitude = np.random.randint(0, 256) - solarize_transform = T.PILColorTransform(lambda img: ImageOps.solarize(img, magnitude)) - expected_img = ImageOps.solarize(Image.fromarray(rand_img), magnitude) - self.assertTrue(np.array_equal(expected_img, solarize_transform.apply_image(rand_img))) - - def test_resize_transform(self): - input_shapes = [(100, 100), (100, 100, 1), (100, 100, 3)] - output_shapes = [(200, 200), (200, 200, 1), (200, 200, 3)] - for in_shape, out_shape in zip(input_shapes, output_shapes): - in_img = np.random.randint(0, 255, size=in_shape, dtype=np.uint8) - tfm = T.ResizeTransform(in_shape[0], in_shape[1], out_shape[0], out_shape[1]) - out_img = tfm.apply_image(in_img) - self.assertEqual(out_img.shape, out_shape) - - def test_resize_shorted_edge_scriptable(self): - def f(image): - newh, neww = T.ResizeShortestEdge.get_output_shape( - image.shape[-2], image.shape[-1], 80, 133 - ) - return F.interpolate(image.unsqueeze(0), size=(newh, neww)) - - input = torch.randn(3, 10, 10) - script_f = torch.jit.script(f) - self.assertTrue(torch.allclose(f(input), script_f(input))) - - # generalize to new shapes - input = torch.randn(3, 8, 100) - self.assertTrue(torch.allclose(f(input), script_f(input))) - - def test_extent_transform(self): - input_shapes = [(100, 100), (100, 100, 1), (100, 100, 3)] - src_rect = (20, 20, 80, 80) - output_shapes = [(200, 200), (200, 200, 1), (200, 200, 3)] - for in_shape, out_shape in zip(input_shapes, output_shapes): - in_img = np.random.randint(0, 255, size=in_shape, dtype=np.uint8) - tfm = T.ExtentTransform(src_rect, out_shape[:2]) - out_img = tfm.apply_image(in_img) - self.assertTrue(out_img.shape == out_shape) diff --git a/detectron2/tests/export/test_c10.py b/detectron2/tests/export/test_c10.py deleted file mode 100644 index 55076abd15beb50b1774f0b5fe399b22d7cc630f..0000000000000000000000000000000000000000 --- a/detectron2/tests/export/test_c10.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import unittest - -try: - # Caffe2 used to be included in PyTorch, but since PyTorch 1.10+, - # it is not included in pre-built packages. 
This is a safety BC check - from detectron2.config import get_cfg - from detectron2.export.c10 import Caffe2RPN - from detectron2.layers import ShapeSpec -except ImportError: - raise unittest.SkipTest( - f"PyTorch does not have Caffe2 support. Skipping all tests in {__name__}" - ) from None - - -class TestCaffe2RPN(unittest.TestCase): - def test_instantiation(self): - cfg = get_cfg() - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - input_shapes = {"res4": ShapeSpec(channels=256, stride=4)} - rpn = Caffe2RPN(cfg, input_shapes) - assert rpn is not None - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) - with self.assertRaises(AssertionError): - rpn = Caffe2RPN(cfg, input_shapes) diff --git a/detectron2/tests/layers/__init__.py b/detectron2/tests/layers/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/tests/layers/test_blocks.py b/detectron2/tests/layers/test_blocks.py deleted file mode 100644 index 5a0488adbfcf0c7eca08616f43ebf695acad4b7e..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_blocks.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import unittest -import torch -from torch import nn - -from detectron2.layers import ASPP, DepthwiseSeparableConv2d, FrozenBatchNorm2d -from detectron2.modeling.backbone.resnet import BasicStem, ResNet - - -""" -Test for misc layers. -""" - - -class TestBlocks(unittest.TestCase): - def test_separable_conv(self): - DepthwiseSeparableConv2d(3, 10, norm1="BN", activation1=nn.PReLU()) - - def test_aspp(self): - m = ASPP(3, 10, [2, 3, 4], norm="", activation=nn.PReLU()) - self.assertIsNot(m.convs[0].activation.weight, m.convs[1].activation.weight) - self.assertIsNot(m.convs[0].activation.weight, m.project.activation.weight) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_frozen_batchnorm_fp16(self): - from torch.cuda.amp import autocast - - C = 10 - input = torch.rand(1, C, 10, 10).cuda() - m = FrozenBatchNorm2d(C).cuda() - with autocast(): - output = m(input.half()) - self.assertEqual(output.dtype, torch.float16) - - # requires_grad triggers a different codepath - input.requires_grad_() - with autocast(): - output = m(input.half()) - self.assertEqual(output.dtype, torch.float16) - - def test_resnet_unused_stages(self): - resnet = ResNet(BasicStem(), ResNet.make_default_stages(18), out_features=["res2"]) - self.assertTrue(hasattr(resnet, "res2")) - self.assertFalse(hasattr(resnet, "res3")) - self.assertFalse(hasattr(resnet, "res5")) - - resnet = ResNet(BasicStem(), ResNet.make_default_stages(18), out_features=["res2", "res5"]) - self.assertTrue(hasattr(resnet, "res2")) - self.assertTrue(hasattr(resnet, "res4")) - self.assertTrue(hasattr(resnet, "res5")) diff --git a/detectron2/tests/layers/test_deformable.py b/detectron2/tests/layers/test_deformable.py deleted file mode 100644 index 4aa319fc7e614f6a7a8ece7a45c177211c03012d..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_deformable.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
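Editor's note on test_frozen_batchnorm_fp16 deleted above: the test only checks output dtypes, but the idea behind FrozenBatchNorm2d is that batch-norm with frozen statistics and affine parameters reduces to a fixed per-channel affine transform. A conceptual sketch of that reduction, assuming the usual (N, C, H, W) layout; this is not detectron2's actual implementation, which also handles state-dict loading and fp16 casting:

```python
import torch

def frozen_batchnorm(x, weight, bias, running_mean, running_var, eps=1e-5):
    """Per-channel affine transform equivalent to batch-norm with frozen statistics.
    x: (N, C, H, W); the four parameter tensors all have shape (C,)."""
    scale = weight * (running_var + eps).rsqrt()   # weight / sqrt(var + eps)
    shift = bias - running_mean * scale
    return x * scale.view(1, -1, 1, 1) + shift.view(1, -1, 1, 1)

# Shape-only sanity check with identity-like parameters.
C = 10
out = frozen_batchnorm(torch.rand(1, C, 4, 4), torch.ones(C), torch.zeros(C),
                       torch.zeros(C), torch.ones(C))
assert out.shape == (1, C, 4, 4)
```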
-import numpy as np -import unittest -import torch - -from detectron2.layers import DeformConv, ModulatedDeformConv -from detectron2.utils.env import TORCH_VERSION - - -@unittest.skipIf( - TORCH_VERSION == (1, 8) and torch.cuda.is_available(), - "This test fails under cuda11 + torch1.8.", -) -class DeformableTest(unittest.TestCase): - @unittest.skipIf(not torch.cuda.is_available(), "Deformable not supported for cpu") - def test_forward_output(self): - device = torch.device("cuda") - N, C, H, W = shape = 1, 1, 5, 5 - kernel_size = 3 - padding = 1 - - inputs = torch.arange(np.prod(shape), dtype=torch.float32).reshape(*shape).to(device) - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - offset_channels = kernel_size * kernel_size * 2 - offset = torch.full((N, offset_channels, H, W), 0.5, dtype=torch.float32).to(device) - - # Test DCN v1 - deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) - deform.weight = torch.nn.Parameter(torch.ones_like(deform.weight)) - output = deform(inputs, offset) - output = output.detach().cpu().numpy() - deform_results = np.array( - [ - [30, 41.25, 48.75, 45, 28.75], - [62.25, 81, 90, 80.25, 50.25], - [99.75, 126, 135, 117.75, 72.75], - [105, 131.25, 138.75, 120, 73.75], - [71.75, 89.25, 93.75, 80.75, 49.5], - ] - ) - self.assertTrue(np.allclose(output.flatten(), deform_results.flatten())) - - # Test DCN v2 - mask_channels = kernel_size * kernel_size - mask = torch.full((N, mask_channels, H, W), 0.5, dtype=torch.float32).to(device) - modulate_deform = ModulatedDeformConv(C, C, kernel_size, padding=padding, bias=False).to( - device - ) - modulate_deform.weight = deform.weight - output = modulate_deform(inputs, offset, mask) - output = output.detach().cpu().numpy() - self.assertTrue(np.allclose(output.flatten(), deform_results.flatten() * 0.5)) - - def test_forward_output_on_cpu(self): - device = torch.device("cpu") - N, C, H, W = shape = 1, 1, 5, 5 - kernel_size = 3 - padding = 1 - - inputs = torch.arange(np.prod(shape), dtype=torch.float32).reshape(*shape).to(device) - - offset_channels = kernel_size * kernel_size * 2 - offset = torch.full((N, offset_channels, H, W), 0.5, dtype=torch.float32).to(device) - - # Test DCN v1 on cpu - deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) - deform.weight = torch.nn.Parameter(torch.ones_like(deform.weight)) - output = deform(inputs, offset) - output = output.detach().cpu().numpy() - deform_results = np.array( - [ - [30, 41.25, 48.75, 45, 28.75], - [62.25, 81, 90, 80.25, 50.25], - [99.75, 126, 135, 117.75, 72.75], - [105, 131.25, 138.75, 120, 73.75], - [71.75, 89.25, 93.75, 80.75, 49.5], - ] - ) - self.assertTrue(np.allclose(output.flatten(), deform_results.flatten())) - - @unittest.skipIf(not torch.cuda.is_available(), "This test requires gpu access") - def test_forward_output_on_cpu_equals_output_on_gpu(self): - N, C, H, W = shape = 2, 4, 10, 10 - kernel_size = 3 - padding = 1 - - for groups in [1, 2]: - inputs = torch.arange(np.prod(shape), dtype=torch.float32).reshape(*shape) - offset_channels = kernel_size * kernel_size * 2 - offset = torch.full((N, offset_channels, H, W), 0.5, dtype=torch.float32) - - deform_gpu = DeformConv( - C, C, kernel_size=kernel_size, padding=padding, groups=groups - ).to("cuda") - deform_gpu.weight = torch.nn.Parameter(torch.ones_like(deform_gpu.weight)) - output_gpu = deform_gpu(inputs.to("cuda"), offset.to("cuda")).detach().cpu().numpy() - - deform_cpu = DeformConv( - C, C, kernel_size=kernel_size, 
padding=padding, groups=groups - ).to("cpu") - deform_cpu.weight = torch.nn.Parameter(torch.ones_like(deform_cpu.weight)) - output_cpu = deform_cpu(inputs.to("cpu"), offset.to("cpu")).detach().numpy() - - self.assertTrue(np.allclose(output_gpu.flatten(), output_cpu.flatten())) - - @unittest.skipIf(not torch.cuda.is_available(), "Deformable not supported for cpu") - def test_small_input(self): - device = torch.device("cuda") - for kernel_size in [3, 5]: - padding = kernel_size // 2 - N, C, H, W = shape = (1, 1, kernel_size - 1, kernel_size - 1) - - inputs = torch.rand(shape).to(device) # input size is smaller than kernel size - - offset_channels = kernel_size * kernel_size * 2 - offset = torch.randn((N, offset_channels, H, W), dtype=torch.float32).to(device) - deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) - output = deform(inputs, offset) - self.assertTrue(output.shape == inputs.shape) - - mask_channels = kernel_size * kernel_size - mask = torch.ones((N, mask_channels, H, W), dtype=torch.float32).to(device) - modulate_deform = ModulatedDeformConv( - C, C, kernel_size, padding=padding, bias=False - ).to(device) - output = modulate_deform(inputs, offset, mask) - self.assertTrue(output.shape == inputs.shape) - - @unittest.skipIf(not torch.cuda.is_available(), "Deformable not supported for cpu") - def test_raise_exception(self): - device = torch.device("cuda") - N, C, H, W = shape = 1, 1, 3, 3 - kernel_size = 3 - padding = 1 - - inputs = torch.rand(shape, dtype=torch.float32).to(device) - offset_channels = kernel_size * kernel_size # This is wrong channels for offset - offset = torch.randn((N, offset_channels, H, W), dtype=torch.float32).to(device) - deform = DeformConv(C, C, kernel_size=kernel_size, padding=padding).to(device) - self.assertRaises(RuntimeError, deform, inputs, offset) - - offset_channels = kernel_size * kernel_size * 2 - offset = torch.randn((N, offset_channels, H, W), dtype=torch.float32).to(device) - mask_channels = kernel_size * kernel_size * 2 # This is wrong channels for mask - mask = torch.ones((N, mask_channels, H, W), dtype=torch.float32).to(device) - modulate_deform = ModulatedDeformConv(C, C, kernel_size, padding=padding, bias=False).to( - device - ) - self.assertRaises(RuntimeError, modulate_deform, inputs, offset, mask) - - def test_repr(self): - module = DeformConv(3, 10, kernel_size=3, padding=1, deformable_groups=2) - correct_string = ( - "DeformConv(in_channels=3, out_channels=10, kernel_size=(3, 3), " - "stride=(1, 1), padding=(1, 1), dilation=(1, 1), " - "groups=1, deformable_groups=2, bias=False)" - ) - self.assertEqual(repr(module), correct_string) - - module = ModulatedDeformConv(3, 10, kernel_size=3, padding=1, deformable_groups=2) - correct_string = ( - "ModulatedDeformConv(in_channels=3, out_channels=10, kernel_size=(3, 3), " - "stride=1, padding=1, dilation=1, groups=1, deformable_groups=2, bias=True)" - ) - self.assertEqual(repr(module), correct_string) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/layers/test_losses.py b/detectron2/tests/layers/test_losses.py deleted file mode 100644 index d74920246cbd4a188b3c81cf0c78e982af6da1ac..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_losses.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import numpy as np -import unittest -import torch - -from detectron2.layers import ciou_loss, diou_loss - - -class TestLosses(unittest.TestCase): - def test_diou_loss(self): - """ - loss = 1 - iou + d/c - where, - d = (distance between centers of the 2 boxes)^2 - c = (diagonal length of the smallest enclosing box covering the 2 boxes)^2 - """ - # Identical boxes should have loss of 0 - box = torch.tensor([-1, -1, 1, 1], dtype=torch.float32) - loss = diou_loss(box, box) - self.assertTrue(np.allclose(loss, [0.0])) - - # Half size box inside other box - # iou = 0.5, d = 0.25, c = 8 - box2 = torch.tensor([0, -1, 1, 1], dtype=torch.float32) - loss = diou_loss(box, box2) - self.assertTrue(np.allclose(loss, [0.53125])) - - # Two diagonally adjacent boxes - # iou = 0, d = 2, c = 8 - box3 = torch.tensor([0, 0, 1, 1], dtype=torch.float32) - box4 = torch.tensor([1, 1, 2, 2], dtype=torch.float32) - loss = diou_loss(box3, box4) - self.assertTrue(np.allclose(loss, [1.25])) - - # Test batched loss and reductions - box1s = torch.stack([box, box3], dim=0) - box2s = torch.stack([box2, box4], dim=0) - - loss = diou_loss(box1s, box2s, reduction="sum") - self.assertTrue(np.allclose(loss, [1.78125])) - - loss = diou_loss(box1s, box2s, reduction="mean") - self.assertTrue(np.allclose(loss, [0.890625])) - - def test_ciou_loss(self): - """ - loss = 1 - iou + d/c + alpha*v - where, - d = (distance between centers of the 2 boxes)^2 - c = (diagonal length of the smallest enclosing box covering the 2 boxes)^2 - v = (4/pi^2) * (arctan(box1_w/box1_h) - arctan(box2_w/box2_h))^2 - alpha = v/(1 - iou + v) - """ - # Identical boxes should have loss of 0 - box = torch.tensor([-1, -1, 1, 1], dtype=torch.float32) - loss = ciou_loss(box, box) - self.assertTrue(np.allclose(loss, [0.0])) - - # Half size box inside other box - # iou = 0.5, d = 0.25, c = 8 - # v = (4/pi^2) * (arctan(1) - arctan(0.5))^2 = 0.042 - # alpha = 0.0775 - box2 = torch.tensor([0, -1, 1, 1], dtype=torch.float32) - loss = ciou_loss(box, box2) - self.assertTrue(np.allclose(loss, [0.5345])) - - # Two diagonally adjacent boxes - # iou = 0, d = 2, c = 8, v = 0, alpha = 0 - box3 = torch.tensor([0, 0, 1, 1], dtype=torch.float32) - box4 = torch.tensor([1, 1, 2, 2], dtype=torch.float32) - loss = ciou_loss(box3, box4) - self.assertTrue(np.allclose(loss, [1.25])) - - # Test batched loss and reductions - box1s = torch.stack([box, box3], dim=0) - box2s = torch.stack([box2, box4], dim=0) - - loss = ciou_loss(box1s, box2s, reduction="sum") - self.assertTrue(np.allclose(loss, [1.7845])) - - loss = ciou_loss(box1s, box2s, reduction="mean") - self.assertTrue(np.allclose(loss, [0.89225])) diff --git a/detectron2/tests/layers/test_mask_ops.py b/detectron2/tests/layers/test_mask_ops.py deleted file mode 100644 index dfbcaf5291a87ec85617d5e7a7aa959c68b06770..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_mask_ops.py +++ /dev/null @@ -1,202 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
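# A pure-PyTorch sketch of the DIoU formula quoted in the docstring above
# (loss = 1 - iou + d/c), checked against the 0.53125 value the deleted test uses.
# This only spells out the formula for one pair of XYXY boxes; it is not the
# detectron2 implementation.
import torch

def diou_pair(b1, b2):
    ix1, iy1 = torch.max(b1[0], b2[0]), torch.max(b1[1], b2[1])
    ix2, iy2 = torch.min(b1[2], b2[2]), torch.min(b1[3], b2[3])
    inter = (ix2 - ix1).clamp(min=0) * (iy2 - iy1).clamp(min=0)
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    iou = inter / (area1 + area2 - inter)
    # d: squared distance between the two box centers
    d = ((b1[0] + b1[2]) / 2 - (b2[0] + b2[2]) / 2) ** 2 + \
        ((b1[1] + b1[3]) / 2 - (b2[1] + b2[3]) / 2) ** 2
    # c: squared diagonal of the smallest box enclosing both
    ex1, ey1 = torch.min(b1[0], b2[0]), torch.min(b1[1], b2[1])
    ex2, ey2 = torch.max(b1[2], b2[2]), torch.max(b1[3], b2[3])
    c = (ex2 - ex1) ** 2 + (ey2 - ey1) ** 2
    return 1 - iou + d / c

box = torch.tensor([-1.0, -1.0, 1.0, 1.0])
box2 = torch.tensor([0.0, -1.0, 1.0, 1.0])
# iou = 0.5, d = 0.25, c = 8  ->  1 - 0.5 + 0.25 / 8 = 0.53125
assert torch.isclose(diou_pair(box, box2), torch.tensor(0.53125))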
- -import contextlib -import io -import numpy as np -import unittest -from collections import defaultdict -import torch -import tqdm -from fvcore.common.benchmark import benchmark -from pycocotools.coco import COCO -from tabulate import tabulate -from torch.nn import functional as F - -from detectron2.data import MetadataCatalog -from detectron2.layers.mask_ops import ( - pad_masks, - paste_mask_in_image_old, - paste_masks_in_image, - scale_boxes, -) -from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks -from detectron2.structures.masks import polygons_to_bitmask -from detectron2.utils.file_io import PathManager -from detectron2.utils.testing import random_boxes - - -def iou_between_full_image_bit_masks(a, b): - intersect = (a & b).sum() - union = (a | b).sum() - return intersect / union - - -def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5): - x0, y0, x1, y1 = box[0], box[1], box[2], box[3] - - img_h, img_w = full_image_bit_mask.shape - - mask_y = np.arange(0.0, mask_size) + 0.5 # mask y sample coords in [0.5, mask_size - 0.5] - mask_x = np.arange(0.0, mask_size) + 0.5 # mask x sample coords in [0.5, mask_size - 0.5] - mask_y = mask_y / mask_size * (y1 - y0) + y0 - mask_x = mask_x / mask_size * (x1 - x0) + x0 - - mask_x = (mask_x - 0.5) / (img_w - 1) * 2 + -1 - mask_y = (mask_y - 0.5) / (img_h - 1) * 2 + -1 - gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x)) - ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32) - - full_image_bit_mask = torch.from_numpy(full_image_bit_mask) - mask = F.grid_sample( - full_image_bit_mask[None, None, :, :].to(dtype=torch.float32), - ind[None, :, :, :], - align_corners=True, - ) - - return mask[0, 0] >= threshold - - -class TestMaskCropPaste(unittest.TestCase): - def setUp(self): - json_file = MetadataCatalog.get("coco_2017_val_100").json_file - if not PathManager.isfile(json_file): - raise unittest.SkipTest("{} not found".format(json_file)) - with contextlib.redirect_stdout(io.StringIO()): - json_file = PathManager.get_local_path(json_file) - self.coco = COCO(json_file) - - def test_crop_paste_consistency(self): - """ - rasterize_polygons_within_box (used in training) - and - paste_masks_in_image (used in inference) - should be inverse operations to each other. - - This function runs several implementation of the above two operations and prints - the reconstruction error. 
- """ - - anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False)) # avoid crowd annotations - - selected_anns = anns[:100] - - ious = [] - for ann in tqdm.tqdm(selected_anns): - results = self.process_annotation(ann) - ious.append([k[2] for k in results]) - - ious = np.array(ious) - mean_ious = ious.mean(axis=0) - table = [] - res_dic = defaultdict(dict) - for row, iou in zip(results, mean_ious): - table.append((row[0], row[1], iou)) - res_dic[row[0]][row[1]] = iou - print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple")) - # assert that the reconstruction is good: - self.assertTrue(res_dic["polygon"]["aligned"] > 0.94) - self.assertTrue(res_dic["roialign"]["aligned"] > 0.95) - - def process_annotation(self, ann, mask_side_len=28): - # Parse annotation data - img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0] - height, width = img_info["height"], img_info["width"] - gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]] - gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width) - - # Run rasterize .. - torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4) - box_bitmasks = { - "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0], - "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len), - "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize( - torch_gt_bbox, mask_side_len - )[0], - } - - # Run paste .. - results = defaultdict(dict) - for k, box_bitmask in box_bitmasks.items(): - padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1) - scaled_boxes = scale_boxes(torch_gt_bbox, scale) - - r = results[k] - r["old"] = paste_mask_in_image_old( - padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5 - ) - r["aligned"] = paste_masks_in_image( - box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width) - )[0] - - table = [] - for rasterize_method, r in results.items(): - for paste_method, mask in r.items(): - mask = np.asarray(mask) - iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask) - table.append((rasterize_method, paste_method, iou)) - return table - - def test_polygon_area(self): - # Draw polygon boxes - for d in [5.0, 10.0, 1000.0]: - polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]]) - area = polygon.area()[0] - target = d**2 - self.assertEqual(area, target) - - # Draw polygon triangles - for d in [5.0, 10.0, 1000.0]: - polygon = PolygonMasks([[[0, 0, 0, d, d, d]]]) - area = polygon.area()[0] - target = d**2 / 2 - self.assertEqual(area, target) - - def test_paste_mask_scriptable(self): - scripted_f = torch.jit.script(paste_masks_in_image) - N = 10 - masks = torch.rand(N, 28, 28) - boxes = Boxes(random_boxes(N, 100)).tensor - image_shape = (150, 150) - - out = paste_masks_in_image(masks, boxes, image_shape) - scripted_out = scripted_f(masks, boxes, image_shape) - self.assertTrue(torch.equal(out, scripted_out)) - - -def benchmark_paste(): - S = 800 - H, W = image_shape = (S, S) - N = 64 - torch.manual_seed(42) - masks = torch.rand(N, 28, 28) - - center = torch.rand(N, 2) * 600 + 100 - wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50) - x0y0 = torch.clamp(center - wh * 0.5, min=0.0) - x1y1 = torch.clamp(center + wh * 0.5, max=S) - boxes = Boxes(torch.cat([x0y0, x1y1], axis=1)) - - def func(device, n=3): - m = masks.to(device=device) - b = boxes.to(device=device) - - def bench(): - for _ in 
range(n): - paste_masks_in_image(m, b, image_shape) - if device.type == "cuda": - torch.cuda.synchronize() - - return bench - - specs = [{"device": torch.device("cpu"), "n": 3}] - if torch.cuda.is_available(): - specs.append({"device": torch.device("cuda"), "n": 3}) - - benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2) - - -if __name__ == "__main__": - benchmark_paste() - unittest.main() diff --git a/detectron2/tests/layers/test_nms.py b/detectron2/tests/layers/test_nms.py deleted file mode 100644 index a042db6147f110a82597c98f38e6b2221ccad53c..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_nms.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from __future__ import absolute_import, division, print_function, unicode_literals -import unittest -import torch - -from detectron2.layers import batched_nms -from detectron2.utils.testing import random_boxes - - -class TestNMS(unittest.TestCase): - def _create_tensors(self, N): - boxes = random_boxes(N, 200) - scores = torch.rand(N) - return boxes, scores - - def test_nms_scriptability(self): - N = 2000 - num_classes = 50 - boxes, scores = self._create_tensors(N) - idxs = torch.randint(0, num_classes, (N,)) - scripted_batched_nms = torch.jit.script(batched_nms) - err_msg = "NMS is incompatible with jit-scripted NMS for IoU={}" - - for iou in [0.2, 0.5, 0.8]: - keep_ref = batched_nms(boxes, scores, idxs, iou) - backup = boxes.clone() - scripted_keep = scripted_batched_nms(boxes, scores, idxs, iou) - assert torch.allclose(boxes, backup), "boxes modified by jit-scripted batched_nms" - self.assertTrue(torch.equal(keep_ref, scripted_keep), err_msg.format(iou)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/layers/test_nms_rotated.py b/detectron2/tests/layers/test_nms_rotated.py deleted file mode 100644 index 4b45384892ab2a7cb20871cf19374f1bd08907ce..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_nms_rotated.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from __future__ import absolute_import, division, print_function, unicode_literals -import numpy as np -import unittest -from copy import deepcopy -import torch -from torchvision import ops - -from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated -from detectron2.utils.testing import random_boxes - - -def nms_edit_distance(keep1, keep2): - """ - Compare the "keep" result of two nms call. - They are allowed to be different in terms of edit distance - due to floating point precision issues, e.g., - if a box happen to have an IoU of 0.5 with another box, - one implentation may choose to keep it while another may discard it. - """ - keep1, keep2 = keep1.cpu(), keep2.cpu() - if torch.equal(keep1, keep2): - # they should be equal most of the time - return 0 - keep1, keep2 = tuple(keep1), tuple(keep2) - m, n = len(keep1), len(keep2) - - # edit distance with DP - f = [np.arange(n + 1), np.arange(n + 1)] - for i in range(m): - cur_row = i % 2 - other_row = (i + 1) % 2 - f[other_row][0] = i + 1 - for j in range(n): - f[other_row][j + 1] = ( - f[cur_row][j] - if keep1[i] == keep2[j] - else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1 - ) - return f[m % 2][n] - - -class TestNMSRotated(unittest.TestCase): - def reference_horizontal_nms(self, boxes, scores, iou_threshold): - """ - Args: - box_scores (N, 5): boxes in corner-form and probabilities. 
- (Note here 5 == 4 + 1, i.e., 4-dim horizontal box + 1-dim prob) - iou_threshold: intersection over union threshold. - Returns: - picked: a list of indexes of the kept boxes - """ - picked = [] - _, indexes = scores.sort(descending=True) - while len(indexes) > 0: - current = indexes[0] - picked.append(current.item()) - if len(indexes) == 1: - break - current_box = boxes[current, :] - indexes = indexes[1:] - rest_boxes = boxes[indexes, :] - iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1) - indexes = indexes[iou <= iou_threshold] - - return torch.as_tensor(picked) - - def _create_tensors(self, N, device="cpu"): - boxes = random_boxes(N, 200, device=device) - scores = torch.rand(N, device=device) - return boxes, scores - - def test_batched_nms_rotated_0_degree_cpu(self, device="cpu"): - N = 2000 - num_classes = 50 - boxes, scores = self._create_tensors(N, device=device) - idxs = torch.randint(0, num_classes, (N,)) - rotated_boxes = torch.zeros(N, 5, device=device) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" - for iou in [0.2, 0.5, 0.8]: - backup = boxes.clone() - keep_ref = batched_nms(boxes, scores, idxs, iou) - assert torch.allclose(boxes, backup), "boxes modified by batched_nms" - backup = rotated_boxes.clone() - keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou) - assert torch.allclose( - rotated_boxes, backup - ), "rotated_boxes modified by batched_nms_rotated" - # Occasionally the gap can be large if there are many IOU on the threshold boundary - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 5, err_msg.format(iou)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_batched_nms_rotated_0_degree_cuda(self): - self.test_batched_nms_rotated_0_degree_cpu(device="cuda") - - def test_nms_rotated_0_degree_cpu(self, device="cpu"): - N = 1000 - boxes, scores = self._create_tensors(N, device=device) - rotated_boxes = torch.zeros(N, 5, device=device) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_nms_rotated_0_degree_cuda(self): - self.test_nms_rotated_0_degree_cpu(device="cuda") - - def test_nms_rotated_90_degrees_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]: - # widths and heights are intentionally swapped here for 90 degrees case - # so that the reference horizontal nms could be used - rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1] - rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0] - - rotated_boxes[:, 4] = torch.ones(N) * 90 - err_msg = "Rotated NMS incompatible between CPU and 
reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - def test_nms_rotated_180_degrees_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - rotated_boxes[:, 4] = torch.ones(N) * 180 - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - -class TestScriptable(unittest.TestCase): - def setUp(self): - class TestingModule(torch.nn.Module): - def forward(self, boxes, scores, threshold): - return nms_rotated(boxes, scores, threshold) - - self.module = TestingModule() - - def test_scriptable_cpu(self): - m = deepcopy(self.module).cpu() - _ = torch.jit.script(m) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_scriptable_cuda(self): - m = deepcopy(self.module).cuda() - _ = torch.jit.script(m) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/layers/test_roi_align.py b/detectron2/tests/layers/test_roi_align.py deleted file mode 100644 index b6fd8edefd107b727e3e523f1364fea1f4a20576..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_roi_align.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
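# A small sketch of the XYXY -> (cx, cy, w, h, angle) conversion that the rotated
# NMS tests above repeat inline; at angle 0 the rotated ops should agree with the
# axis-aligned ones, which is exactly what those tests check.
import torch

def xyxy_to_rotated(boxes: torch.Tensor, angle: float = 0.0) -> torch.Tensor:
    rotated = torch.zeros(boxes.shape[0], 5, device=boxes.device)
    rotated[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0  # cx
    rotated[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0  # cy
    rotated[:, 2] = boxes[:, 2] - boxes[:, 0]          # width
    rotated[:, 3] = boxes[:, 3] - boxes[:, 1]          # height
    rotated[:, 4] = angle                              # degrees, CCW
    return rotated

boxes = torch.tensor([[0.0, 0.0, 4.0, 2.0]])
print(xyxy_to_rotated(boxes))  # tensor([[2., 1., 4., 2., 0.]])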
-import numpy as np -import unittest -from copy import copy -import cv2 -import torch -from fvcore.common.benchmark import benchmark -from torch.nn import functional as F - -from detectron2.layers.roi_align import ROIAlign, roi_align - - -class ROIAlignTest(unittest.TestCase): - def test_forward_output(self): - input = np.arange(25).reshape(5, 5).astype("float32") - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - - output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) - output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) - - # without correction: - old_results = [ - [7.5, 8, 8.5, 9], - [10, 10.5, 11, 11.5], - [12.5, 13, 13.5, 14], - [15, 15.5, 16, 16.5], - ] - - # with 0.5 correction: - correct_results = [ - [4.5, 5.0, 5.5, 6.0], - [7.0, 7.5, 8.0, 8.5], - [9.5, 10.0, 10.5, 11.0], - [12.0, 12.5, 13.0, 13.5], - ] - # This is an upsampled version of [[6, 7], [11, 12]] - - self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) - self.assertTrue( - np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) - ) - - # Also see similar issues in tensorflow at - # https://github.com/tensorflow/tensorflow/issues/26278 - - def test_resize(self): - H, W = 30, 30 - input = np.random.rand(H, W).astype("float32") * 100 - box = [10, 10, 20, 20] - output = self._simple_roialign(input, box, (5, 5), aligned=True) - - input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) - box2x = [x / 2 for x in box] - output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) - diff = np.abs(output2x - output) - self.assertTrue(diff.max() < 1e-4) - - def test_grid_sample_equivalence(self): - H, W = 30, 30 - input = np.random.rand(H, W).astype("float32") * 100 - box = [10, 10, 20, 20] - for ratio in [1, 2, 3]: - output = self._simple_roialign(input, box, (5, 5), sampling_ratio=ratio) - output_grid_sample = grid_sample_roi_align( - torch.from_numpy(input[None, None, :, :]).float(), - torch.as_tensor(box).float()[None, :], - 5, - 1.0, - ratio, - ) - self.assertTrue(torch.allclose(output, output_grid_sample)) - - def _simple_roialign(self, img, box, resolution, sampling_ratio=0, aligned=True): - """ - RoiAlign with scale 1.0. 
- """ - if isinstance(resolution, int): - resolution = (resolution, resolution) - op = ROIAlign(resolution, 1.0, sampling_ratio, aligned=aligned) - input = torch.from_numpy(img[None, None, :, :].astype("float32")) - - rois = [0] + list(box) - rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) - output = op.forward(input, rois) - if torch.cuda.is_available(): - output_cuda = op.forward(input.cuda(), rois.cuda()).cpu() - self.assertTrue(torch.allclose(output, output_cuda)) - return output[0, 0] - - def _simple_roialign_with_grad(self, img, box, resolution, device): - if isinstance(resolution, int): - resolution = (resolution, resolution) - - op = ROIAlign(resolution, 1.0, 0, aligned=True) - input = torch.from_numpy(img[None, None, :, :].astype("float32")) - - rois = [0] + list(box) - rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) - input = input.to(device=device) - rois = rois.to(device=device) - input.requires_grad = True - output = op.forward(input, rois) - return input, output - - def test_empty_box(self): - img = np.random.rand(5, 5) - box = [3, 4, 5, 4] - o = self._simple_roialign(img, box, 7) - self.assertTrue(o.shape == (7, 7)) - self.assertTrue((o == 0).all()) - - for dev in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev)) - output.sum().backward() - self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input))) - - def test_empty_batch(self): - input = torch.zeros(0, 3, 10, 10, dtype=torch.float32) - rois = torch.zeros(0, 5, dtype=torch.float32) - op = ROIAlign((7, 7), 1.0, 0, aligned=True) - output = op.forward(input, rois) - self.assertTrue(output.shape == (0, 3, 7, 7)) - - -def grid_sample_roi_align(input, boxes, output_size, scale, sampling_ratio): - # unlike true roi_align, this does not support different batch_idx - from detectron2.projects.point_rend.point_features import ( - generate_regular_grid_point_coords, - get_point_coords_wrt_image, - point_sample, - ) - - N, _, H, W = input.shape - R = len(boxes) - assert N == 1 - boxes = boxes * scale - grid = generate_regular_grid_point_coords(R, output_size * sampling_ratio, device=boxes.device) - coords = get_point_coords_wrt_image(boxes, grid) - coords = coords / torch.as_tensor([W, H], device=coords.device) # R, s^2, 2 - res = point_sample(input, coords.unsqueeze(0), align_corners=False) # 1,C, R,s^2 - res = ( - res.squeeze(0) - .permute(1, 0, 2) - .reshape(R, -1, output_size * sampling_ratio, output_size * sampling_ratio) - ) - res = F.avg_pool2d(res, sampling_ratio) - return res - - -def benchmark_roi_align(): - def random_boxes(mean_box, stdev, N, maxsize): - ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) - ret.clamp_(min=0, max=maxsize) - return ret - - def func(shape, nboxes_per_img, sampling_ratio, device, box_size="large"): - N, _, H, _ = shape - input = torch.rand(*shape) - boxes = [] - batch_idx = [] - for k in range(N): - if box_size == "large": - b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H) - else: - b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H) - boxes.append(b) - batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k) - boxes = torch.cat(boxes, axis=0) - batch_idx = torch.cat(batch_idx, axis=0) - boxes = torch.cat([batch_idx, boxes], axis=1) - - input = input.to(device=device) - boxes = boxes.to(device=device) - - def bench(): - if False and sampling_ratio > 0 and N == 1: - # enable to benchmark grid_sample 
(slower) - grid_sample_roi_align(input, boxes[:, 1:], 7, 1.0, sampling_ratio) - else: - roi_align(input, boxes, 7, 1.0, sampling_ratio, True) - if device == "cuda": - torch.cuda.synchronize() - - return bench - - def gen_args(arg): - args = [] - for size in ["small", "large"]: - for ratio in [0, 2]: - args.append(copy(arg)) - args[-1]["sampling_ratio"] = ratio - args[-1]["box_size"] = size - return args - - arg = dict(shape=(1, 512, 256, 256), nboxes_per_img=512, device="cuda") - benchmark(func, "cuda_roialign", gen_args(arg), num_iters=20, warmup_iters=1) - arg.update({"device": "cpu", "shape": (1, 256, 128, 128)}) - benchmark(func, "cpu_roialign", gen_args(arg), num_iters=5, warmup_iters=1) - - -if __name__ == "__main__": - if torch.cuda.is_available(): - benchmark_roi_align() - unittest.main() diff --git a/detectron2/tests/layers/test_roi_align_rotated.py b/detectron2/tests/layers/test_roi_align_rotated.py deleted file mode 100644 index 7323d7d5a86816f337571221313c428238c439f4..0000000000000000000000000000000000000000 --- a/detectron2/tests/layers/test_roi_align_rotated.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import unittest -import cv2 -import torch -from torch.autograd import Variable, gradcheck - -from detectron2.layers.roi_align import ROIAlign -from detectron2.layers.roi_align_rotated import ROIAlignRotated - -logger = logging.getLogger(__name__) - - -class ROIAlignRotatedTest(unittest.TestCase): - def _box_to_rotated_box(self, box, angle): - return [ - (box[0] + box[2]) / 2.0, - (box[1] + box[3]) / 2.0, - box[2] - box[0], - box[3] - box[1], - angle, - ] - - def _rot90(self, img, num): - num = num % 4 # note: -1 % 4 == 3 - for _ in range(num): - img = img.transpose(0, 1).flip(0) - return img - - def test_forward_output_0_90_180_270(self): - for i in range(4): - # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees - img = torch.arange(25, dtype=torch.float32).reshape(5, 5) - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - box = [1, 1, 3, 3] - rotated_box = self._box_to_rotated_box(box=box, angle=90 * i) - - result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4)) - - # Here's an explanation for 0 degree case: - # point 0 in the original input lies at [0.5, 0.5] - # (the center of bin [0, 1] x [0, 1]) - # point 1 in the original input lies at [1.5, 0.5], etc. 
- # since the resolution is (4, 4) that divides [1, 3] x [1, 3] - # into 4 x 4 equal bins, - # the top-left bin is [1, 1.5] x [1, 1.5], and its center - # (1.25, 1.25) lies at the 3/4 position - # between point 0 and point 1, point 5 and point 6, - # point 0 and point 5, point 1 and point 6, so it can be calculated as - # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5 - result_expected = torch.tensor( - [ - [4.5, 5.0, 5.5, 6.0], - [7.0, 7.5, 8.0, 8.5], - [9.5, 10.0, 10.5, 11.0], - [12.0, 12.5, 13.0, 13.5], - ] - ) - # This is also an upsampled version of [[6, 7], [11, 12]] - - # When the box is rotated by 90 degrees CCW, - # the result would be rotated by 90 degrees CW, thus it's -i here - result_expected = self._rot90(result_expected, -i) - - assert torch.allclose(result, result_expected) - - def test_resize(self): - H, W = 30, 30 - input = torch.rand(H, W) * 100 - box = [10, 10, 20, 20] - rotated_box = self._box_to_rotated_box(box, angle=0) - output = self._simple_roi_align_rotated(img=input, box=rotated_box, resolution=(5, 5)) - - input2x = cv2.resize(input.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) - input2x = torch.from_numpy(input2x) - box2x = [x / 2 for x in box] - rotated_box2x = self._box_to_rotated_box(box2x, angle=0) - output2x = self._simple_roi_align_rotated(img=input2x, box=rotated_box2x, resolution=(5, 5)) - assert torch.allclose(output2x, output) - - def _simple_roi_align_rotated(self, img, box, resolution): - """ - RoiAlignRotated with scale 1.0 and 0 sample ratio. - """ - op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0) - input = img[None, None, :, :] - - rois = [0] + list(box) - rois = torch.tensor(rois, dtype=torch.float32)[None, :] - result_cpu = op.forward(input, rois) - if torch.cuda.is_available(): - result_cuda = op.forward(input.cuda(), rois.cuda()) - assert torch.allclose(result_cpu, result_cuda.cpu()) - return result_cpu[0, 0] - - def test_empty_box(self): - img = torch.rand(5, 5) - out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7)) - self.assertTrue((out == 0).all()) - - def test_roi_align_rotated_gradcheck_cpu(self): - dtype = torch.float64 - device = torch.device("cpu") - roi_align_rotated_op = ROIAlignRotated( - output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1 - ).to(dtype=dtype, device=device) - x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) - # roi format is (batch index, x_center, y_center, width, height, angle) - rois = torch.tensor( - [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], - dtype=dtype, - device=device, - ) - - def func(input): - return roi_align_rotated_op(input, rois) - - assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU" - assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU" - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_roi_align_rotated_gradient_cuda(self): - """ - Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU, - and compare the result with ROIAlign - """ - # torch.manual_seed(123) - dtype = torch.float64 - device = torch.device("cuda") - pool_h, pool_w = (5, 5) - - roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to( - device=device - ) - - roi_align_rotated = ROIAlignRotated( - output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2 - ).to(device=device) - - x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) - # x_rotated 
= x.clone() won't work (will lead to grad_fun=CloneBackward)! - x_rotated = Variable(x.data.clone(), requires_grad=True) - - # roi_rotated format is (batch index, x_center, y_center, width, height, angle) - rois_rotated = torch.tensor( - [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], - dtype=dtype, - device=device, - ) - - y_rotated = roi_align_rotated(x_rotated, rois_rotated) - s_rotated = y_rotated.sum() - s_rotated.backward() - - # roi format is (batch index, x1, y1, x2, y2) - rois = torch.tensor( - [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device - ) - - y = roi_align(x, rois) - s = y.sum() - s.backward() - - assert torch.allclose( - x.grad, x_rotated.grad - ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA" - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/__init__.py b/detectron2/tests/modeling/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/tests/modeling/test_anchor_generator.py b/detectron2/tests/modeling/test_anchor_generator.py deleted file mode 100644 index 13a808e587382216da6fe7ee957603f448172657..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_anchor_generator.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.layers import ShapeSpec -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator - -logger = logging.getLogger(__name__) - - -class TestAnchorGenerator(unittest.TestCase): - def test_default_anchor_generator(self): - cfg = get_cfg() - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] - - anchor_generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)]) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - anchors = anchor_generator([features["stage3"]]) - expected_anchor_tensor = torch.tensor( - [ - [-32.0, -8.0, 32.0, 8.0], - [-16.0, -16.0, 16.0, 16.0], - [-8.0, -32.0, 8.0, 32.0], - [-64.0, -16.0, 64.0, 16.0], - [-32.0, -32.0, 32.0, 32.0], - [-16.0, -64.0, 16.0, 64.0], - [-28.0, -8.0, 36.0, 8.0], # -28.0 == -32.0 + STRIDE (4) - [-12.0, -16.0, 20.0, 16.0], - [-4.0, -32.0, 12.0, 32.0], - [-60.0, -16.0, 68.0, 16.0], - [-28.0, -32.0, 36.0, 32.0], - [-12.0, -64.0, 20.0, 64.0], - ] - ) - - self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) - - def test_default_anchor_generator_centered(self): - # test explicit args - anchor_generator = DefaultAnchorGenerator( - sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4] - ) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - expected_anchor_tensor = torch.tensor( - [ - [-30.0, -6.0, 34.0, 10.0], - [-14.0, -14.0, 18.0, 18.0], - [-6.0, -30.0, 10.0, 34.0], - [-62.0, -14.0, 66.0, 18.0], - [-30.0, -30.0, 34.0, 34.0], - [-14.0, -62.0, 18.0, 66.0], - [-26.0, -6.0, 38.0, 10.0], - [-10.0, -14.0, 22.0, 18.0], - [-2.0, -30.0, 14.0, 34.0], - [-58.0, -14.0, 70.0, 18.0], - [-26.0, -30.0, 38.0, 34.0], - [-10.0, -62.0, 22.0, 66.0], - ] - ) - - anchors = anchor_generator([features["stage3"]]) - self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) - - anchors = 
torch.jit.script(anchor_generator)([features["stage3"]]) - self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) - - def test_rrpn_anchor_generator(self): - cfg = get_cfg() - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45] # test single list[float] - anchor_generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)]) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - anchors = anchor_generator([features["stage3"]]) - expected_anchor_tensor = torch.tensor( - [ - [0.0, 0.0, 64.0, 16.0, 0.0], - [0.0, 0.0, 64.0, 16.0, 45.0], - [0.0, 0.0, 32.0, 32.0, 0.0], - [0.0, 0.0, 32.0, 32.0, 45.0], - [0.0, 0.0, 16.0, 64.0, 0.0], - [0.0, 0.0, 16.0, 64.0, 45.0], - [0.0, 0.0, 128.0, 32.0, 0.0], - [0.0, 0.0, 128.0, 32.0, 45.0], - [0.0, 0.0, 64.0, 64.0, 0.0], - [0.0, 0.0, 64.0, 64.0, 45.0], - [0.0, 0.0, 32.0, 128.0, 0.0], - [0.0, 0.0, 32.0, 128.0, 45.0], - [4.0, 0.0, 64.0, 16.0, 0.0], # 4.0 == 0.0 + STRIDE (4) - [4.0, 0.0, 64.0, 16.0, 45.0], - [4.0, 0.0, 32.0, 32.0, 0.0], - [4.0, 0.0, 32.0, 32.0, 45.0], - [4.0, 0.0, 16.0, 64.0, 0.0], - [4.0, 0.0, 16.0, 64.0, 45.0], - [4.0, 0.0, 128.0, 32.0, 0.0], - [4.0, 0.0, 128.0, 32.0, 45.0], - [4.0, 0.0, 64.0, 64.0, 0.0], - [4.0, 0.0, 64.0, 64.0, 45.0], - [4.0, 0.0, 32.0, 128.0, 0.0], - [4.0, 0.0, 32.0, 128.0, 45.0], - ] - ) - - self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/test_backbone.py b/detectron2/tests/modeling/test_backbone.py deleted file mode 100644 index 3bb100f9bd5b4939e4646821c5a60d51c8ea65fd..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_backbone.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import unittest -import torch - -import detectron2.export.torchscript # apply patch # noqa -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.layers import ShapeSpec -from detectron2.modeling.backbone import build_resnet_backbone -from detectron2.modeling.backbone.fpn import build_resnet_fpn_backbone - - -class TestBackBone(unittest.TestCase): - def test_resnet_scriptability(self): - cfg = get_cfg() - resnet = build_resnet_backbone(cfg, ShapeSpec(channels=3)) - - scripted_resnet = torch.jit.script(resnet) - - inp = torch.rand(2, 3, 100, 100) - out1 = resnet(inp)["res4"] - out2 = scripted_resnet(inp)["res4"] - self.assertTrue(torch.allclose(out1, out2)) - - def test_fpn_scriptability(self): - cfg = model_zoo.get_config("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") - bb = build_resnet_fpn_backbone(cfg, ShapeSpec(channels=3)) - bb_s = torch.jit.script(bb) - - inp = torch.rand(2, 3, 128, 128) - out1 = bb(inp)["p5"] - out2 = bb_s(inp)["p5"] - self.assertTrue(torch.allclose(out1, out2)) diff --git a/detectron2/tests/modeling/test_box2box_transform.py b/detectron2/tests/modeling/test_box2box_transform.py deleted file mode 100644 index fd3a7b79b6b7a3608ad7cb3918de020a5a600d2f..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_box2box_transform.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
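# A back-of-the-envelope sketch of the anchor counts the generator tests above expect:
# anchors per feature-map location = len(sizes) * len(aspect_ratios) (times len(angles)
# for the rotated generator), and the total is that number times the feature map's H * W.
sizes = [32, 64]
aspect_ratios = [0.25, 1, 4]
angles = [0, 45]
feat_h, feat_w = 1, 2

per_location = len(sizes) * len(aspect_ratios)  # 6
total = per_location * feat_h * feat_w          # 12 rows in the expected anchor tensor
rotated_total = total * len(angles)             # 24 rows in the rotated expected tensor
print(per_location, total, rotated_total)       # 6 12 24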
-import logging -import unittest -import torch - -from detectron2.modeling.box_regression import ( - Box2BoxTransform, - Box2BoxTransformLinear, - Box2BoxTransformRotated, -) -from detectron2.utils.testing import random_boxes - -logger = logging.getLogger(__name__) - - -class TestBox2BoxTransform(unittest.TestCase): - def test_reconstruction(self): - weights = (5, 5, 10, 10) - b2b_tfm = Box2BoxTransform(weights=weights) - src_boxes = random_boxes(10) - dst_boxes = random_boxes(10) - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) - self.assertTrue(torch.allclose(dst_boxes, dst_boxes_reconstructed)) - - def test_apply_deltas_tracing(self): - weights = (5, 5, 10, 10) - b2b_tfm = Box2BoxTransform(weights=weights) - - with torch.no_grad(): - func = torch.jit.trace(b2b_tfm.apply_deltas, (torch.randn(10, 20), torch.randn(10, 4))) - - o = func(torch.randn(10, 20), torch.randn(10, 4)) - self.assertEqual(o.shape, (10, 20)) - o = func(torch.randn(5, 20), torch.randn(5, 4)) - self.assertEqual(o.shape, (5, 20)) - - -def random_rotated_boxes(mean_box, std_length, std_angle, N): - return torch.cat( - [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 - ) + torch.tensor(mean_box, dtype=torch.float) - - -class TestBox2BoxTransformRotated(unittest.TestCase): - def test_reconstruction(self): - weights = (5, 5, 10, 10, 1) - b2b_transform = Box2BoxTransformRotated(weights=weights) - src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) - dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_transform.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) - assert torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5) - # angle difference has to be normalized - assert torch.allclose( - (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) % 360.0 - 180.0, - torch.zeros_like(dst_boxes[:, 4]), - atol=1e-4, - ) - - -class TestBox2BoxTransformLinear(unittest.TestCase): - def test_reconstruction(self): - b2b_tfm = Box2BoxTransformLinear() - src_boxes = random_boxes(10) - dst_boxes = torch.tensor([0, 0, 101, 101] * 10).reshape(10, 4).float() - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) - self.assertTrue(torch.allclose(dst_boxes, dst_boxes_reconstructed, atol=1e-3)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/test_fast_rcnn.py b/detectron2/tests/modeling/test_fast_rcnn.py deleted file mode 100644 index e29b944bffca1ccbf5b02be59a753f3188d90a4f..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_fast_rcnn.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
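# A minimal round-trip sketch of the delta encoding the tests above verify: encoding
# src -> dst as deltas and then decoding should reproduce dst. It assumes only the
# Box2BoxTransform API already imported in the deleted test; the boxes are made up.
import torch
from detectron2.modeling.box_regression import Box2BoxTransform

tfm = Box2BoxTransform(weights=(10, 10, 5, 5))
src = torch.tensor([[10.0, 10.0, 50.0, 60.0], [5.0, 5.0, 20.0, 25.0]])
dst = torch.tensor([[12.0, 8.0, 55.0, 66.0], [4.0, 6.0, 22.0, 24.0]])

deltas = tfm.get_deltas(src, dst)      # per-box (dx, dy, dw, dh), scaled by the weights
recon = tfm.apply_deltas(deltas, src)  # decoding should recover dst up to float error
assert torch.allclose(recon, dst, atol=1e-4)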
-import logging -import unittest -import torch - -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated -from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers -from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers -from detectron2.structures import Boxes, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class FastRCNNTest(unittest.TestCase): - def test_fast_rcnn(self): - torch.manual_seed(132) - - box_head_output_size = 8 - - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=5, - ) - feature_pooled = torch.rand(2, box_head_output_size) - predictions = box_predictor(feature_pooled) - - proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32) - gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - proposal = Instances((10, 10)) - proposal.proposal_boxes = Boxes(proposal_boxes) - proposal.gt_boxes = Boxes(gt_boxes) - proposal.gt_classes = torch.tensor([1, 2]) - - with EventStorage(): # capture events in a new storage to discard them - losses = box_predictor.losses(predictions, [proposal]) - - expected_losses = { - "loss_cls": torch.tensor(1.7951188087), - "loss_box_reg": torch.tensor(4.0357131958), - } - for name in expected_losses.keys(): - assert torch.allclose(losses[name], expected_losses[name]) - - def test_fast_rcnn_empty_batch(self, device="cpu"): - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=10), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=8, - ).to(device=device) - - logits = torch.randn(0, 100, requires_grad=True, device=device) - deltas = torch.randn(0, 4, requires_grad=True, device=device) - losses = box_predictor.losses([logits, deltas], []) - for value in losses.values(): - self.assertTrue(torch.allclose(value, torch.zeros_like(value))) - sum(losses.values()).backward() - self.assertTrue(logits.grad is not None) - self.assertTrue(deltas.grad is not None) - - predictions, _ = box_predictor.inference([logits, deltas], []) - self.assertEqual(len(predictions), 0) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_fast_rcnn_empty_batch_cuda(self): - self.test_fast_rcnn_empty_batch(device=torch.device("cuda")) - - def test_fast_rcnn_rotated(self): - torch.manual_seed(132) - box_head_output_size = 8 - - box_predictor = RotatedFastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)), - num_classes=5, - ) - feature_pooled = torch.rand(2, box_head_output_size) - predictions = box_predictor(feature_pooled) - proposal_boxes = torch.tensor( - [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32 - ) - gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) - proposal = Instances((10, 10)) - proposal.proposal_boxes = RotatedBoxes(proposal_boxes) - proposal.gt_boxes = RotatedBoxes(gt_boxes) - proposal.gt_classes = torch.tensor([1, 2]) - - with EventStorage(): # capture events in a new storage to discard them - losses = box_predictor.losses(predictions, [proposal]) - - # Note: the expected losses are slightly different even if - # the boxes are essentially the same as in the FastRCNNOutput test, because - # bbox_pred in 
FastRCNNOutputLayers have different Linear layers/initialization - # between the two cases. - expected_losses = { - "loss_cls": torch.tensor(1.7920907736), - "loss_box_reg": torch.tensor(4.0410838127), - } - for name in expected_losses.keys(): - assert torch.allclose(losses[name], expected_losses[name]) - - def test_predict_boxes_tracing(self): - class Model(torch.nn.Module): - def __init__(self, output_layer): - super(Model, self).__init__() - self._output_layer = output_layer - - def forward(self, proposal_deltas, proposal_boxes): - instances = Instances((10, 10)) - instances.proposal_boxes = Boxes(proposal_boxes) - return self._output_layer.predict_boxes((None, proposal_deltas), [instances]) - - box_head_output_size = 8 - - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=5, - ) - - model = Model(box_predictor) - - from detectron2.export.torchscript_patch import patch_builtin_len - - with torch.no_grad(), patch_builtin_len(): - func = torch.jit.trace(model, (torch.randn(10, 20), torch.randn(10, 4))) - - o = func(torch.randn(10, 20), torch.randn(10, 4)) - self.assertEqual(o[0].shape, (10, 20)) - o = func(torch.randn(5, 20), torch.randn(5, 4)) - self.assertEqual(o[0].shape, (5, 20)) - o = func(torch.randn(20, 20), torch.randn(20, 4)) - self.assertEqual(o[0].shape, (20, 20)) - - def test_predict_probs_tracing(self): - class Model(torch.nn.Module): - def __init__(self, output_layer): - super(Model, self).__init__() - self._output_layer = output_layer - - def forward(self, scores, proposal_boxes): - instances = Instances((10, 10)) - instances.proposal_boxes = Boxes(proposal_boxes) - return self._output_layer.predict_probs((scores, None), [instances]) - - box_head_output_size = 8 - - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=5, - ) - - model = Model(box_predictor) - - from detectron2.export.torchscript_patch import patch_builtin_len - - with torch.no_grad(), patch_builtin_len(): - func = torch.jit.trace(model, (torch.randn(10, 6), torch.rand(10, 4))) - o = func(torch.randn(10, 6), torch.randn(10, 4)) - self.assertEqual(o[0].shape, (10, 6)) - o = func(torch.randn(5, 6), torch.randn(5, 4)) - self.assertEqual(o[0].shape, (5, 6)) - o = func(torch.randn(20, 6), torch.randn(20, 4)) - self.assertEqual(o[0].shape, (20, 6)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/test_matcher.py b/detectron2/tests/modeling/test_matcher.py deleted file mode 100644 index 6eb2db0c24b117337c431e9ef00a85a3bced71b9..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_matcher.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
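# Shape bookkeeping consistent with the tracing tests above (an inference from those
# shapes, not a statement of the detectron2 API): with num_classes=5 the classification
# scores carry num_classes + 1 columns (the extra column for background) and the
# class-specific box deltas carry 4 * num_classes columns.
num_classes = 5
score_cols = num_classes + 1   # 6, matching the (10, 6) scores traced above
delta_cols = 4 * num_classes   # 20, matching the (10, 20) deltas traced above
print(score_cols, delta_cols)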
-import unittest -from typing import List -import torch - -from detectron2.config import get_cfg -from detectron2.modeling.matcher import Matcher - - -class TestMatcher(unittest.TestCase): - def test_scriptability(self): - cfg = get_cfg() - anchor_matcher = Matcher( - cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True - ) - match_quality_matrix = torch.tensor( - [[0.15, 0.45, 0.2, 0.6], [0.3, 0.65, 0.05, 0.1], [0.05, 0.4, 0.25, 0.4]] - ) - expected_matches = torch.tensor([1, 1, 2, 0]) - expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8) - - matches, match_labels = anchor_matcher(match_quality_matrix) - self.assertTrue(torch.allclose(matches, expected_matches)) - self.assertTrue(torch.allclose(match_labels, expected_match_labels)) - - # nonzero_tuple must be import explicitly to let jit know what it is. - # https://github.com/pytorch/pytorch/issues/38964 - from detectron2.layers import nonzero_tuple # noqa F401 - - def f(thresholds: List[float], labels: List[int]): - return Matcher(thresholds, labels, allow_low_quality_matches=True) - - scripted_anchor_matcher = torch.jit.script(f)( - cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS - ) - matches, match_labels = scripted_anchor_matcher(match_quality_matrix) - self.assertTrue(torch.allclose(matches, expected_matches)) - self.assertTrue(torch.allclose(match_labels, expected_match_labels)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/test_mmdet.py b/detectron2/tests/modeling/test_mmdet.py deleted file mode 100644 index a743b0b67d5ab664257040621d28c1b1b4451709..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_mmdet.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
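# A small sketch of the Matcher call pattern used above, with explicit thresholds and
# labels instead of cfg values (the numbers here are illustrative): rows of the quality
# matrix are ground-truth boxes, columns are predictions, and each prediction is labeled
# 1 / -1 / 0 depending on which threshold band its best match falls into.
import torch
from detectron2.modeling.matcher import Matcher

matcher = Matcher(thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=False)
quality = torch.tensor([[0.9, 0.1, 0.4],
                        [0.2, 0.8, 0.5]])
matches, match_labels = matcher(quality)
# matches[j] is the best ground-truth index for prediction j;
# match_labels[j] is 1 (positive), 0 (negative), or -1 (ignore) per the thresholds above.
print(matches, match_labels)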
-import unittest - -from detectron2.layers import ShapeSpec -from detectron2.modeling.mmdet_wrapper import MMDetBackbone, MMDetDetector - -try: - import mmdet.models # noqa - - HAS_MMDET = True -except ImportError: - HAS_MMDET = False - - -@unittest.skipIf(not HAS_MMDET, "mmdet not available") -class TestMMDetWrapper(unittest.TestCase): - def test_backbone(self): - MMDetBackbone( - backbone=dict( - type="DetectoRS_ResNet", - conv_cfg=dict(type="ConvAWS"), - sac=dict(type="SAC", use_deform=True), - stage_with_sac=(False, True, True, True), - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - ), - neck=dict( - type="FPN", - in_channels=[256, 512, 1024, 2048], - out_channels=256, - num_outs=5, - ), - # skip pretrained model for tests - # pretrained_backbone="torchvision://resnet50", - output_shapes=[ShapeSpec(channels=256, stride=s) for s in [4, 8, 16, 32, 64]], - output_names=["p2", "p3", "p4", "p5", "p6"], - ) - - def test_detector(self): - # a basic R50 Mask R-CNN - MMDetDetector( - detector=dict( - type="MaskRCNN", - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - # skip pretrained model for tests - # init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')) - ), - neck=dict( - type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5 - ), - rpn_head=dict( - type="RPNHead", - in_channels=256, - feat_channels=256, - anchor_generator=dict( - type="AnchorGenerator", - scales=[8], - ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64], - ), - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[1.0, 1.0, 1.0, 1.0], - ), - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type="L1Loss", loss_weight=1.0), - ), - roi_head=dict( - type="StandardRoIHead", - bbox_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - bbox_head=dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.1, 0.1, 0.2, 0.2], - ), - reg_class_agnostic=False, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="L1Loss", loss_weight=1.0), - ), - mask_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - mask_head=dict( - type="FCNMaskHead", - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=80, - loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), - ), - ), - # model training and testing settings - train_cfg=dict( - rpn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False, - ), - allowed_border=-1, - pos_weight=-1, - debug=False, - ), - rpn_proposal=dict( - nms_pre=2000, - max_per_img=1000, - nms=dict(type="nms", iou_threshold=0.7), - min_bbox_size=0, - 
), - rcnn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - ), - test_cfg=dict( - rpn=dict( - nms_pre=1000, - max_per_img=1000, - nms=dict(type="nms", iou_threshold=0.7), - min_bbox_size=0, - ), - rcnn=dict( - score_thr=0.05, - nms=dict(type="nms", iou_threshold=0.5), - max_per_img=100, - mask_thr_binary=0.5, - ), - ), - ), - pixel_mean=[1, 2, 3], - pixel_std=[1, 2, 3], - ) diff --git a/detectron2/tests/modeling/test_model_e2e.py b/detectron2/tests/modeling/test_model_e2e.py deleted file mode 100644 index 8c07e6856d2f4304e0b0cb32747fb667e3bbcb4c..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_model_e2e.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - - -import itertools -import unittest -from contextlib import contextmanager -from copy import deepcopy -import torch - -from detectron2.structures import BitMasks, Boxes, ImageList, Instances -from detectron2.utils.events import EventStorage -from detectron2.utils.testing import get_model_no_weights - - -@contextmanager -def typecheck_hook(model, *, in_dtype=None, out_dtype=None): - """ - Check that the model must be called with the given input/output dtype - """ - if not isinstance(in_dtype, set): - in_dtype = {in_dtype} - if not isinstance(out_dtype, set): - out_dtype = {out_dtype} - - def flatten(x): - if isinstance(x, torch.Tensor): - return [x] - if isinstance(x, (list, tuple)): - return list(itertools.chain(*[flatten(t) for t in x])) - if isinstance(x, dict): - return flatten(list(x.values())) - return [] - - def hook(module, input, output): - if in_dtype is not None: - dtypes = {x.dtype for x in flatten(input)} - assert ( - dtypes == in_dtype - ), f"Expected input dtype of {type(module)} is {in_dtype}. Got {dtypes} instead!" - - if out_dtype is not None: - dtypes = {x.dtype for x in flatten(output)} - assert ( - dtypes == out_dtype - ), f"Expected output dtype of {type(module)} is {out_dtype}. Got {dtypes} instead!" 
- - with model.register_forward_hook(hook): - yield - - -def create_model_input(img, inst=None): - if inst is not None: - return {"image": img, "instances": inst} - else: - return {"image": img} - - -def get_empty_instance(h, w): - inst = Instances((h, w)) - inst.gt_boxes = Boxes(torch.rand(0, 4)) - inst.gt_classes = torch.tensor([]).to(dtype=torch.int64) - inst.gt_masks = BitMasks(torch.rand(0, h, w)) - return inst - - -def get_regular_bitmask_instances(h, w): - inst = Instances((h, w)) - inst.gt_boxes = Boxes(torch.rand(3, 4)) - inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2] - inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64) - inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5)) - return inst - - -class InstanceModelE2ETest: - def setUp(self): - torch.manual_seed(43) - self.model = get_model_no_weights(self.CONFIG_PATH) - - def _test_eval(self, input_sizes): - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - self.model.eval() - self.model(inputs) - - def _test_train(self, input_sizes, instances): - assert len(input_sizes) == len(instances) - inputs = [ - create_model_input(torch.rand(3, s[0], s[1]), inst) - for s, inst in zip(input_sizes, instances) - ] - self.model.train() - with EventStorage(): - losses = self.model(inputs) - sum(losses.values()).backward() - del losses - - def _inf_tensor(self, *shape): - return 1.0 / torch.zeros(*shape, device=self.model.device) - - def _nan_tensor(self, *shape): - return torch.zeros(*shape, device=self.model.device).fill_(float("nan")) - - def test_empty_data(self): - instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)] - self._test_eval([(200, 250), (200, 249)]) - self._test_train([(200, 250), (200, 249)], instances) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_eval_tocpu(self): - model = deepcopy(self.model).cpu() - model.eval() - input_sizes = [(200, 250), (200, 249)] - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - model(inputs) - - -class MaskRCNNE2ETest(InstanceModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - def test_half_empty_data(self): - instances = [get_empty_instance(200, 250), get_regular_bitmask_instances(200, 249)] - self._test_train([(200, 250), (200, 249)], instances) - - # This test is flaky because in some environment the output features are zero due to relu - # def test_rpn_inf_nan_data(self): - # self.model.eval() - # for tensor in [self._inf_tensor, self._nan_tensor]: - # images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - # features = { - # "p2": tensor(1, 256, 256, 256), - # "p3": tensor(1, 256, 128, 128), - # "p4": tensor(1, 256, 64, 64), - # "p5": tensor(1, 256, 32, 32), - # "p6": tensor(1, 256, 16, 16), - # } - # props, _ = self.model.proposal_generator(images, features) - # self.assertEqual(len(props[0]), 0) - - def test_roiheads_inf_nan_data(self): - self.model.eval() - for tensor in [self._inf_tensor, self._nan_tensor]: - images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - features = { - "p2": tensor(1, 256, 256, 256), - "p3": tensor(1, 256, 128, 128), - "p4": tensor(1, 256, 64, 64), - "p5": tensor(1, 256, 32, 32), - "p6": tensor(1, 256, 16, 16), - } - props = [Instances((510, 510))] - props[0].proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device) - props[0].objectness_logits = torch.tensor([1.0]).reshape(1, 1) - det, _ = self.model.roi_heads(images, features, props) - 
self.assertEqual(len(det[0]), 0) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_autocast(self): - from torch.cuda.amp import autocast - - inputs = [{"image": torch.rand(3, 100, 100)}] - self.model.eval() - with autocast(), typecheck_hook( - self.model.backbone, in_dtype=torch.float32, out_dtype=torch.float16 - ), typecheck_hook( - self.model.roi_heads.box_predictor, in_dtype=torch.float16, out_dtype=torch.float16 - ): - out = self.model.inference(inputs, do_postprocess=False)[0] - self.assertEqual(out.pred_boxes.tensor.dtype, torch.float32) - self.assertEqual(out.pred_masks.dtype, torch.float16) - self.assertEqual(out.scores.dtype, torch.float32) # scores comes from softmax - - -class RetinaNetE2ETest(InstanceModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml" - - def test_inf_nan_data(self): - self.model.eval() - self.model.score_threshold = -999999999 - for tensor in [self._inf_tensor, self._nan_tensor]: - images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - features = [ - tensor(1, 256, 128, 128), - tensor(1, 256, 64, 64), - tensor(1, 256, 32, 32), - tensor(1, 256, 16, 16), - tensor(1, 256, 8, 8), - ] - pred_logits, pred_anchor_deltas = self.model.head(features) - pred_logits = [tensor(*x.shape) for x in pred_logits] - pred_anchor_deltas = [tensor(*x.shape) for x in pred_anchor_deltas] - det = self.model.forward_inference(images, features, [pred_logits, pred_anchor_deltas]) - # all predictions (if any) are infinite or nan - if len(det[0]): - self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_autocast(self): - from torch.cuda.amp import autocast - - inputs = [{"image": torch.rand(3, 100, 100)}] - self.model.eval() - with autocast(), typecheck_hook( - self.model.backbone, in_dtype=torch.float32, out_dtype=torch.float16 - ), typecheck_hook(self.model.head, in_dtype=torch.float16, out_dtype=torch.float16): - out = self.model(inputs)[0]["instances"] - self.assertEqual(out.pred_boxes.tensor.dtype, torch.float32) - self.assertEqual(out.scores.dtype, torch.float16) - - -class FCOSE2ETest(InstanceModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-Detection/fcos_R_50_FPN_1x.py" - - -class SemSegE2ETest(unittest.TestCase): - CONFIG_PATH = "Misc/semantic_R_50_FPN_1x.yaml" - - def setUp(self): - torch.manual_seed(43) - self.model = get_model_no_weights(self.CONFIG_PATH) - - def _test_eval(self, input_sizes): - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - self.model.eval() - self.model(inputs) - - def test_forward(self): - self._test_eval([(200, 250), (200, 249)]) diff --git a/detectron2/tests/modeling/test_roi_heads.py b/detectron2/tests/modeling/test_roi_heads.py deleted file mode 100644 index 86360e1e36bf2e2d969db426eb11e54318a95385..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_roi_heads.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import logging -import unittest -from copy import deepcopy -import torch -from torch import nn - -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.export.torchscript_patch import ( - freeze_training_mode, - patch_builtin_len, - patch_instances, -) -from detectron2.layers import ShapeSpec -from detectron2.modeling.proposal_generator.build import build_proposal_generator -from detectron2.modeling.roi_heads import ( - FastRCNNConvFCHead, - KRCNNConvDeconvUpsampleHead, - MaskRCNNConvUpsampleHead, - StandardROIHeads, - build_roi_heads, -) -from detectron2.projects import point_rend -from detectron2.structures import BitMasks, Boxes, ImageList, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage -from detectron2.utils.testing import assert_instances_allclose, random_boxes - -logger = logging.getLogger(__name__) - -""" -Make sure the losses of ROIHeads/RPN do not change, to avoid -breaking the forward logic by mistake. -This relies on assumption that pytorch's RNG is stable. -""" - - -class ROIHeadsTest(unittest.TestCase): - def test_roi_heads(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) - cfg.MODEL.MASK_ON = True - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} - - image_shape = (15, 15) - gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - gt_instance0 = Instances(image_shape) - gt_instance0.gt_boxes = Boxes(gt_boxes0) - gt_instance0.gt_classes = torch.tensor([2, 1]) - gt_instance0.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5) - gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) - gt_instance1 = Instances(image_shape) - gt_instance1.gt_boxes = Boxes(gt_boxes1) - gt_instance1.gt_classes = torch.tensor([1, 2]) - gt_instance1.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5) - gt_instances = [gt_instance0, gt_instance1] - - proposal_generator = build_proposal_generator(cfg, feature_shape) - roi_heads = StandardROIHeads(cfg, feature_shape) - - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator(images, features, gt_instances) - _, detector_losses = roi_heads(images, features, proposals, gt_instances) - - detector_losses.update(proposal_losses) - expected_losses = { - "loss_cls": 4.5253729820251465, - "loss_box_reg": 0.009785720147192478, - "loss_mask": 0.693184494972229, - "loss_rpn_cls": 0.08186662942171097, - "loss_rpn_loc": 0.1104838103055954, - } - succ = all( - torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0))) - for name in detector_losses.keys() - ) - self.assertTrue( - succ, - "Losses has changed! 
New losses: {}".format( - {k: v.item() for k, v in detector_losses.items()} - ), - ) - - def test_rroi_heads(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" - cfg.MODEL.ROI_HEADS.NAME = "RROIHeads" - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} - - image_shape = (15, 15) - gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32) - gt_instance0 = Instances(image_shape) - gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0) - gt_instance0.gt_classes = torch.tensor([2, 1]) - gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32) - gt_instance1 = Instances(image_shape) - gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1) - gt_instance1.gt_classes = torch.tensor([1, 2]) - gt_instances = [gt_instance0, gt_instance1] - - proposal_generator = build_proposal_generator(cfg, feature_shape) - roi_heads = build_roi_heads(cfg, feature_shape) - - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator(images, features, gt_instances) - _, detector_losses = roi_heads(images, features, proposals, gt_instances) - - detector_losses.update(proposal_losses) - expected_losses = { - "loss_cls": 4.365657806396484, - "loss_box_reg": 0.0015851043863222003, - "loss_rpn_cls": 0.2427729219198227, - "loss_rpn_loc": 0.3646621108055115, - } - succ = all( - torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0))) - for name in detector_losses.keys() - ) - self.assertTrue( - succ, - "Losses has changed! 
New losses: {}".format( - {k: v.item() for k, v in detector_losses.items()} - ), - ) - - def test_box_head_scriptability(self): - input_shape = ShapeSpec(channels=1024, height=14, width=14) - box_features = torch.randn(4, 1024, 14, 14) - - box_head = FastRCNNConvFCHead( - input_shape, conv_dims=[512, 512], fc_dims=[1024, 1024] - ).eval() - script_box_head = torch.jit.script(box_head) - - origin_output = box_head(box_features) - script_output = script_box_head(box_features) - self.assertTrue(torch.equal(origin_output, script_output)) - - def test_mask_head_scriptability(self): - input_shape = ShapeSpec(channels=1024) - mask_features = torch.randn(4, 1024, 14, 14) - - image_shapes = [(10, 10), (15, 15)] - pred_instance0 = Instances(image_shapes[0]) - pred_classes0 = torch.tensor([1, 2, 3], dtype=torch.int64) - pred_instance0.pred_classes = pred_classes0 - pred_instance1 = Instances(image_shapes[1]) - pred_classes1 = torch.tensor([4], dtype=torch.int64) - pred_instance1.pred_classes = pred_classes1 - - mask_head = MaskRCNNConvUpsampleHead( - input_shape, num_classes=80, conv_dims=[256, 256] - ).eval() - # pred_instance will be in-place changed during the inference - # process of `MaskRCNNConvUpsampleHead` - origin_outputs = mask_head(mask_features, deepcopy([pred_instance0, pred_instance1])) - - fields = {"pred_masks": torch.Tensor, "pred_classes": torch.Tensor} - with freeze_training_mode(mask_head), patch_instances(fields) as NewInstances: - sciript_mask_head = torch.jit.script(mask_head) - pred_instance0 = NewInstances.from_instances(pred_instance0) - pred_instance1 = NewInstances.from_instances(pred_instance1) - script_outputs = sciript_mask_head(mask_features, [pred_instance0, pred_instance1]) - - for origin_ins, script_ins in zip(origin_outputs, script_outputs): - assert_instances_allclose(origin_ins, script_ins, rtol=0) - - def test_keypoint_head_scriptability(self): - input_shape = ShapeSpec(channels=1024, height=14, width=14) - keypoint_features = torch.randn(4, 1024, 14, 14) - - image_shapes = [(10, 10), (15, 15)] - pred_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6], [1, 5, 2, 8]], dtype=torch.float32) - pred_instance0 = Instances(image_shapes[0]) - pred_instance0.pred_boxes = Boxes(pred_boxes0) - pred_boxes1 = torch.tensor([[7, 3, 10, 5]], dtype=torch.float32) - pred_instance1 = Instances(image_shapes[1]) - pred_instance1.pred_boxes = Boxes(pred_boxes1) - - keypoint_head = KRCNNConvDeconvUpsampleHead( - input_shape, num_keypoints=17, conv_dims=[512, 512] - ).eval() - origin_outputs = keypoint_head( - keypoint_features, deepcopy([pred_instance0, pred_instance1]) - ) - - fields = { - "pred_boxes": Boxes, - "pred_keypoints": torch.Tensor, - "pred_keypoint_heatmaps": torch.Tensor, - } - with freeze_training_mode(keypoint_head), patch_instances(fields) as NewInstances: - script_keypoint_head = torch.jit.script(keypoint_head) - pred_instance0 = NewInstances.from_instances(pred_instance0) - pred_instance1 = NewInstances.from_instances(pred_instance1) - script_outputs = script_keypoint_head( - keypoint_features, [pred_instance0, pred_instance1] - ) - - for origin_ins, script_ins in zip(origin_outputs, script_outputs): - assert_instances_allclose(origin_ins, script_ins, rtol=0) - - def test_StandardROIHeads_scriptability(self): - cfg = get_cfg() - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) - cfg.MODEL.MASK_ON = True - 
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.01 - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} - - roi_heads = StandardROIHeads(cfg, feature_shape).eval() - - proposal0 = Instances(image_sizes[0]) - proposal_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - proposal0.proposal_boxes = Boxes(proposal_boxes0) - proposal0.objectness_logits = torch.tensor([0.5, 0.7], dtype=torch.float32) - - proposal1 = Instances(image_sizes[1]) - proposal_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) - proposal1.proposal_boxes = Boxes(proposal_boxes1) - proposal1.objectness_logits = torch.tensor([0.1, 0.9], dtype=torch.float32) - proposals = [proposal0, proposal1] - - pred_instances, _ = roi_heads(images, features, proposals) - fields = { - "objectness_logits": torch.Tensor, - "proposal_boxes": Boxes, - "pred_classes": torch.Tensor, - "scores": torch.Tensor, - "pred_masks": torch.Tensor, - "pred_boxes": Boxes, - "pred_keypoints": torch.Tensor, - "pred_keypoint_heatmaps": torch.Tensor, - } - with freeze_training_mode(roi_heads), patch_instances(fields) as new_instances: - proposal0 = new_instances.from_instances(proposal0) - proposal1 = new_instances.from_instances(proposal1) - proposals = [proposal0, proposal1] - scripted_rot_heads = torch.jit.script(roi_heads) - scripted_pred_instances, _ = scripted_rot_heads(images, features, proposals) - - for instance, scripted_instance in zip(pred_instances, scripted_pred_instances): - assert_instances_allclose(instance, scripted_instance, rtol=0) - - def test_PointRend_mask_head_tracing(self): - cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml") - point_rend.add_pointrend_config(cfg) - cfg.MODEL.ROI_HEADS.IN_FEATURES = ["p2", "p3"] - cfg.MODEL.ROI_MASK_HEAD.NAME = "PointRendMaskHead" - cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "" - cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = True - chan = 256 - head = point_rend.PointRendMaskHead( - cfg, - { - "p2": ShapeSpec(channels=chan, stride=4), - "p3": ShapeSpec(channels=chan, stride=8), - }, - ) - - def gen_inputs(h, w, N): - p2 = torch.rand(1, chan, h, w) - p3 = torch.rand(1, chan, h // 2, w // 2) - boxes = random_boxes(N, max_coord=h) - return p2, p3, boxes - - class Wrap(nn.ModuleDict): - def forward(self, p2, p3, boxes): - features = { - "p2": p2, - "p3": p3, - } - inst = Instances((p2.shape[2] * 4, p2.shape[3] * 4)) - inst.pred_boxes = Boxes(boxes) - inst.pred_classes = torch.zeros(inst.__len__(), dtype=torch.long) - out = self.head(features, [inst])[0] - return out.pred_masks - - model = Wrap({"head": head}) - model.eval() - with torch.no_grad(), patch_builtin_len(): - traced = torch.jit.trace(model, gen_inputs(302, 208, 20)) - inputs = gen_inputs(100, 120, 30) - out_eager = model(*inputs) - out_trace = traced(*inputs) - self.assertTrue(torch.allclose(out_eager, out_trace)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/test_roi_pooler.py b/detectron2/tests/modeling/test_roi_pooler.py deleted file mode 100644 index e1d7c1c689cad32d8b8566e5d497341a5f3f5a36..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_roi_pooler.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) Facebook, 
Inc. and its affiliates. -import logging -import unittest -import torch - -from detectron2.modeling.poolers import ROIPooler -from detectron2.structures import Boxes, RotatedBoxes -from detectron2.utils.testing import random_boxes - -logger = logging.getLogger(__name__) - - -class TestROIPooler(unittest.TestCase): - def _test_roialignv2_roialignrotated_match(self, device): - pooler_resolution = 14 - canonical_level = 4 - canonical_scale_factor = 2**canonical_level - pooler_scales = (1.0 / canonical_scale_factor,) - sampling_ratio = 0 - - N, C, H, W = 2, 4, 10, 8 - N_rois = 10 - std = 11 - mean = 0 - feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean - - features = [feature.to(device)] - - rois = [] - rois_rotated = [] - for _ in range(N): - boxes = random_boxes(N_rois, W * canonical_scale_factor) - rotated_boxes = torch.zeros(N_rois, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - rois.append(Boxes(boxes).to(device)) - rois_rotated.append(RotatedBoxes(rotated_boxes).to(device)) - - roialignv2_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignV2", - ) - - roialignv2_out = roialignv2_pooler(features, rois) - - roialignrotated_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignRotated", - ) - - roialignrotated_out = roialignrotated_pooler(features, rois_rotated) - - self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)) - - def test_roialignv2_roialignrotated_match_cpu(self): - self._test_roialignv2_roialignrotated_match(device="cpu") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_roialignv2_roialignrotated_match_cuda(self): - self._test_roialignv2_roialignrotated_match(device="cuda") - - def _test_scriptability(self, device): - pooler_resolution = 14 - canonical_level = 4 - canonical_scale_factor = 2**canonical_level - pooler_scales = (1.0 / canonical_scale_factor,) - sampling_ratio = 0 - - N, C, H, W = 2, 4, 10, 8 - N_rois = 10 - std = 11 - mean = 0 - feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean - - features = [feature.to(device)] - - rois = [] - for _ in range(N): - boxes = random_boxes(N_rois, W * canonical_scale_factor) - - rois.append(Boxes(boxes).to(device)) - - roialignv2_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignV2", - ) - - roialignv2_out = roialignv2_pooler(features, rois) - scripted_roialignv2_out = torch.jit.script(roialignv2_pooler)(features, rois) - self.assertTrue(torch.equal(roialignv2_out, scripted_roialignv2_out)) - - def test_scriptability_cpu(self): - self._test_scriptability(device="cpu") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_scriptability_gpu(self): - self._test_scriptability(device="cuda") - - def test_no_images(self): - N, C, H, W = 0, 32, 32, 32 - feature = torch.rand(N, C, H, W) - 0.5 - features = [feature] - pooler = ROIPooler( - output_size=14, scales=(1.0,), sampling_ratio=0.0, pooler_type="ROIAlignV2" - ) - output = pooler.forward(features, []) - self.assertEqual(output.shape, (0, C, 14, 14)) - - def test_roi_pooler_tracing(self): - class Model(torch.nn.Module): - def __init__(self, roi): - super(Model, 
self).__init__() - self.roi = roi - - def forward(self, x, boxes): - return self.roi(x, [Boxes(boxes)]) - - pooler_resolution = 14 - canonical_level = 4 - canonical_scale_factor = 2**canonical_level - pooler_scales = (1.0 / canonical_scale_factor, 0.5 / canonical_scale_factor) - sampling_ratio = 0 - - N, C, H, W = 1, 4, 10, 8 - N_rois = 10 - std = 11 - mean = 0 - feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean - feature = [feature, feature] - - rois = random_boxes(N_rois, W * canonical_scale_factor) - # Add one larger box so that this level has only one box. - # This may trigger the bug https://github.com/pytorch/pytorch/issues/49852 - # that we shall workaround. - rois = torch.cat([rois, torch.tensor([[0, 0, 448, 448]])]) - - model = Model( - ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlign", - ) - ) - - with torch.no_grad(): - func = torch.jit.trace(model, (feature, rois)) - o = func(feature, rois) - self.assertEqual(o.shape, (11, 4, 14, 14)) - o = func(feature, rois[:5]) - self.assertEqual(o.shape, (5, 4, 14, 14)) - o = func(feature, random_boxes(20, W * canonical_scale_factor)) - self.assertEqual(o.shape, (20, 4, 14, 14)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/modeling/test_rpn.py b/detectron2/tests/modeling/test_rpn.py deleted file mode 100644 index f14faae56e580d3d4762d31273b9f65c5774346b..0000000000000000000000000000000000000000 --- a/detectron2/tests/modeling/test_rpn.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.export import scripting_with_instances -from detectron2.layers import ShapeSpec -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.proposal_generator import RPN, build_proposal_generator -from detectron2.modeling.proposal_generator.proposal_utils import ( - add_ground_truth_to_proposals, - find_top_rpn_proposals, -) -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class RPNTest(unittest.TestCase): - def get_gt_and_features(self): - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - image_shape = (15, 15) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - gt_instances = Instances(image_shape) - gt_instances.gt_boxes = Boxes(gt_boxes) - return (gt_instances, features, images, image_sizes) - - def test_rpn(self): - torch.manual_seed(121) - cfg = get_cfg() - backbone = build_backbone(cfg) - proposal_generator = RPN(cfg, backbone.output_shape()) - (gt_instances, features, images, image_sizes) = self.get_gt_and_features() - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - - expected_losses = { - "loss_rpn_cls": torch.tensor(0.08011703193), - "loss_rpn_loc": torch.tensor(0.101470276), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], 
expected_losses[name]), err_msg) - - self.assertEqual(len(proposals), len(image_sizes)) - for proposal, im_size in zip(proposals, image_sizes): - self.assertEqual(proposal.image_size, im_size) - - expected_proposal_box = torch.tensor([[0, 0, 10, 10], [7.2702, 0, 10, 10]]) - expected_objectness_logit = torch.tensor([0.1596, -0.0007]) - self.assertTrue( - torch.allclose(proposals[0].proposal_boxes.tensor, expected_proposal_box, atol=1e-4) - ) - self.assertTrue( - torch.allclose(proposals[0].objectness_logits, expected_objectness_logit, atol=1e-4) - ) - - def verify_rpn(self, conv_dims, expected_conv_dims): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.RPN.CONV_DIMS = conv_dims - backbone = build_backbone(cfg) - proposal_generator = RPN(cfg, backbone.output_shape()) - for k, conv in enumerate(proposal_generator.rpn_head.conv): - self.assertEqual(expected_conv_dims[k], conv.out_channels) - return proposal_generator - - def test_rpn_larger_num_convs(self): - conv_dims = [64, 64, 64, 64, 64] - proposal_generator = self.verify_rpn(conv_dims, conv_dims) - (gt_instances, features, images, image_sizes) = self.get_gt_and_features() - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - expected_losses = { - "loss_rpn_cls": torch.tensor(0.08122821152), - "loss_rpn_loc": torch.tensor(0.10064548254), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) - - def test_rpn_conv_dims_not_set(self): - conv_dims = [-1, -1, -1] - expected_conv_dims = [1024, 1024, 1024] - self.verify_rpn(conv_dims, expected_conv_dims) - - def test_rpn_scriptability(self): - cfg = get_cfg() - proposal_generator = RPN(cfg, {"res4": ShapeSpec(channels=1024, stride=16)}).eval() - num_images = 2 - images_tensor = torch.rand(num_images, 30, 40) - image_sizes = [(32, 32), (30, 40)] - images = ImageList(images_tensor, image_sizes) - features = {"res4": torch.rand(num_images, 1024, 1, 2)} - - fields = {"proposal_boxes": Boxes, "objectness_logits": torch.Tensor} - proposal_generator_ts = scripting_with_instances(proposal_generator, fields) - - proposals, _ = proposal_generator(images, features) - proposals_ts, _ = proposal_generator_ts(images, features) - - for proposal, proposal_ts in zip(proposals, proposals_ts): - self.assertEqual(proposal.image_size, proposal_ts.image_size) - self.assertTrue( - torch.equal(proposal.proposal_boxes.tensor, proposal_ts.proposal_boxes.tensor) - ) - self.assertTrue(torch.equal(proposal.objectness_logits, proposal_ts.objectness_logits)) - - def test_rrpn(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]] - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" - backbone = build_backbone(cfg) - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - image_shape = (15, 15) - num_channels = 1024 - features = 
{"res4": torch.rand(num_images, num_channels, 1, 2)} - gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) - gt_instances = Instances(image_shape) - gt_instances.gt_boxes = RotatedBoxes(gt_boxes) - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - - expected_losses = { - "loss_rpn_cls": torch.tensor(0.04291602224), - "loss_rpn_loc": torch.tensor(0.145077362), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) - - expected_proposal_box = torch.tensor( - [ - [-1.77999556, 0.78155339, 68.04367828, 14.78156471, 60.59333801], - [13.82740974, -1.50282836, 34.67269897, 29.19676590, -3.81942749], - [8.10392570, -0.99071521, 145.39100647, 32.13126373, 3.67242432], - [5.00000000, 4.57370186, 10.00000000, 9.14740372, 0.89196777], - ] - ) - - expected_objectness_logit = torch.tensor([0.10924313, 0.09881870, 0.07649877, 0.05858029]) - - torch.set_printoptions(precision=8, sci_mode=False) - - self.assertEqual(len(proposals), len(image_sizes)) - - proposal = proposals[0] - # It seems that there's some randomness in the result across different machines: - # This test can be run on a local machine for 100 times with exactly the same result, - # However, a different machine might produce slightly different results, - # thus the atol here. - err_msg = "computed proposal boxes = {}, expected {}".format( - proposal.proposal_boxes.tensor, expected_proposal_box - ) - self.assertTrue( - torch.allclose(proposal.proposal_boxes.tensor[:4], expected_proposal_box, atol=1e-5), - err_msg, - ) - - err_msg = "computed objectness logits = {}, expected {}".format( - proposal.objectness_logits, expected_objectness_logit - ) - self.assertTrue( - torch.allclose(proposal.objectness_logits[:4], expected_objectness_logit, atol=1e-5), - err_msg, - ) - - def test_find_rpn_proposals_inf(self): - N, Hi, Wi, A = 3, 3, 3, 3 - proposals = [torch.rand(N, Hi * Wi * A, 4)] - pred_logits = [torch.rand(N, Hi * Wi * A)] - pred_logits[0][1][3:5].fill_(float("inf")) - find_top_rpn_proposals(proposals, pred_logits, [(10, 10)], 0.5, 1000, 1000, 0, False) - - def test_find_rpn_proposals_tracing(self): - N, Hi, Wi, A = 3, 50, 50, 9 - proposal = torch.rand(N, Hi * Wi * A, 4) - pred_logit = torch.rand(N, Hi * Wi * A) - - def func(proposal, logit, image_size): - r = find_top_rpn_proposals( - [proposal], [logit], [image_size], 0.7, 1000, 1000, 0, False - )[0] - size = r.image_size - if not isinstance(size, torch.Tensor): - size = torch.tensor(size) - return (size, r.proposal_boxes.tensor, r.objectness_logits) - - other_inputs = [] - # test that it generalizes to other shapes - for Hi, Wi, shp in [(30, 30, 60), (10, 10, 800)]: - other_inputs.append( - ( - torch.rand(N, Hi * Wi * A, 4), - torch.rand(N, Hi * Wi * A), - torch.tensor([shp, shp]), - ) - ) - torch.jit.trace( - func, (proposal, pred_logit, torch.tensor([100, 100])), check_inputs=other_inputs - ) - - def test_append_gt_to_proposal(self): - proposals = Instances( - (10, 10), - **{ - "proposal_boxes": Boxes(torch.empty((0, 4))), - "objectness_logits": torch.tensor([]), - "custom_attribute": torch.tensor([]), - } - ) - gt_boxes = Boxes(torch.tensor([[0, 0, 1, 1]])) - - self.assertRaises(AssertionError, add_ground_truth_to_proposals, 
[gt_boxes], [proposals]) - - gt_instances = Instances((10, 10)) - gt_instances.gt_boxes = gt_boxes - - self.assertRaises( - AssertionError, add_ground_truth_to_proposals, [gt_instances], [proposals] - ) - - gt_instances.custom_attribute = torch.tensor([1]) - gt_instances.custom_attribute2 = torch.tensor([1]) - new_proposals = add_ground_truth_to_proposals([gt_instances], [proposals])[0] - - self.assertEqual(new_proposals.custom_attribute[0], 1) - # new proposals should only include the attributes in proposals - self.assertRaises(AttributeError, lambda: new_proposals.custom_attribute2) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/structures/__init__.py b/detectron2/tests/structures/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/tests/structures/test_boxes.py b/detectron2/tests/structures/test_boxes.py deleted file mode 100644 index 101191818c511cf90c3c8f2cbc55aa49295697fa..0000000000000000000000000000000000000000 --- a/detectron2/tests/structures/test_boxes.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import json -import math -import numpy as np -import unittest -import torch - -from detectron2.structures import Boxes, BoxMode, pairwise_ioa, pairwise_iou -from detectron2.utils.testing import reload_script_model - - -class TestBoxMode(unittest.TestCase): - def _convert_xy_to_wh(self, x): - return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - - def _convert_xywha_to_xyxy(self, x): - return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS) - - def _convert_xywh_to_xywha(self, x): - return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) - - def test_convert_int_mode(self): - BoxMode.convert([1, 2, 3, 4], 0, 1) - - def test_box_convert_list(self): - for tp in [list, tuple]: - box = tp([5.0, 5.0, 10.0, 10.0]) - output = self._convert_xy_to_wh(box) - self.assertIsInstance(output, tp) - self.assertIsInstance(output[0], float) - self.assertEqual(output, tp([5.0, 5.0, 5.0, 5.0])) - - with self.assertRaises(Exception): - self._convert_xy_to_wh([box]) - - def test_box_convert_array(self): - box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - def test_box_convert_cpu_tensor(self): - box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - output = output.numpy() - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_box_convert_cuda_tensor(self): - box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda() - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - self.assertEqual(output.device, box.device) - output = output.cpu().numpy() - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - def test_box_convert_xywha_to_xyxy_list(self): - for tp in [list, tuple]: - box = tp([50, 50, 30, 20, 0]) - output = self._convert_xywha_to_xyxy(box) - self.assertIsInstance(output, tp) - 
self.assertEqual(output, tp([35, 40, 65, 60])) - - with self.assertRaises(Exception): - self._convert_xywha_to_xyxy([box]) - - def test_box_convert_xywha_to_xyxy_array(self): - for dtype in [np.float64, np.float32]: - box = np.asarray( - [ - [50, 50, 30, 20, 0], - [50, 50, 30, 20, 90], - [1, 1, math.sqrt(2), math.sqrt(2), -45], - ], - dtype=dtype, - ) - output = self._convert_xywha_to_xyxy(box) - self.assertEqual(output.dtype, box.dtype) - expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) - self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywha_to_xyxy_tensor(self): - for dtype in [torch.float32, torch.float64]: - box = torch.tensor( - [ - [50, 50, 30, 20, 0], - [50, 50, 30, 20, 90], - [1, 1, math.sqrt(2), math.sqrt(2), -45], - ], - dtype=dtype, - ) - output = self._convert_xywha_to_xyxy(box) - self.assertEqual(output.dtype, box.dtype) - expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) - - self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywh_to_xywha_list(self): - for tp in [list, tuple]: - box = tp([50, 50, 30, 20]) - output = self._convert_xywh_to_xywha(box) - self.assertIsInstance(output, tp) - self.assertEqual(output, tp([65, 60, 30, 20, 0])) - - with self.assertRaises(Exception): - self._convert_xywh_to_xywha([box]) - - def test_box_convert_xywh_to_xywha_array(self): - for dtype in [np.float64, np.float32]: - box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) - output = self._convert_xywh_to_xywha(box) - self.assertEqual(output.dtype, box.dtype) - expected = np.asarray( - [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype - ) - self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywh_to_xywha_tensor(self): - for dtype in [torch.float32, torch.float64]: - box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) - output = self._convert_xywh_to_xywha(box) - self.assertEqual(output.dtype, box.dtype) - expected = torch.tensor( - [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype - ) - - self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_json_serializable(self): - payload = {"box_mode": BoxMode.XYWH_REL} - try: - json.dumps(payload) - except Exception: - self.fail("JSON serialization failed") - - def test_json_deserializable(self): - payload = '{"box_mode": 2}' - obj = json.loads(payload) - try: - obj["box_mode"] = BoxMode(obj["box_mode"]) - except Exception: - self.fail("JSON deserialization failed") - - -class TestBoxIOU(unittest.TestCase): - def create_boxes(self): - boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) - - boxes2 = torch.tensor( - [ - [0.0, 0.0, 1.0, 1.0], - [0.0, 0.0, 0.5, 1.0], - [0.0, 0.0, 1.0, 0.5], - [0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 1.0], - [0.5, 0.5, 1.5, 1.5], - ] - ) - return boxes1, boxes2 - - def test_pairwise_iou(self): - boxes1, boxes2 = self.create_boxes() - expected_ious = torch.tensor( - [ - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - ] - ) - - ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_ioa(self): - boxes1, boxes2 = self.create_boxes() - expected_ioas = torch.tensor( - [[1.0, 1.0, 
1.0, 1.0, 1.0, 0.25], [1.0, 1.0, 1.0, 1.0, 1.0, 0.25]] - ) - ioas = pairwise_ioa(Boxes(boxes1), Boxes(boxes2)) - self.assertTrue(torch.allclose(ioas, expected_ioas)) - - -class TestBoxes(unittest.TestCase): - def test_empty_cat(self): - x = Boxes.cat([]) - self.assertTrue(x.tensor.shape, (0, 4)) - - def test_to(self): - x = Boxes(torch.rand(3, 4)) - self.assertEqual(x.to(device="cpu").tensor.device.type, "cpu") - - def test_scriptability(self): - def func(x): - boxes = Boxes(x) - test = boxes.to(torch.device("cpu")).tensor - return boxes.area(), test - - f = torch.jit.script(func) - f = reload_script_model(f) - f(torch.rand((3, 4))) - - data = torch.rand((3, 4)) - - def func_cat(x: torch.Tensor): - boxes1 = Boxes(x) - boxes2 = Boxes(x) - # boxes3 = Boxes.cat([boxes1, boxes2]) # this is not supported by torchsript for now. - boxes3 = boxes1.cat([boxes1, boxes2]) - return boxes3 - - f = torch.jit.script(func_cat) - script_box = f(data) - self.assertTrue(torch.equal(torch.cat([data, data]), script_box.tensor)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/structures/test_imagelist.py b/detectron2/tests/structures/test_imagelist.py deleted file mode 100644 index e446e44a37f5d8f9a68362e4b93a291d314d5d68..0000000000000000000000000000000000000000 --- a/detectron2/tests/structures/test_imagelist.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import unittest -from typing import List, Sequence, Tuple -import torch - -from detectron2.structures import ImageList - - -class TestImageList(unittest.TestCase): - def test_imagelist_padding_tracing(self): - # test that the trace does not contain hard-coded constant sizes - def to_imagelist(tensors: Sequence[torch.Tensor]): - image_list = ImageList.from_tensors(tensors, 4) - return image_list.tensor, image_list.image_sizes - - def _tensor(*shape): - return torch.ones(shape, dtype=torch.float32) - - # test CHW (inputs needs padding vs. 
no padding) - for shape in [(3, 10, 10), (3, 12, 12)]: - func = torch.jit.trace(to_imagelist, ([_tensor(*shape)],)) - tensor, image_sizes = func([_tensor(3, 15, 20)]) - self.assertEqual(tensor.shape, (1, 3, 16, 20), tensor.shape) - self.assertEqual(image_sizes[0].tolist(), [15, 20], image_sizes[0]) - - # test HW - func = torch.jit.trace(to_imagelist, ([_tensor(10, 10)],)) - tensor, image_sizes = func([_tensor(15, 20)]) - self.assertEqual(tensor.shape, (1, 16, 20), tensor.shape) - self.assertEqual(image_sizes[0].tolist(), [15, 20], image_sizes[0]) - - # test 2x CHW - func = torch.jit.trace( - to_imagelist, - ([_tensor(3, 16, 10), _tensor(3, 13, 11)],), - ) - tensor, image_sizes = func([_tensor(3, 25, 20), _tensor(3, 10, 10)]) - self.assertEqual(tensor.shape, (2, 3, 28, 20), tensor.shape) - self.assertEqual(image_sizes[0].tolist(), [25, 20], image_sizes[0]) - self.assertEqual(image_sizes[1].tolist(), [10, 10], image_sizes[1]) - # support calling with different spatial sizes, but not with different #images - - def test_imagelist_scriptability(self): - image_nums = 2 - image_tensor = torch.randn((image_nums, 10, 20), dtype=torch.float32) - image_shape = [(10, 20)] * image_nums - - def f(image_tensor, image_shape: List[Tuple[int, int]]): - return ImageList(image_tensor, image_shape) - - ret = f(image_tensor, image_shape) - ret_script = torch.jit.script(f)(image_tensor, image_shape) - - self.assertEqual(len(ret), len(ret_script)) - for i in range(image_nums): - self.assertTrue(torch.equal(ret[i], ret_script[i])) - - def test_imagelist_from_tensors_scriptability(self): - image_tensor_0 = torch.randn(10, 20, dtype=torch.float32) - image_tensor_1 = torch.randn(12, 22, dtype=torch.float32) - inputs = [image_tensor_0, image_tensor_1] - - def f(image_tensor: List[torch.Tensor]): - return ImageList.from_tensors(image_tensor, 10) - - ret = f(inputs) - ret_script = torch.jit.script(f)(inputs) - - self.assertEqual(len(ret), len(ret_script)) - self.assertTrue(torch.equal(ret.tensor, ret_script.tensor)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/structures/test_instances.py b/detectron2/tests/structures/test_instances.py deleted file mode 100644 index a352f74313ae9b2b7a42398f0ef4606fcb4a610c..0000000000000000000000000000000000000000 --- a/detectron2/tests/structures/test_instances.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import unittest -import torch -from torch import Tensor - -from detectron2.export.torchscript import patch_instances -from detectron2.structures import Boxes, Instances -from detectron2.utils.testing import convert_scripted_instances - - -class TestInstances(unittest.TestCase): - def test_int_indexing(self): - attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]]) - attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4]) - instances = Instances((100, 100)) - instances.attr1 = attr1 - instances.attr2 = attr2 - for i in range(-len(instances), len(instances)): - inst = instances[i] - self.assertEqual((inst.attr1 == attr1[i]).all(), True) - self.assertEqual((inst.attr2 == attr2[i]).all(), True) - - self.assertRaises(IndexError, lambda: instances[len(instances)]) - self.assertRaises(IndexError, lambda: instances[-len(instances) - 1]) - - def test_script_new_fields(self): - def get_mask(x: Instances) -> torch.Tensor: - return x.mask - - class f(torch.nn.Module): - def forward(self, x: Instances): - proposal_boxes = x.proposal_boxes # noqa F841 - objectness_logits = x.objectness_logits # noqa F841 - return x - - class g(torch.nn.Module): - def forward(self, x: Instances): - return get_mask(x) - - class g2(torch.nn.Module): - def __init__(self): - super().__init__() - self.g = g() - - def forward(self, x: Instances): - proposal_boxes = x.proposal_boxes # noqa F841 - return x, self.g(x) - - fields = {"proposal_boxes": Boxes, "objectness_logits": Tensor} - with patch_instances(fields): - torch.jit.script(f()) - - # can't script anymore after exiting the context - with self.assertRaises(Exception): - # will create a ConcreteType for g - torch.jit.script(g2()) - - new_fields = {"mask": Tensor} - with patch_instances(new_fields): - # will compile g with a different Instances; this should pass - torch.jit.script(g()) - with self.assertRaises(Exception): - torch.jit.script(g2()) - - new_fields = {"mask": Tensor, "proposal_boxes": Boxes} - with patch_instances(new_fields) as NewInstances: - # get_mask will be compiled with a different Instances; this should pass - scripted_g2 = torch.jit.script(g2()) - x = NewInstances((3, 4)) - x.mask = torch.rand(3) - x.proposal_boxes = Boxes(torch.rand(3, 4)) - scripted_g2(x) # it should accept the new Instances object and run successfully - - def test_script_access_fields(self): - class f(torch.nn.Module): - def forward(self, x: Instances): - proposal_boxes = x.proposal_boxes - objectness_logits = x.objectness_logits - return proposal_boxes.tensor + objectness_logits - - fields = {"proposal_boxes": Boxes, "objectness_logits": Tensor} - with patch_instances(fields): - torch.jit.script(f()) - - def test_script_len(self): - class f(torch.nn.Module): - def forward(self, x: Instances): - return len(x) - - class g(torch.nn.Module): - def forward(self, x: Instances): - return len(x) - - image_shape = (15, 15) - - fields = {"proposal_boxes": Boxes} - with patch_instances(fields) as new_instance: - script_module = torch.jit.script(f()) - x = new_instance(image_shape) - with self.assertRaises(Exception): - script_module(x) - box_tensors = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) - x.proposal_boxes = Boxes(box_tensors) - length = script_module(x) - self.assertEqual(length, 2) - - fields = {"objectness_logits": Tensor} - with patch_instances(fields) as new_instance: - script_module = torch.jit.script(g()) - x = new_instance(image_shape) - objectness_logits = torch.tensor([1.0]).reshape(1, 1) - x.objectness_logits = objectness_logits - length = 
script_module(x) - self.assertEqual(length, 1) - - def test_script_has(self): - class f(torch.nn.Module): - def forward(self, x: Instances): - return x.has("proposal_boxes") - - image_shape = (15, 15) - fields = {"proposal_boxes": Boxes} - with patch_instances(fields) as new_instance: - script_module = torch.jit.script(f()) - x = new_instance(image_shape) - self.assertFalse(script_module(x)) - - box_tensors = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) - x.proposal_boxes = Boxes(box_tensors) - self.assertTrue(script_module(x)) - - def test_script_to(self): - class f(torch.nn.Module): - def forward(self, x: Instances): - return x.to(torch.device("cpu")) - - image_shape = (15, 15) - fields = {"proposal_boxes": Boxes, "a": Tensor} - with patch_instances(fields) as new_instance: - script_module = torch.jit.script(f()) - x = new_instance(image_shape) - script_module(x) - - box_tensors = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) - x.proposal_boxes = Boxes(box_tensors) - x.a = box_tensors - script_module(x) - - def test_script_getitem(self): - class f(torch.nn.Module): - def forward(self, x: Instances, idx): - return x[idx] - - image_shape = (15, 15) - fields = {"proposal_boxes": Boxes, "a": Tensor} - inst = Instances(image_shape) - inst.proposal_boxes = Boxes(torch.rand(4, 4)) - inst.a = torch.rand(4, 10) - idx = torch.tensor([True, False, True, False]) - with patch_instances(fields) as new_instance: - script_module = torch.jit.script(f()) - - out = f()(inst, idx) - out_scripted = script_module(new_instance.from_instances(inst), idx) - self.assertTrue( - torch.equal(out.proposal_boxes.tensor, out_scripted.proposal_boxes.tensor) - ) - self.assertTrue(torch.equal(out.a, out_scripted.a)) - - def test_from_to_instances(self): - orig = Instances((30, 30)) - orig.proposal_boxes = Boxes(torch.rand(3, 4)) - - fields = {"proposal_boxes": Boxes, "a": Tensor} - with patch_instances(fields) as NewInstances: - # convert to NewInstances and back - new1 = NewInstances.from_instances(orig) - new2 = convert_scripted_instances(new1) - self.assertTrue(torch.equal(orig.proposal_boxes.tensor, new1.proposal_boxes.tensor)) - self.assertTrue(torch.equal(orig.proposal_boxes.tensor, new2.proposal_boxes.tensor)) - - def test_script_init_args(self): - def f(x: Tensor): - image_shape = (15, 15) - # __init__ can take arguments - inst = Instances(image_shape, a=x, proposal_boxes=Boxes(x)) - inst2 = Instances(image_shape, a=x) - return inst.a, inst2.a - - fields = {"proposal_boxes": Boxes, "a": Tensor} - with patch_instances(fields): - script_f = torch.jit.script(f) - x = torch.randn(3, 4) - outputs = script_f(x) - self.assertTrue(torch.equal(outputs[0], x)) - self.assertTrue(torch.equal(outputs[1], x)) - - def test_script_cat(self): - def f(x: Tensor): - image_shape = (15, 15) - # __init__ can take arguments - inst = Instances(image_shape, a=x) - inst2 = Instances(image_shape, a=x) - - inst3 = Instances(image_shape, proposal_boxes=Boxes(x)) - return inst.cat([inst, inst2]), inst3.cat([inst3, inst3]) - - fields = {"proposal_boxes": Boxes, "a": Tensor} - with patch_instances(fields): - script_f = torch.jit.script(f) - x = torch.randn(3, 4) - output, output2 = script_f(x) - self.assertTrue(torch.equal(output.a, torch.cat([x, x]))) - self.assertFalse(output.has("proposal_boxes")) - self.assertTrue(torch.equal(output2.proposal_boxes.tensor, torch.cat([x, x]))) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/structures/test_keypoints.py b/detectron2/tests/structures/test_keypoints.py deleted file 
mode 100644 index adc616e42341343e503afcbe181dbfae3f8ea063..0000000000000000000000000000000000000000 --- a/detectron2/tests/structures/test_keypoints.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import unittest -import torch - -from detectron2.structures.keypoints import Keypoints - - -class TestKeypoints(unittest.TestCase): - def test_cat_keypoints(self): - keypoints1 = Keypoints(torch.rand(2, 21, 3)) - keypoints2 = Keypoints(torch.rand(4, 21, 3)) - - cat_keypoints = keypoints1.cat([keypoints1, keypoints2]) - self.assertTrue(torch.all(cat_keypoints.tensor[:2] == keypoints1.tensor).item()) - self.assertTrue(torch.all(cat_keypoints.tensor[2:] == keypoints2.tensor).item()) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/structures/test_masks.py b/detectron2/tests/structures/test_masks.py deleted file mode 100644 index 7991eb0b35724f2f2f402d788a273d68b7cad5f2..0000000000000000000000000000000000000000 --- a/detectron2/tests/structures/test_masks.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import unittest -import torch - -from detectron2.structures.masks import BitMasks, PolygonMasks, polygons_to_bitmask - - -class TestBitMask(unittest.TestCase): - def test_get_bounding_box(self): - masks = torch.tensor( - [ - [ - [False, False, False, True], - [False, False, True, True], - [False, True, True, False], - [False, True, True, False], - ], - [ - [False, False, False, False], - [False, False, True, False], - [False, True, True, False], - [False, True, True, False], - ], - torch.zeros(4, 4), - ] - ) - bitmask = BitMasks(masks) - box_true = torch.tensor([[1, 0, 4, 4], [1, 1, 3, 4], [0, 0, 0, 0]], dtype=torch.float32) - box = bitmask.get_bounding_boxes() - self.assertTrue(torch.all(box.tensor == box_true).item()) - - for box in box_true: - poly = box[[0, 1, 2, 1, 2, 3, 0, 3]].numpy() - mask = polygons_to_bitmask([poly], 4, 4) - reconstruct_box = BitMasks(mask[None, :, :]).get_bounding_boxes()[0].tensor - self.assertTrue(torch.all(box == reconstruct_box).item()) - - reconstruct_box = PolygonMasks([[poly]]).get_bounding_boxes()[0].tensor - self.assertTrue(torch.all(box == reconstruct_box).item()) - - def test_from_empty_polygons(self): - masks = BitMasks.from_polygon_masks([], 100, 100) - self.assertEqual(masks.tensor.shape, (0, 100, 100)) - - def test_getitem(self): - masks = BitMasks(torch.ones(3, 10, 10)) - self.assertEqual(masks[1].tensor.shape, (1, 10, 10)) - self.assertEqual(masks[1:3].tensor.shape, (2, 10, 10)) - self.assertEqual(masks[torch.tensor([True, False, False])].tensor.shape, (1, 10, 10)) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/structures/test_rotated_boxes.py b/detectron2/tests/structures/test_rotated_boxes.py deleted file mode 100644 index 478f034a4b8e1b48a1ace5c0a4823ecdf15c8536..0000000000000000000000000000000000000000 --- a/detectron2/tests/structures/test_rotated_boxes.py +++ /dev/null @@ -1,441 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-from __future__ import absolute_import, division, print_function, unicode_literals -import logging -import math -import random -import unittest -import torch -from fvcore.common.benchmark import benchmark - -from detectron2.layers.rotated_boxes import pairwise_iou_rotated -from detectron2.structures.boxes import Boxes -from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou -from detectron2.utils.testing import reload_script_model - -logger = logging.getLogger(__name__) - - -class TestRotatedBoxesLayer(unittest.TestCase): - def test_iou_0_dim_cpu(self): - boxes1 = torch.rand(0, 5, dtype=torch.float32) - boxes2 = torch.rand(10, 5, dtype=torch.float32) - expected_ious = torch.zeros(0, 10, dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - boxes1 = torch.rand(10, 5, dtype=torch.float32) - boxes2 = torch.rand(0, 5, dtype=torch.float32) - expected_ious = torch.zeros(10, 0, dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_0_dim_cuda(self): - boxes1 = torch.rand(0, 5, dtype=torch.float32) - boxes2 = torch.rand(10, 5, dtype=torch.float32) - expected_ious = torch.zeros(0, 10, dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - boxes1 = torch.rand(10, 5, dtype=torch.float32) - boxes2 = torch.rand(0, 5, dtype=torch.float32) - expected_ious = torch.zeros(10, 0, dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - def test_iou_half_overlap_cpu(self): - boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) - boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) - expected_ious = torch.tensor([[0.5]], dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_half_overlap_cuda(self): - boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) - boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) - expected_ious = torch.tensor([[0.5]], dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - def test_iou_precision(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device) - boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device) - iou = 8.3 / 10.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_too_many_boxes_cuda(self): - s1, s2 = 5, 1289035 - boxes1 = torch.zeros(s1, 5) - boxes2 = torch.zeros(s2, 5) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2)) - - def test_iou_extreme(self): - # Cause floating point issues in cuda kernels (#1266) - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = 
torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) - boxes2 = torch.tensor( - [ - [ - -1.117407639806935e17, - 1.3858420478349148e18, - 1000.0000610351562, - 1000.0000610351562, - 1612.0, - ] - ], - device=device, - ) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(ious.min() >= 0, ious) - - def test_iou_issue_2154(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor( - [ - [ - 296.6620178222656, - 458.73883056640625, - 23.515729904174805, - 47.677001953125, - 0.08795166015625, - ] - ], - device=device, - ) - boxes2 = torch.tensor( - [[296.66201, 458.73882000000003, 23.51573, 47.67702, 0.087951]], - device=device, - ) - ious = pairwise_iou_rotated(boxes1, boxes2) - expected_ious = torch.tensor([[1.0]], dtype=torch.float32) - self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) - - def test_iou_issue_2167(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor( - [ - [ - 2563.74462890625000000000, - 1436.79016113281250000000, - 2174.70336914062500000000, - 214.09500122070312500000, - 115.11834716796875000000, - ] - ], - device=device, - ) - boxes2 = torch.tensor( - [ - [ - 2563.74462890625000000000, - 1436.79028320312500000000, - 2174.70288085937500000000, - 214.09495544433593750000, - 115.11835479736328125000, - ] - ], - device=device, - ) - ious = pairwise_iou_rotated(boxes1, boxes2) - expected_ious = torch.tensor([[1.0]], dtype=torch.float32) - self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) - - -class TestRotatedBoxesStructure(unittest.TestCase): - def test_clip_area_0_degree(self): - for _ in range(50): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - # Convert from (x_ctr, y_ctr, w, h, 0) to (x1, y1, x2, y2) - boxes_4d = torch.zeros(num_boxes, 4) - boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0 - boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0 - boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0 - boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0 - - image_size = (500, 600) - test_boxes_4d = Boxes(boxes_4d) - test_boxes_5d = RotatedBoxes(boxes_5d) - # Before clip - areas_4d = test_boxes_4d.area() - areas_5d = test_boxes_5d.area() - self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) - # After clip - test_boxes_4d.clip(image_size) - test_boxes_5d.clip(image_size) - areas_4d = test_boxes_4d.area() - areas_5d = test_boxes_5d.area() - self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) - - def test_clip_area_arbitrary_angle(self): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - clip_angle_threshold = random.uniform(0, 180) - - image_size = (500, 600) - test_boxes_5d = RotatedBoxes(boxes_5d) - # Before clip - areas_before = test_boxes_5d.area() - # After clip - test_boxes_5d.clip(image_size, clip_angle_threshold) - areas_diff = test_boxes_5d.area() - areas_before 
- - # the areas should only decrease after clipping - self.assertTrue(torch.all(areas_diff <= 0)) - # whenever the box is clipped (thus the area shrinks), - # the angle for the box must be within the clip_angle_threshold - # Note that the clip function will normalize the angle range - # to be within (-180, 180] - - self.assertTrue( - torch.all( - torch.abs(test_boxes_5d.tensor[:, 4][torch.where(areas_diff < 0)]) - < clip_angle_threshold - ) - ) - - def test_normalize_angles(self): - # torch.manual_seed(0) - for _ in range(50): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - rotated_boxes = RotatedBoxes(boxes_5d) - normalized_boxes = rotated_boxes.clone() - normalized_boxes.normalize_angles() - self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180)) - self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180)) - # x, y, w, h should not change - self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4])) - # the cos/sin values of the angles should stay the same - - self.assertTrue( - torch.allclose( - torch.cos(boxes_5d[:, 4] * math.pi / 180), - torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180), - atol=1e-5, - ) - ) - - self.assertTrue( - torch.allclose( - torch.sin(boxes_5d[:, 4] * math.pi / 180), - torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180), - atol=1e-5, - ) - ) - - def test_pairwise_iou_0_degree(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor( - [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor( - [ - [0.5, 0.5, 1.0, 1.0, 0.0], - [0.25, 0.5, 0.5, 1.0, 0.0], - [0.5, 0.25, 1.0, 0.5, 0.0], - [0.25, 0.25, 0.5, 0.5, 0.0], - [0.75, 0.75, 0.5, 0.5, 0.0], - [1.0, 1.0, 1.0, 1.0, 0.0], - ], - dtype=torch.float32, - device=device, - ) - expected_ious = torch.tensor( - [ - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - ], - dtype=torch.float32, - device=device, - ) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_45_degrees(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor( - [ - [1, 1, math.sqrt(2), math.sqrt(2), 45], - [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45], - ], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device) - expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_orthogonal(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device) - boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device) - iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0) - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - 
self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_large_close_boxes(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - boxes1 = torch.tensor( - [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor( - [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]], - dtype=torch.float32, - device=device, - ) - iou = 364.259155 / 364.259186 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_many_boxes(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - num_boxes1 = 100 - num_boxes2 = 200 - boxes1 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 10, 0], - dtype=torch.float32, - device=device, - ) - for i in range(num_boxes1) - ] - ) - boxes2 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], - dtype=torch.float32, - device=device, - ) - for i in range(num_boxes2) - ] - ) - expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device) - for i in range(min(num_boxes1, num_boxes2)): - expected_ious[i][i] = (1 + 9 * i / num_boxes2) / 10.0 - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_issue1207_simplified(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - # Simplified test case of D2-issue-1207 - boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device) - boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device) - iou = 0.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_issue1207(self): - for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []): - # The original test case in D2-issue-1207 - boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) - boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device) - - iou = 0.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_empty_cat(self): - x = RotatedBoxes.cat([]) - self.assertEqual(x.tensor.shape, (0, 5)) - - def test_scriptability(self): - def func(x): - boxes = RotatedBoxes(x) - test = boxes.to(torch.device("cpu")).tensor - return boxes.area(), test - - f = torch.jit.script(func) - f = reload_script_model(f) - f(torch.rand((3, 5))) - - data = torch.rand((3, 5)) - - def func_cat(x: torch.Tensor): - boxes1 = RotatedBoxes(x) - boxes2 = RotatedBoxes(x) - # this is not supported by torchscript for now.
- # boxes3 = RotatedBoxes.cat([boxes1, boxes2]) - boxes3 = boxes1.cat([boxes1, boxes2]) - return boxes3 - - f = torch.jit.script(func_cat) - script_box = f(data) - self.assertTrue(torch.equal(torch.cat([data, data]), script_box.tensor)) - - -def benchmark_rotated_iou(): - num_boxes1 = 200 - num_boxes2 = 500 - boxes1 = torch.stack( - [ - torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32) - for i in range(num_boxes1) - ] - ) - boxes2 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], - dtype=torch.float32, - ) - for i in range(num_boxes2) - ] - ) - - def func(dev, n=1): - b1 = boxes1.to(device=dev) - b2 = boxes2.to(device=dev) - - def bench(): - for _ in range(n): - pairwise_iou_rotated(b1, b2) - if dev.type == "cuda": - torch.cuda.synchronize() - - return bench - - # only run it once per timed loop, since it's slow - args = [{"dev": torch.device("cpu"), "n": 1}] - if torch.cuda.is_available(): - args.append({"dev": torch.device("cuda"), "n": 10}) - - benchmark(func, "rotated_iou", args, warmup_iters=3) - - -if __name__ == "__main__": - unittest.main() - benchmark_rotated_iou() diff --git a/detectron2/tests/test_checkpoint.py b/detectron2/tests/test_checkpoint.py deleted file mode 100644 index 6c0b1c1ca85e63e0848a4d4de2386c8c89fb6f76..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_checkpoint.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import os -import tempfile -import unittest -from collections import OrderedDict -import torch -from iopath.common.file_io import PathHandler, PathManager -from torch import nn - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.checkpoint.c2_model_loading import ( - _longest_common_prefix_str, - align_and_update_state_dicts, -) -from detectron2.utils.logger import setup_logger - - -class TestCheckpointer(unittest.TestCase): - def setUp(self): - setup_logger() - - def create_complex_model(self): - m = nn.Module() - m.block1 = nn.Module() - m.block1.layer1 = nn.Linear(2, 3) - m.layer2 = nn.Linear(3, 2) - m.res = nn.Module() - m.res.layer2 = nn.Linear(3, 2) - - state_dict = OrderedDict() - state_dict["layer1.weight"] = torch.rand(3, 2) - state_dict["layer1.bias"] = torch.rand(3) - state_dict["layer2.weight"] = torch.rand(2, 3) - state_dict["layer2.bias"] = torch.rand(2) - state_dict["res.layer2.weight"] = torch.rand(2, 3) - state_dict["res.layer2.bias"] = torch.rand(2) - return m, state_dict - - def test_complex_model_loaded(self): - for add_data_parallel in [False, True]: - model, state_dict = self.create_complex_model() - if add_data_parallel: - model = nn.DataParallel(model) - model_sd = model.state_dict() - - sd_to_load = align_and_update_state_dicts(model_sd, state_dict) - model.load_state_dict(sd_to_load) - for loaded, stored in zip(model_sd.values(), state_dict.values()): - # different tensor references - self.assertFalse(id(loaded) == id(stored)) - # same content - self.assertTrue(loaded.to(stored).equal(stored)) - - def test_load_with_matching_heuristics(self): - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - model, state_dict = self.create_complex_model() - torch.save({"model": state_dict}, os.path.join(d, "checkpoint.pth")) - checkpointer = DetectionCheckpointer(model, save_dir=d) - - with torch.no_grad(): - # use a different weight from the `state_dict`, since torch.rand is less than 1 - model.block1.layer1.weight.fill_(1) - - # load checkpoint without matching_heuristics - 
checkpointer.load(os.path.join(d, "checkpoint.pth")) - self.assertTrue(model.block1.layer1.weight.equal(torch.ones(3, 2))) - - # load checkpoint with matching_heuristics - checkpointer.load(os.path.join(d, "checkpoint.pth?matching_heuristics=True")) - self.assertFalse(model.block1.layer1.weight.equal(torch.ones(3, 2))) - - def test_custom_path_manager_handler(self): - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - - class CustomPathManagerHandler(PathHandler): - PREFIX = "detectron2_test://" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path, **kwargs): - name = path[len(self.PREFIX) :] - return os.path.join(d, name) - - def _open(self, path, mode="r", **kwargs): - return open(self._get_local_path(path), mode, **kwargs) - - pathmgr = PathManager() - pathmgr.register_handler(CustomPathManagerHandler()) - - model, state_dict = self.create_complex_model() - torch.save({"model": state_dict}, os.path.join(d, "checkpoint.pth")) - checkpointer = DetectionCheckpointer(model, save_dir=d) - checkpointer.path_manager = pathmgr - checkpointer.load("detectron2_test://checkpoint.pth") - checkpointer.load("detectron2_test://checkpoint.pth?matching_heuristics=True") - - def test_lcp(self): - self.assertEqual(_longest_common_prefix_str(["class", "dlaps_model"]), "") - self.assertEqual(_longest_common_prefix_str(["classA", "classB"]), "class") - self.assertEqual(_longest_common_prefix_str(["classA", "classB", "clab"]), "cla") - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/test_engine.py b/detectron2/tests/test_engine.py deleted file mode 100644 index c97c11bc20e57bebfad1830d8d035c53c8006756..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_engine.py +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import json -import math -import os -import tempfile -import time -import unittest -from unittest import mock -import torch -from fvcore.common.checkpoint import Checkpointer -from torch import nn - -from detectron2 import model_zoo -from detectron2.config import configurable, get_cfg -from detectron2.engine import DefaultTrainer, SimpleTrainer, default_setup, hooks -from detectron2.modeling.meta_arch import META_ARCH_REGISTRY -from detectron2.utils.events import CommonMetricPrinter, JSONWriter - - -@META_ARCH_REGISTRY.register() -class _SimpleModel(nn.Module): - @configurable - def __init__(self, sleep_sec=0): - super().__init__() - self.mod = nn.Linear(10, 20) - self.sleep_sec = sleep_sec - - @classmethod - def from_config(cls, cfg): - return {} - - def forward(self, x): - if self.sleep_sec > 0: - time.sleep(self.sleep_sec) - return {"loss": x.sum() + sum([x.mean() for x in self.parameters()])} - - -class TestTrainer(unittest.TestCase): - def _data_loader(self, device): - device = torch.device(device) - while True: - yield torch.rand(3, 3).to(device) - - def test_simple_trainer(self, device="cpu"): - model = _SimpleModel().to(device=device) - trainer = SimpleTrainer( - model, self._data_loader(device), torch.optim.SGD(model.parameters(), 0.1) - ) - trainer.train(0, 10) - - def test_simple_trainer_reset_dataloader(self, device="cpu"): - model = _SimpleModel().to(device=device) - trainer = SimpleTrainer( - model, self._data_loader(device), torch.optim.SGD(model.parameters(), 0.1) - ) - trainer.train(0, 10) - trainer.reset_data_loader(lambda: self._data_loader(device)) - trainer.train(0, 10) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_simple_trainer_cuda(self): - self.test_simple_trainer(device="cuda") - - def test_writer_hooks(self): - model = _SimpleModel(sleep_sec=0.1) - trainer = SimpleTrainer( - model, self._data_loader("cpu"), torch.optim.SGD(model.parameters(), 0.1) - ) - - max_iter = 50 - - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - json_file = os.path.join(d, "metrics.json") - writers = [CommonMetricPrinter(max_iter), JSONWriter(json_file)] - - trainer.register_hooks( - [hooks.EvalHook(0, lambda: {"metric": 100}), hooks.PeriodicWriter(writers)] - ) - with self.assertLogs(writers[0].logger) as logs: - trainer.train(0, max_iter) - - with open(json_file, "r") as f: - data = [json.loads(line.strip()) for line in f] - self.assertEqual([x["iteration"] for x in data], [19, 39, 49, 50]) - # the eval metric is in the last line with iter 50 - self.assertIn("metric", data[-1], "Eval metric must be in last line of JSON!") - - # test logged messages from CommonMetricPrinter - self.assertEqual(len(logs.output), 3) - for log, iter in zip(logs.output, [19, 39, 49]): - self.assertIn(f"iter: {iter}", log) - - self.assertIn("eta: 0:00:00", logs.output[-1], "Last ETA must be 0!") - - def test_metric_gather_and_write(self): - gather_metric_period = 5 - writer_period = 10 - - model = _SimpleModel(sleep_sec=0.1) - trainer = SimpleTrainer( - model, - self._data_loader("cpu"), - torch.optim.SGD(model.parameters(), 0.1), - gather_metric_period=gather_metric_period, - ) - - max_iter = 50 - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - json_file = os.path.join(d, "metrics.json") - writers = [JSONWriter(json_file, window_size=writer_period)] - - trainer.register_hooks( - [ - hooks.IterationTimer(), - hooks.PeriodicWriter(writers, period=writer_period), - ] - ) - trainer.train(0, max_iter) - - with open(json_file, "r") as 
f: - data = [json.loads(line.strip()) for line in f] - self.assertEqual([x["iteration"] for x in data], [9, 19, 29, 39, 49]) - self.assertEqual(len(trainer.storage.history("time").values()), 48) - for key in ["data_time", "total_loss"]: - history = trainer.storage.history(key).values() - history_iters = [h[1] for h in history] - self.assertEqual(history_iters, [4, 9, 14, 19, 24, 29, 34, 39, 44, 49]) - for i in range(len(data)): - # written metric should equal the median of the 2 most recent logged metrics - logged1, logged2 = history[2 * i][0], history[2 * i + 1][0] - gt = data[i][key] - self.assertEqual(gt, (logged1 + logged2) / 2.0) - - def test_async_write_metrics(self): - writer_period = 1 - - model = _SimpleModel(sleep_sec=0.1) - trainer = SimpleTrainer( - model, - self._data_loader("cpu"), - torch.optim.SGD(model.parameters(), 0.1), - async_write_metrics=True, - ) - - max_iter = 50 - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - json_file = os.path.join(d, "metrics.json") - writers = [JSONWriter(json_file, window_size=writer_period)] - - trainer.register_hooks( - [ - hooks.IterationTimer(), - hooks.PeriodicWriter(writers, period=writer_period), - ] - ) - trainer.train(0, max_iter) - - self.assertEqual(len(trainer.storage.history("time").values()), 48) - for key in ["data_time", "total_loss"]: - history = trainer.storage.history(key).values() - history_iters = [h[1] for h in history] - self.assertEqual(history_iters, list(range(50))) - - def test_default_trainer(self): - # TODO: this test requires manifold access, so changed device to CPU. see: T88318502 - cfg = get_cfg() - cfg.MODEL.DEVICE = "cpu" - cfg.MODEL.META_ARCHITECTURE = "_SimpleModel" - cfg.DATASETS.TRAIN = ("coco_2017_val_100",) - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - cfg.OUTPUT_DIR = d - trainer = DefaultTrainer(cfg) - - # test property - self.assertIs(trainer.model, trainer._trainer.model) - trainer.model = _SimpleModel() - self.assertIs(trainer.model, trainer._trainer.model) - - def test_checkpoint_resume(self): - model = _SimpleModel() - dataloader = self._data_loader("cpu") - opt = torch.optim.SGD(model.parameters(), 0.1) - scheduler = torch.optim.lr_scheduler.StepLR(opt, 3) - - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - trainer = SimpleTrainer(model, dataloader, opt) - checkpointer = Checkpointer(model, d, opt=opt, trainer=trainer) - - trainer.register_hooks( - [ - hooks.LRScheduler(scheduler=scheduler), - # checkpoint after scheduler to properly save the state of scheduler - hooks.PeriodicCheckpointer(checkpointer, 10), - ] - ) - - trainer.train(0, 12) - self.assertAlmostEqual(opt.param_groups[0]["lr"], 1e-5) - self.assertEqual(scheduler.last_epoch, 12) - del trainer - - opt = torch.optim.SGD(model.parameters(), 999) # lr will be loaded - trainer = SimpleTrainer(model, dataloader, opt) - scheduler = torch.optim.lr_scheduler.StepLR(opt, 3) - trainer.register_hooks( - [ - hooks.LRScheduler(scheduler=scheduler), - ] - ) - checkpointer = Checkpointer(model, d, opt=opt, trainer=trainer) - checkpointer.resume_or_load("non_exist.pth") - self.assertEqual(trainer.iter, 11) # last finished iter number (0-based in Trainer) - # number of times `scheduler.step()` was called (1-based) - self.assertEqual(scheduler.last_epoch, 12) - self.assertAlmostEqual(opt.param_groups[0]["lr"], 1e-5) - - def test_eval_hook(self): - model = _SimpleModel() - dataloader = self._data_loader("cpu") - opt = torch.optim.SGD(model.parameters(), 0.1) - - for total_iter, period,
eval_count in [(30, 15, 2), (31, 15, 3), (20, 0, 1)]: - test_func = mock.Mock(return_value={"metric": 3.0}) - trainer = SimpleTrainer(model, dataloader, opt) - trainer.register_hooks([hooks.EvalHook(period, test_func)]) - trainer.train(0, total_iter) - self.assertEqual(test_func.call_count, eval_count) - - def test_best_checkpointer(self): - model = _SimpleModel() - dataloader = self._data_loader("cpu") - opt = torch.optim.SGD(model.parameters(), 0.1) - metric_name = "metric" - total_iter = 40 - test_period = 10 - test_cases = [ - ("max", iter([0.3, 0.4, 0.35, 0.5]), 3), - ("min", iter([1.0, 0.8, 0.9, 0.9]), 2), - ("min", iter([math.nan, 0.8, 0.9, 0.9]), 1), - ] - for mode, metrics, call_count in test_cases: - trainer = SimpleTrainer(model, dataloader, opt) - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - checkpointer = Checkpointer(model, d, opt=opt, trainer=trainer) - trainer.register_hooks( - [ - hooks.EvalHook(test_period, lambda: {metric_name: next(metrics)}), - hooks.BestCheckpointer(test_period, checkpointer, metric_name, mode=mode), - ] - ) - with mock.patch.object(checkpointer, "save") as mock_save_method: - trainer.train(0, total_iter) - self.assertEqual(mock_save_method.call_count, call_count) - - def test_setup_config(self): - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - cfg = get_cfg() - cfg.OUTPUT_DIR = os.path.join(d, "yacs") - default_setup(cfg, {}) - - cfg = model_zoo.get_config("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.py") - cfg.train.output_dir = os.path.join(d, "omegaconf") - default_setup(cfg, {}) diff --git a/detectron2/tests/test_events.py b/detectron2/tests/test_events.py deleted file mode 100644 index 174ca978de21fa09fdf79eca62936ef497aaf2e8..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_events.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import json -import os -import tempfile -import unittest - -from detectron2.utils.events import ( - CommonMetricPrinter, - EventStorage, - JSONWriter, - get_event_storage, - has_event_storage, -) - - -class TestEventWriter(unittest.TestCase): - def testScalar(self): - with tempfile.TemporaryDirectory( - prefix="detectron2_tests" - ) as dir, EventStorage() as storage: - json_file = os.path.join(dir, "test.json") - writer = JSONWriter(json_file) - for k in range(60): - storage.put_scalar("key", k, smoothing_hint=False) - if (k + 1) % 20 == 0: - writer.write() - storage.step() - writer.close() - with open(json_file) as f: - data = [json.loads(l) for l in f] - self.assertTrue([int(k["key"]) for k in data] == [19, 39, 59]) - - def testScalarMismatchedPeriod(self): - with tempfile.TemporaryDirectory( - prefix="detectron2_tests" - ) as dir, EventStorage() as storage: - json_file = os.path.join(dir, "test.json") - - writer = JSONWriter(json_file) - for k in range(60): - if k % 17 == 0: # write in a different period - storage.put_scalar("key2", k, smoothing_hint=False) - storage.put_scalar("key", k, smoothing_hint=False) - if (k + 1) % 20 == 0: - writer.write() - storage.step() - writer.close() - with open(json_file) as f: - data = [json.loads(l) for l in f] - self.assertTrue([int(k.get("key2", 0)) for k in data] == [17, 0, 34, 0, 51, 0]) - self.assertTrue([int(k.get("key", 0)) for k in data] == [0, 19, 0, 39, 0, 59]) - self.assertTrue([int(k["iteration"]) for k in data] == [17, 19, 34, 39, 51, 59]) - - def testPrintETA(self): - with EventStorage() as s: - p1 = CommonMetricPrinter(10) - p2 = CommonMetricPrinter() - - s.put_scalar("time", 1.0) - s.step() - s.put_scalar("time", 1.0) - s.step() - - with self.assertLogs("detectron2.utils.events") as logs: - p1.write() - self.assertIn("eta", logs.output[0]) - - with self.assertLogs("detectron2.utils.events") as logs: - p2.write() - self.assertNotIn("eta", logs.output[0]) - - def testPrintNonLosses(self): - with EventStorage() as s: - p1 = CommonMetricPrinter(10) - p2 = CommonMetricPrinter() - - s.put_scalar("time", 1.0) - s.put_scalar("[metric]bn_stat", 1.0) - s.step() - s.put_scalar("time", 1.0) - s.put_scalar("[metric]bn_stat", 1.0) - s.step() - - with self.assertLogs("detectron2.utils.events") as logs: - p1.write() - self.assertIn("[metric]bn_stat", logs.output[0]) - - with self.assertLogs("detectron2.utils.events") as logs: - p2.write() - self.assertIn("[metric]bn_stat", logs.output[0]) - - def testSmoothingWithWindowSize(self): - with tempfile.TemporaryDirectory( - prefix="detectron2_tests" - ) as dir, EventStorage() as storage: - json_file = os.path.join(dir, "test.json") - writer = JSONWriter(json_file, window_size=10) - for k in range(20): - storage.put_scalar("key1", k, smoothing_hint=True) - if (k + 1) % 2 == 0: - storage.put_scalar("key2", k, smoothing_hint=True) - if (k + 1) % 5 == 0: - storage.put_scalar("key3", k, smoothing_hint=True) - if (k + 1) % 10 == 0: - writer.write() - storage.step() - - num_samples = {k: storage.count_samples(k, 10) for k in ["key1", "key2", "key3"]} - self.assertEqual(num_samples, {"key1": 10, "key2": 5, "key3": 2}) - writer.close() - with open(json_file) as f: - data = [json.loads(l) for l in f] - self.assertEqual([k["key1"] for k in data], [4.5, 14.5]) - self.assertEqual([k["key2"] for k in data], [5, 15]) - self.assertEqual([k["key3"] for k in data], [6.5, 16.5]) - - def testEventStorage(self): - self.assertFalse(has_event_storage()) - with EventStorage() as storage: - self.assertTrue(has_event_storage()) -
self.assertEqual(storage, get_event_storage()) - self.assertFalse(has_event_storage()) diff --git a/detectron2/tests/test_export_caffe2.py b/detectron2/tests/test_export_caffe2.py deleted file mode 100644 index 58e9f681c356d05e3d03b06b603721ed51840c5c..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_export_caffe2.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -*- coding: utf-8 -*- - -import copy -import os -import tempfile -import unittest -import torch -from torch.hub import _check_module_exists - -from detectron2 import model_zoo -from detectron2.utils.logger import setup_logger -from detectron2.utils.testing import get_sample_coco_image - -try: - # Caffe2 used to be included in PyTorch, but since PyTorch 1.10+, - # Caffe2 is not included in pre-built packages. This is a safety BC check - from detectron2.export import Caffe2Model, Caffe2Tracer -except ImportError: - raise unittest.SkipTest( - f"PyTorch does not have Caffe2 support. Skipping all tests in {__name__}" - ) from None - - -# TODO: this test requires manifold access, see: T88318502 -# Running it on CircleCI causes crash, not sure why. -@unittest.skipIf(os.environ.get("CIRCLECI"), "Caffe2 tests crash on CircleCI.") -@unittest.skipIf(not _check_module_exists("onnx"), "ONNX not installed.") -class TestCaffe2Export(unittest.TestCase): - def setUp(self): - setup_logger() - - def _test_model(self, config_path, device="cpu"): - cfg = model_zoo.get_config(config_path) - cfg.MODEL.DEVICE = device - model = model_zoo.get(config_path, trained=True, device=device) - - inputs = [{"image": get_sample_coco_image()}] - tracer = Caffe2Tracer(cfg, model, copy.deepcopy(inputs)) - - with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d: - if not os.environ.get("CI"): - # This requires onnx, which is not yet available on public CI - c2_model = tracer.export_caffe2() - c2_model.save_protobuf(d) - c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs)) - - c2_model = Caffe2Model.load_protobuf(d) - c2_model(inputs)[0]["instances"] - - ts_model = tracer.export_torchscript() - ts_model.save(os.path.join(d, "model.ts")) - - def testMaskRCNN(self): - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def testMaskRCNNGPU(self): - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda") - - def testRetinaNet(self): - self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") diff --git a/detectron2/tests/test_export_onnx.py b/detectron2/tests/test_export_onnx.py deleted file mode 100644 index e0536774bf1d166b73b106a47d1f25273c2038a0..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_export_onnx.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import io -import unittest -import warnings -import onnx -import torch -from packaging import version -from torch.hub import _check_module_exists - -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.export import STABLE_ONNX_OPSET_VERSION -from detectron2.export.flatten import TracingAdapter -from detectron2.export.torchscript_patch import patch_builtin_len -from detectron2.layers import ShapeSpec -from detectron2.modeling import build_model -from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead -from detectron2.structures import Boxes, Instances -from detectron2.utils.testing import ( - _pytorch1111_symbolic_opset9_repeat_interleave, - _pytorch1111_symbolic_opset9_to, - get_sample_coco_image, - has_dynamic_axes, - random_boxes, - register_custom_op_onnx_export, - skipIfOnCPUCI, - skipIfUnsupportedMinOpsetVersion, - skipIfUnsupportedMinTorchVersion, - unregister_custom_op_onnx_export, -) - - -@unittest.skipIf(not _check_module_exists("onnx"), "ONNX not installed.") -@skipIfUnsupportedMinTorchVersion("1.10") -class TestONNXTracingExport(unittest.TestCase): - opset_version = STABLE_ONNX_OPSET_VERSION - - def testMaskRCNNFPN(self): - def inference_func(model, images): - with warnings.catch_warnings(record=True): - inputs = [{"image": image} for image in images] - inst = model.inference(inputs, do_postprocess=False)[0] - return [{"instances": inst}] - - self._test_model_zoo_from_config_path( - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func - ) - - @skipIfOnCPUCI - def testMaskRCNNC4(self): - def inference_func(model, image): - inputs = [{"image": image}] - return model.inference(inputs, do_postprocess=False)[0] - - self._test_model_zoo_from_config_path( - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml", inference_func - ) - - @skipIfOnCPUCI - def testCascadeRCNN(self): - def inference_func(model, image): - inputs = [{"image": image}] - return model.inference(inputs, do_postprocess=False)[0] - - self._test_model_zoo_from_config_path( - "Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml", inference_func - ) - - def testRetinaNet(self): - def inference_func(model, image): - return model.forward([{"image": image}])[0]["instances"] - - self._test_model_zoo_from_config_path( - "COCO-Detection/retinanet_R_50_FPN_3x.yaml", inference_func - ) - - @skipIfOnCPUCI - def testMaskRCNNFPN_batched(self): - def inference_func(model, image1, image2): - inputs = [{"image": image1}, {"image": image2}] - return model.inference(inputs, do_postprocess=False) - - self._test_model_zoo_from_config_path( - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func, batch=2 - ) - - @skipIfUnsupportedMinOpsetVersion(16, STABLE_ONNX_OPSET_VERSION) - @skipIfUnsupportedMinTorchVersion("1.11.1") - def testMaskRCNNFPN_with_postproc(self): - def inference_func(model, image): - inputs = [{"image": image, "height": image.shape[1], "width": image.shape[2]}] - return model.inference(inputs, do_postprocess=True)[0]["instances"] - - self._test_model_zoo_from_config_path( - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", - inference_func, - ) - - @unittest.skipIf( - version.Version(onnx.version.version) >= version.Version("1.16.0"), - "This test fails on ONNX Runtime >= 1.16", - ) - def testKeypointHead(self): - class M(torch.nn.Module): - def __init__(self): - super().__init__() - self.model = KRCNNConvDeconvUpsampleHead( - ShapeSpec(channels=4, height=14, width=14), num_keypoints=17, conv_dims=(4,) - ) - - def forward(self, x, predbox1, 
predbox2): - inst = [ - Instances((100, 100), pred_boxes=Boxes(predbox1)), - Instances((100, 100), pred_boxes=Boxes(predbox2)), - ] - ret = self.model(x, inst) - return tuple(x.pred_keypoints for x in ret) - - model = M() - model.eval() - - def gen_input(num1, num2): - feat = torch.randn((num1 + num2, 4, 14, 14)) - box1 = random_boxes(num1) - box2 = random_boxes(num2) - return feat, box1, box2 - - with patch_builtin_len(): - onnx_model = self._test_model( - model, - gen_input(1, 2), - input_names=["features", "pred_boxes", "pred_classes"], - output_names=["box1", "box2"], - dynamic_axes={ - "features": {0: "batch", 1: "static_four", 2: "height", 3: "width"}, - "pred_boxes": {0: "batch", 1: "static_four"}, - "pred_classes": {0: "batch", 1: "static_four"}, - "box1": {0: "num_instance", 1: "K", 2: "static_three"}, - "box2": {0: "num_instance", 1: "K", 2: "static_three"}, - }, - ) - - # Although ONNX models are not executable by PyTorch to verify - # support of batches with different sizes, we can verify model's IR - # does not hard-code input and/or output shapes. - # TODO: Add tests with different batch sizes when detectron2's CI - # support ONNX Runtime backend. - assert has_dynamic_axes(onnx_model) - - ################################################################################ - # Testcase internals - DO NOT add tests below this point - ################################################################################ - - def setUp(self): - register_custom_op_onnx_export("::to", _pytorch1111_symbolic_opset9_to, 9, "1.11.1") - register_custom_op_onnx_export( - "::repeat_interleave", - _pytorch1111_symbolic_opset9_repeat_interleave, - 9, - "1.11.1", - ) - - def tearDown(self): - unregister_custom_op_onnx_export("::to", 9, "1.11.1") - unregister_custom_op_onnx_export("::repeat_interleave", 9, "1.11.1") - - def _test_model( - self, - model, - inputs, - inference_func=None, - opset_version=STABLE_ONNX_OPSET_VERSION, - save_onnx_graph_path=None, - **export_kwargs, - ): - # Not imported in the beginning of file to prevent runtime errors - # for environments without ONNX. 
- # This testcase checks dependencies before running - import onnx # isort:skip - - f = io.BytesIO() - adapter_model = TracingAdapter(model, inputs, inference_func) - adapter_model.eval() - with torch.no_grad(): - try: - torch.onnx.enable_log() - except AttributeError: - # Older ONNX versions do not have this API - pass - torch.onnx.export( - adapter_model, - adapter_model.flattened_inputs, - f, - training=torch.onnx.TrainingMode.EVAL, - opset_version=opset_version, - verbose=True, - **export_kwargs, - ) - onnx_model = onnx.load_from_string(f.getvalue()) - assert onnx_model is not None - if save_onnx_graph_path: - onnx.save(onnx_model, save_onnx_graph_path) - return onnx_model - - def _test_model_zoo_from_config_path( - self, - config_path, - inference_func, - batch=1, - opset_version=STABLE_ONNX_OPSET_VERSION, - save_onnx_graph_path=None, - **export_kwargs, - ): - model = model_zoo.get(config_path, trained=True) - image = get_sample_coco_image() - inputs = tuple(image.clone() for _ in range(batch)) - return self._test_model( - model, inputs, inference_func, opset_version, save_onnx_graph_path, **export_kwargs - ) - - def _test_model_from_config_path( - self, - config_path, - inference_func, - batch=1, - opset_version=STABLE_ONNX_OPSET_VERSION, - save_onnx_graph_path=None, - **export_kwargs, - ): - from projects.PointRend import point_rend # isort:skip - - cfg = get_cfg() - cfg.DATALOADER.NUM_WORKERS = 0 - point_rend.add_pointrend_config(cfg) - cfg.merge_from_file(config_path) - cfg.freeze() - model = build_model(cfg) - image = get_sample_coco_image() - inputs = tuple(image.clone() for _ in range(batch)) - return self._test_model( - model, inputs, inference_func, opset_version, save_onnx_graph_path, **export_kwargs - ) diff --git a/detectron2/tests/test_export_torchscript.py b/detectron2/tests/test_export_torchscript.py deleted file mode 100644 index f4dcfc25001254029b3b656487159585e76e687d..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_export_torchscript.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import copy -import glob -import json -import os -import random -import tempfile -import unittest -import zipfile -import torch -from torch import Tensor, nn - -from detectron2 import model_zoo -from detectron2.config import get_cfg -from detectron2.config.instantiate import dump_dataclass, instantiate -from detectron2.export import dump_torchscript_IR, scripting_with_instances -from detectron2.export.flatten import TracingAdapter, flatten_to_tuple -from detectron2.export.torchscript_patch import patch_builtin_len -from detectron2.layers import ShapeSpec -from detectron2.modeling import build_backbone -from detectron2.modeling.postprocessing import detector_postprocess -from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead -from detectron2.structures import Boxes, Instances -from detectron2.utils.env import TORCH_VERSION -from detectron2.utils.testing import ( - assert_instances_allclose, - convert_scripted_instances, - get_sample_coco_image, - random_boxes, - reload_script_model, - skipIfOnCPUCI, -) - - -""" -https://detectron2.readthedocs.io/tutorials/deployment.html -contains some explanations of this file.
-""" - - -class TestScripting(unittest.TestCase): - def testMaskRCNNFPN(self): - self._test_rcnn_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") - - @skipIfOnCPUCI - def testMaskRCNNC4(self): - self._test_rcnn_model("COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml") - - def testRetinaNet(self): - self._test_retinanet_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") - - def _test_rcnn_model(self, config_path): - model = model_zoo.get(config_path, trained=True) - model.eval() - - fields = { - "proposal_boxes": Boxes, - "objectness_logits": Tensor, - "pred_boxes": Boxes, - "scores": Tensor, - "pred_classes": Tensor, - "pred_masks": Tensor, - } - script_model = scripting_with_instances(model, fields) - script_model = reload_script_model(script_model) - - # Test that batch inference with different shapes are supported - image = get_sample_coco_image() - small_image = nn.functional.interpolate(image, scale_factor=0.5) - inputs = [{"image": image}, {"image": small_image}] - with torch.no_grad(): - instance = model.inference(inputs, do_postprocess=False)[0] - scripted_instance = script_model.inference(inputs, do_postprocess=False)[0] - assert_instances_allclose(instance, scripted_instance) - - def _test_retinanet_model(self, config_path): - model = model_zoo.get(config_path, trained=True) - model.eval() - - fields = { - "pred_boxes": Boxes, - "scores": Tensor, - "pred_classes": Tensor, - } - script_model = scripting_with_instances(model, fields) - - img = get_sample_coco_image() - inputs = [{"image": img}] * 2 - with torch.no_grad(): - instance = model(inputs)[0]["instances"] - scripted_instance = convert_scripted_instances(script_model(inputs)[0]) - scripted_instance = detector_postprocess(scripted_instance, img.shape[1], img.shape[2]) - assert_instances_allclose(instance, scripted_instance) - # Note that the model currently cannot be saved and loaded into a new process: - # https://github.com/pytorch/pytorch/issues/46944 - - -# TODO: this test requires manifold access, see: T88318502 -class TestTracing(unittest.TestCase): - def testMaskRCNNFPN(self): - def inference_func(model, image): - inputs = [{"image": image}] - return model.inference(inputs, do_postprocess=False)[0] - - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func) - - def testMaskRCNNFPN_with_postproc(self): - def inference_func(model, image): - inputs = [{"image": image, "height": image.shape[1], "width": image.shape[2]}] - return model.inference(inputs, do_postprocess=True)[0]["instances"] - - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func) - - @skipIfOnCPUCI - def testMaskRCNNC4(self): - def inference_func(model, image): - inputs = [{"image": image}] - return model.inference(inputs, do_postprocess=False)[0] - - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml", inference_func) - - @skipIfOnCPUCI - def testCascadeRCNN(self): - def inference_func(model, image): - inputs = [{"image": image}] - return model.inference(inputs, do_postprocess=False)[0] - - self._test_model("Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml", inference_func) - - # bug fixed by https://github.com/pytorch/pytorch/pull/67734 - @unittest.skipIf(TORCH_VERSION == (1, 10) and os.environ.get("CI"), "1.10 has bugs.") - def testRetinaNet(self): - def inference_func(model, image): - return model.forward([{"image": image}])[0]["instances"] - - self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml", inference_func) - - def 
_check_torchscript_no_hardcoded_device(self, jitfile, extract_dir, device): - zipfile.ZipFile(jitfile).extractall(extract_dir) - dir_path = os.path.join(extract_dir, os.path.splitext(os.path.basename(jitfile))[0]) - error_files = [] - for f in glob.glob(f"{dir_path}/code/**/*.py", recursive=True): - content = open(f).read() - if device in content: - error_files.append((f, content)) - if len(error_files): - msg = "\n".join(f"{f}\n{content}" for f, content in error_files) - raise ValueError(f"Found device '{device}' in following files:\n{msg}") - - def _get_device_casting_test_cases(self, model): - # Indexing operation can cause hardcoded device type before 1.10 - if not TORCH_VERSION >= (1, 10) or torch.cuda.device_count() == 0: - return [None] - - testing_devices = ["cpu", "cuda:0"] - if torch.cuda.device_count() > 1: - testing_devices.append(f"cuda:{torch.cuda.device_count() - 1}") - assert str(model.device) in testing_devices - testing_devices.remove(str(model.device)) - testing_devices = [None] + testing_devices # test no casting first - - return testing_devices - - def _test_model(self, config_path, inference_func, batch=1): - model = model_zoo.get(config_path, trained=True) - image = get_sample_coco_image() - inputs = tuple(image.clone() for _ in range(batch)) - - wrapper = TracingAdapter(model, inputs, inference_func) - wrapper.eval() - with torch.no_grad(): - # trace with smaller images, and the trace must still work - trace_inputs = tuple( - nn.functional.interpolate(image, scale_factor=random.uniform(0.5, 0.7)) - for _ in range(batch) - ) - traced_model = torch.jit.trace(wrapper, trace_inputs) - - testing_devices = self._get_device_casting_test_cases(model) - # save and load back the model in order to show traceback of TorchScript - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - basename = "model" - jitfile = f"{d}/{basename}.jit" - torch.jit.save(traced_model, jitfile) - traced_model = torch.jit.load(jitfile) - - if any(device and "cuda" in device for device in testing_devices): - self._check_torchscript_no_hardcoded_device(jitfile, d, "cuda") - - for device in testing_devices: - print(f"Testing casting to {device} for inference (traced on {model.device}) ...") - with torch.no_grad(): - outputs = inference_func(copy.deepcopy(model).to(device), *inputs) - traced_outputs = wrapper.outputs_schema(traced_model.to(device)(*inputs)) - if batch > 1: - for output, traced_output in zip(outputs, traced_outputs): - assert_instances_allclose(output, traced_output, size_as_tensor=True) - else: - assert_instances_allclose(outputs, traced_outputs, size_as_tensor=True) - - @skipIfOnCPUCI - def testMaskRCNNFPN_batched(self): - def inference_func(model, image1, image2): - inputs = [{"image": image1}, {"image": image2}] - return model.inference(inputs, do_postprocess=False) - - self._test_model( - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", inference_func, batch=2 - ) - - def testKeypointHead(self): - class M(nn.Module): - def __init__(self): - super().__init__() - self.model = KRCNNConvDeconvUpsampleHead( - ShapeSpec(channels=4, height=14, width=14), num_keypoints=17, conv_dims=(4,) - ) - - def forward(self, x, predbox1, predbox2): - inst = [ - Instances((100, 100), pred_boxes=Boxes(predbox1)), - Instances((100, 100), pred_boxes=Boxes(predbox2)), - ] - ret = self.model(x, inst) - return tuple(x.pred_keypoints for x in ret) - - model = M() - model.eval() - - def gen_input(num1, num2): - feat = torch.randn((num1 + num2, 4, 14, 14)) - box1 = random_boxes(num1) - box2
= random_boxes(num2) - return feat, box1, box2 - - with torch.no_grad(), patch_builtin_len(): - trace = torch.jit.trace(model, gen_input(15, 15), check_trace=False) - - inputs = gen_input(12, 10) - trace_outputs = trace(*inputs) - true_outputs = model(*inputs) - for trace_output, true_output in zip(trace_outputs, true_outputs): - self.assertTrue(torch.allclose(trace_output, true_output)) - - -class TestTorchscriptUtils(unittest.TestCase): - # TODO: add test to dump scripting - def test_dump_IR_tracing(self): - cfg = get_cfg() - cfg.MODEL.RESNETS.DEPTH = 18 - cfg.MODEL.RESNETS.RES2_OUT_CHANNELS = 64 - - class Mod(nn.Module): - def forward(self, x): - return tuple(self.m(x).values()) - - model = Mod() - model.m = build_backbone(cfg) - model.eval() - - with torch.no_grad(): - ts_model = torch.jit.trace(model, (torch.rand(2, 3, 224, 224),)) - - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - dump_torchscript_IR(ts_model, d) - # check that the files are created - for name in ["model_ts_code", "model_ts_IR", "model_ts_IR_inlined", "model"]: - fname = os.path.join(d, name + ".txt") - self.assertTrue(os.stat(fname).st_size > 0, fname) - - def test_dump_IR_function(self): - @torch.jit.script - def gunc(x, y): - return x + y - - def func(x, y): - return x + y + gunc(x, y) - - ts_model = torch.jit.trace(func, (torch.rand(3), torch.rand(3))) - with tempfile.TemporaryDirectory(prefix="detectron2_test") as d: - dump_torchscript_IR(ts_model, d) - for name in ["model_ts_code", "model_ts_IR", "model_ts_IR_inlined"]: - fname = os.path.join(d, name + ".txt") - self.assertTrue(os.stat(fname).st_size > 0, fname) - - def test_flatten_basic(self): - obj = [3, ([5, 6], {"name": [7, 9], "name2": 3})] - res, schema = flatten_to_tuple(obj) - self.assertEqual(res, (3, 5, 6, 7, 9, 3)) - new_obj = schema(res) - self.assertEqual(new_obj, obj) - - _, new_schema = flatten_to_tuple(new_obj) - self.assertEqual(schema, new_schema) # test __eq__ - self._check_schema(schema) - - def _check_schema(self, schema): - dumped_schema = dump_dataclass(schema) - # Check that the schema is json-serializable - # Although in reality you might want to use yaml because it often has many levels - json.dumps(dumped_schema) - - # Check that the schema can be deserialized - new_schema = instantiate(dumped_schema) - self.assertEqual(schema, new_schema) - - def test_flatten_instances_boxes(self): - inst = Instances( - torch.tensor([5, 8]), pred_masks=torch.tensor([3]), pred_boxes=Boxes(torch.ones((1, 4))) - ) - obj = [3, ([5, 6], inst)] - res, schema = flatten_to_tuple(obj) - self.assertEqual(res[:3], (3, 5, 6)) - for r, expected in zip(res[3:], (inst.pred_boxes.tensor, inst.pred_masks, inst.image_size)): - self.assertIs(r, expected) - new_obj = schema(res) - assert_instances_allclose(new_obj[1][1], inst, rtol=0.0, size_as_tensor=True) - - self._check_schema(schema) - - def test_allow_non_tensor(self): - data = (torch.tensor([5, 8]), 3) # contains non-tensor - - class M(nn.Module): - def forward(self, input, number): - return input - - model = M() - with self.assertRaisesRegex(ValueError, "must only contain tensors"): - adap = TracingAdapter(model, data, allow_non_tensor=False) - - adap = TracingAdapter(model, data, allow_non_tensor=True) - _ = adap(*adap.flattened_inputs) - - newdata = (data[0].clone(),) - with self.assertRaisesRegex(ValueError, "cannot generalize"): - _ = adap(*newdata) diff --git a/detectron2/tests/test_model_analysis.py b/detectron2/tests/test_model_analysis.py deleted file mode 100644 index 
c01b7af09703c8dad889dee0118d74fcc12ac4b0..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_model_analysis.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - - -import unittest -import torch -from torch import nn - -from detectron2.utils.analysis import find_unused_parameters, flop_count_operators, parameter_count -from detectron2.utils.testing import get_model_no_weights - - -class RetinaNetTest(unittest.TestCase): - def setUp(self): - self.model = get_model_no_weights("COCO-Detection/retinanet_R_50_FPN_1x.yaml") - - def test_flop(self): - # RetinaNet supports flop-counting with random inputs - inputs = [{"image": torch.rand(3, 800, 800), "test_unused": "abcd"}] - res = flop_count_operators(self.model, inputs) - self.assertEqual(int(res["conv"]), 146) # 146B flops - - def test_param_count(self): - res = parameter_count(self.model) - self.assertEqual(res[""], 37915572) - self.assertEqual(res["backbone"], 31452352) - - -class FasterRCNNTest(unittest.TestCase): - def setUp(self): - self.model = get_model_no_weights("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml") - - def test_flop(self): - # Faster R-CNN supports flop-counting with random inputs - inputs = [{"image": torch.rand(3, 800, 800)}] - res = flop_count_operators(self.model, inputs) - - # This only checks flops for backbone & proposal generator - # Flops for box head is not conv, and depends on #proposals, which is - # almost 0 for random inputs. - self.assertEqual(int(res["conv"]), 117) - - def test_flop_with_output_shape(self): - inputs = [{"image": torch.rand(3, 800, 800), "height": 700, "width": 700}] - res = flop_count_operators(self.model, inputs) - self.assertEqual(int(res["conv"]), 117) - - def test_param_count(self): - res = parameter_count(self.model) - self.assertEqual(res[""], 41699936) - self.assertEqual(res["backbone"], 26799296) - - -class MaskRCNNTest(unittest.TestCase): - def setUp(self): - self.model = get_model_no_weights("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml") - - def test_flop(self): - inputs1 = [{"image": torch.rand(3, 800, 800)}] - inputs2 = [{"image": torch.rand(3, 800, 800), "height": 700, "width": 700}] - - for inputs in [inputs1, inputs2]: - res = flop_count_operators(self.model, inputs) - # The mask head could have extra conv flops, so total >= 117 - self.assertGreaterEqual(int(res["conv"]), 117) - - -class UnusedParamTest(unittest.TestCase): - def test_unused(self): - class TestMod(nn.Module): - def __init__(self): - super().__init__() - self.fc1 = nn.Linear(10, 10) - self.t = nn.Linear(10, 10) - - def forward(self, x): - return self.fc1(x).mean() - - m = TestMod() - ret = find_unused_parameters(m, torch.randn(10, 10)) - self.assertEqual(set(ret), {"t.weight", "t.bias"}) diff --git a/detectron2/tests/test_model_zoo.py b/detectron2/tests/test_model_zoo.py deleted file mode 100644 index e3360a74864e0c00ed92ffbc8531c8d36e8be379..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_model_zoo.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import logging -import unittest - -from detectron2 import model_zoo -from detectron2.config import instantiate -from detectron2.modeling import FPN, GeneralizedRCNN - -logger = logging.getLogger(__name__) - - -class TestModelZoo(unittest.TestCase): - def test_get_returns_model(self): - model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False) - self.assertIsInstance(model, GeneralizedRCNN) - self.assertIsInstance(model.backbone, FPN) - - def test_get_invalid_model(self): - self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml") - - def test_get_url(self): - url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") - self.assertEqual( - url, - "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl", # noqa - ) - url2 = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.py") - self.assertEqual(url, url2) - - def _build_lazy_model(self, name): - cfg = model_zoo.get_config("common/models/" + name) - instantiate(cfg.model) - - def test_mask_rcnn_fpn(self): - self._build_lazy_model("mask_rcnn_fpn.py") - - def test_mask_rcnn_c4(self): - self._build_lazy_model("mask_rcnn_c4.py") - - def test_panoptic_fpn(self): - self._build_lazy_model("panoptic_fpn.py") - - def test_schedule(self): - cfg = model_zoo.get_config("common/coco_schedule.py") - for _, v in cfg.items(): - instantiate(v) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/test_packaging.py b/detectron2/tests/test_packaging.py deleted file mode 100644 index a5b1661e8f341fe66a6e02c59fe172bce445782b..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_packaging.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import unittest - -from detectron2.utils.collect_env import collect_env_info - - -class TestProjects(unittest.TestCase): - def test_import(self): - from detectron2.projects import point_rend - - _ = point_rend.add_pointrend_config - - import detectron2.projects.deeplab as deeplab - - _ = deeplab.add_deeplab_config - - # import detectron2.projects.panoptic_deeplab as panoptic_deeplab - - # _ = panoptic_deeplab.add_panoptic_deeplab_config - - -class TestCollectEnv(unittest.TestCase): - def test(self): - _ = collect_env_info() diff --git a/detectron2/tests/test_registry.py b/detectron2/tests/test_registry.py deleted file mode 100644 index 4e425a6ec44c7c47a5a106bfdf5ce8062c2110c9..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_registry.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import unittest -import torch - -from detectron2.modeling.meta_arch import GeneralizedRCNN -from detectron2.utils.registry import _convert_target_to_string, locate - - -class A: - class B: - pass - - -class TestLocate(unittest.TestCase): - def _test_obj(self, obj): - name = _convert_target_to_string(obj) - newobj = locate(name) - self.assertIs(obj, newobj) - - def test_basic(self): - self._test_obj(GeneralizedRCNN) - - def test_inside_class(self): - # requires using __qualname__ instead of __name__ - self._test_obj(A.B) - - def test_builtin(self): - self._test_obj(len) - self._test_obj(dict) - - def test_pytorch_optim(self): - # pydoc.locate does not work for it - self._test_obj(torch.optim.SGD) - - def test_failure(self): - with self.assertRaises(ImportError): - locate("asdf") - - def test_compress_target(self): - from detectron2.data.transforms import RandomCrop - - name = _convert_target_to_string(RandomCrop) - # name shouldn't contain 'augmentation_impl' - self.assertEqual(name, "detectron2.data.transforms.RandomCrop") - self.assertIs(RandomCrop, locate(name)) diff --git a/detectron2/tests/test_scheduler.py b/detectron2/tests/test_scheduler.py deleted file mode 100644 index 5649a4a2e167f44a734cfcc3ec86ab3a22bfc1b0..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_scheduler.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. - -import math -import numpy as np -from unittest import TestCase -import torch -from fvcore.common.param_scheduler import ( - CosineParamScheduler, - MultiStepParamScheduler, - StepWithFixedGammaParamScheduler, -) -from torch import nn - -from detectron2.solver import LRMultiplier, WarmupParamScheduler, build_lr_scheduler - - -class TestScheduler(TestCase): - def test_warmup_multistep(self): - p = nn.Parameter(torch.zeros(0)) - opt = torch.optim.SGD([p], lr=5) - - multiplier = WarmupParamScheduler( - MultiStepParamScheduler( - [1, 0.1, 0.01, 0.001], - milestones=[10, 15, 20], - num_updates=30, - ), - 0.001, - 5 / 30, - ) - sched = LRMultiplier(opt, multiplier, 30) - # This is an equivalent of: - # sched = WarmupMultiStepLR( - # opt, milestones=[10, 15, 20], gamma=0.1, warmup_factor=0.001, warmup_iters=5) - - p.sum().backward() - opt.step() - - lrs = [0.005] - for _ in range(30): - sched.step() - lrs.append(opt.param_groups[0]["lr"]) - self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001])) - self.assertTrue(np.allclose(lrs[5:10], 5.0)) - self.assertTrue(np.allclose(lrs[10:15], 0.5)) - self.assertTrue(np.allclose(lrs[15:20], 0.05)) - self.assertTrue(np.allclose(lrs[20:], 0.005)) - - def test_warmup_cosine(self): - p = nn.Parameter(torch.zeros(0)) - opt = torch.optim.SGD([p], lr=5) - multiplier = WarmupParamScheduler( - CosineParamScheduler(1, 0), - 0.001, - 5 / 30, - ) - sched = LRMultiplier(opt, multiplier, 30) - - p.sum().backward() - opt.step() - self.assertEqual(opt.param_groups[0]["lr"], 0.005) - lrs = [0.005] - - for _ in range(30): - sched.step() - lrs.append(opt.param_groups[0]["lr"]) - for idx, lr in enumerate(lrs): - expected_cosine = 2.5 * (1.0 + math.cos(math.pi * idx / 30)) - if idx >= 5: - self.assertAlmostEqual(lr, expected_cosine) - else: - self.assertNotAlmostEqual(lr, expected_cosine) - - def test_warmup_cosine_end_value(self): - from detectron2.config import CfgNode, get_cfg - - def _test_end_value(cfg_dict): - cfg = get_cfg() - cfg.merge_from_other_cfg(CfgNode(cfg_dict)) - - p = nn.Parameter(torch.zeros(0)) - opt = torch.optim.SGD([p], lr=cfg.SOLVER.BASE_LR) - - scheduler 
= build_lr_scheduler(cfg, opt) - - p.sum().backward() - opt.step() - self.assertEqual( - opt.param_groups[0]["lr"], cfg.SOLVER.BASE_LR * cfg.SOLVER.WARMUP_FACTOR - ) - - lrs = [] - for _ in range(cfg.SOLVER.MAX_ITER): - scheduler.step() - lrs.append(opt.param_groups[0]["lr"]) - - self.assertAlmostEqual(lrs[-1], cfg.SOLVER.BASE_LR_END) - - _test_end_value( - { - "SOLVER": { - "LR_SCHEDULER_NAME": "WarmupCosineLR", - "MAX_ITER": 100, - "WARMUP_ITERS": 10, - "WARMUP_FACTOR": 0.1, - "BASE_LR": 5.0, - "BASE_LR_END": 0.0, - } - } - ) - - _test_end_value( - { - "SOLVER": { - "LR_SCHEDULER_NAME": "WarmupCosineLR", - "MAX_ITER": 100, - "WARMUP_ITERS": 10, - "WARMUP_FACTOR": 0.1, - "BASE_LR": 5.0, - "BASE_LR_END": 0.5, - } - } - ) - - def test_warmup_stepwithfixedgamma(self): - p = nn.Parameter(torch.zeros(0)) - opt = torch.optim.SGD([p], lr=5) - - multiplier = WarmupParamScheduler( - StepWithFixedGammaParamScheduler( - base_value=1.0, - gamma=0.1, - num_decays=4, - num_updates=30, - ), - 0.001, - 5 / 30, - rescale_interval=True, - ) - sched = LRMultiplier(opt, multiplier, 30) - - p.sum().backward() - opt.step() - - lrs = [0.005] - for _ in range(29): - sched.step() - lrs.append(opt.param_groups[0]["lr"]) - self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001])) - self.assertTrue(np.allclose(lrs[5:10], 5.0)) - self.assertTrue(np.allclose(lrs[10:15], 0.5)) - self.assertTrue(np.allclose(lrs[15:20], 0.05)) - self.assertTrue(np.allclose(lrs[20:25], 0.005)) - self.assertTrue(np.allclose(lrs[25:], 0.0005)) - - # Calling sched.step() after the last training iteration is done will trigger IndexError - with self.assertRaises(IndexError, msg="list index out of range"): - sched.step() diff --git a/detectron2/tests/test_solver.py b/detectron2/tests/test_solver.py deleted file mode 100644 index 6b3ae84c00b789df071ab5d12bae42d991df1d0b..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_solver.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest - -from detectron2.solver.build import _expand_param_groups, reduce_param_groups - - -class TestOptimizer(unittest.TestCase): - def testExpandParamsGroups(self): - params = [ - { - "params": ["p1", "p2", "p3", "p4"], - "lr": 1.0, - "weight_decay": 3.0, - }, - { - "params": ["p2", "p3", "p5"], - "lr": 2.0, - "momentum": 2.0, - }, - { - "params": ["p1"], - "weight_decay": 4.0, - }, - ] - out = _expand_param_groups(params) - gt = [ - dict(params=["p1"], lr=1.0, weight_decay=4.0), # noqa - dict(params=["p2"], lr=2.0, weight_decay=3.0, momentum=2.0), # noqa - dict(params=["p3"], lr=2.0, weight_decay=3.0, momentum=2.0), # noqa - dict(params=["p4"], lr=1.0, weight_decay=3.0), # noqa - dict(params=["p5"], lr=2.0, momentum=2.0), # noqa - ] - self.assertEqual(out, gt) - - def testReduceParamGroups(self): - params = [ - dict(params=["p1"], lr=1.0, weight_decay=4.0), # noqa - dict(params=["p2", "p6"], lr=2.0, weight_decay=3.0, momentum=2.0), # noqa - dict(params=["p3"], lr=2.0, weight_decay=3.0, momentum=2.0), # noqa - dict(params=["p4"], lr=1.0, weight_decay=3.0), # noqa - dict(params=["p5"], lr=2.0, momentum=2.0), # noqa - ] - gt_groups = [ - { - "lr": 1.0, - "weight_decay": 4.0, - "params": ["p1"], - }, - { - "lr": 2.0, - "weight_decay": 3.0, - "momentum": 2.0, - "params": ["p2", "p6", "p3"], - }, - { - "lr": 1.0, - "weight_decay": 3.0, - "params": ["p4"], - }, - { - "lr": 2.0, - "momentum": 2.0, - "params": ["p5"], - }, - ] - out = reduce_param_groups(params) - self.assertEqual(out, gt_groups) diff --git a/detectron2/tests/test_visualizer.py
b/detectron2/tests/test_visualizer.py deleted file mode 100644 index 646e5f32b5c570bd8024c13b417a45c07aad8453..0000000000000000000000000000000000000000 --- a/detectron2/tests/test_visualizer.py +++ /dev/null @@ -1,278 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import numpy as np -import os -import tempfile -import unittest -import cv2 -import torch - -from detectron2.data import MetadataCatalog -from detectron2.structures import BoxMode, Instances, RotatedBoxes -from detectron2.utils.visualizer import ColorMode, Visualizer - - -class TestVisualizer(unittest.TestCase): - def _random_data(self): - H, W = 100, 100 - N = 10 - img = np.random.rand(H, W, 3) * 255 - boxxy = np.random.rand(N, 2) * (H // 2) - boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1) - - def _rand_poly(): - return np.random.rand(3, 2).flatten() * H - - polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)] - - mask = np.zeros_like(img[:, :, 0], dtype=bool) - mask[:40, 10:20] = 1 - - labels = [str(i) for i in range(N)] - return img, boxes, labels, polygons, [mask] * N - - @property - def metadata(self): - return MetadataCatalog.get("coco_2017_train") - - def test_draw_dataset_dict(self): - img = np.random.rand(512, 512, 3) * 255 - dic = { - "annotations": [ - { - "bbox": [ - 368.9946492271106, - 330.891438763377, - 13.148537455410235, - 13.644708680142685, - ], - "bbox_mode": BoxMode.XYWH_ABS, - "category_id": 0, - "iscrowd": 1, - "segmentation": { - "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2", - "size": [512, 512], - }, - } - ], - "height": 512, - "image_id": 1, - "width": 512, - } - v = Visualizer(img) - v.draw_dataset_dict(dic) - - v = Visualizer(img, self.metadata) - v.draw_dataset_dict(dic) - - def test_draw_rotated_dataset_dict(self): - img = np.random.rand(512, 512, 3) * 255 - dic = { - "annotations": [ - { - "bbox": [ - 368.9946492271106, - 330.891438763377, - 13.148537455410235, - 13.644708680142685, - 45.0, - ], - "bbox_mode": BoxMode.XYWHA_ABS, - "category_id": 0, - "iscrowd": 1, - } - ], - "height": 512, - "image_id": 1, - "width": 512, - } - v = Visualizer(img, self.metadata) - v.draw_dataset_dict(dic) - - def test_overlay_instances(self): - img, boxes, labels, polygons, masks = self._random_data() - - v = Visualizer(img, self.metadata) - output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - # Test 2x scaling - v = Visualizer(img, self.metadata, scale=2.0) - output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape[0], img.shape[0] * 2) - - # Test overlay masks - v = Visualizer(img, self.metadata) - output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - def test_overlay_instances_no_boxes(self): - img, boxes, labels, polygons, _ = self._random_data() - v = Visualizer(img, self.metadata) - v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image() - - def test_draw_instance_predictions(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.asarray(masks)) - - v = Visualizer(img) - v.draw_instance_predictions(inst) - - v = Visualizer(img, self.metadata) - 
v.draw_instance_predictions(inst) - - def test_BWmode_nomask(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - - v = Visualizer(img, self.metadata, instance_mode=ColorMode.IMAGE_BW) - v.draw_instance_predictions(inst) - - # check that output is grayscale - inst = inst[:0] - v = Visualizer(img, self.metadata, instance_mode=ColorMode.IMAGE_BW) - output = v.draw_instance_predictions(inst).get_image() - self.assertTrue(np.allclose(output[:, :, 0], output[:, :, 1])) - self.assertTrue(np.allclose(output[:, :, 0], output[:, :, 2])) - - def test_draw_empty_mask_predictions(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks))) - - v = Visualizer(img, self.metadata) - v.draw_instance_predictions(inst) - - def test_correct_output_shape(self): - img = np.random.rand(928, 928, 3) * 255 - v = Visualizer(img, self.metadata) - out = v.output.get_image() - self.assertEqual(out.shape, img.shape) - - def test_overlay_rotated_instances(self): - H, W = 100, 150 - img = np.random.rand(H, W, 3) * 255 - num_boxes = 50 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - rotated_boxes = RotatedBoxes(boxes_5d) - labels = [str(i) for i in range(num_boxes)] - - v = Visualizer(img, self.metadata) - output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - def test_draw_no_metadata(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.asarray(masks)) - - v = Visualizer(img, MetadataCatalog.get("asdfasdf")) - v.draw_instance_predictions(inst) - - def test_draw_binary_mask(self): - img, boxes, _, _, masks = self._random_data() - img[:, :, 0] = 0 # remove red color - mask = masks[0] - mask_with_hole = np.zeros_like(mask).astype("uint8") - mask_with_hole = cv2.rectangle(mask_with_hole, (10, 10), (50, 50), 1, 5) - - for m in [mask, mask_with_hole]: - for save in [True, False]: - v = Visualizer(img) - o = v.draw_binary_mask(m, color="red", text="test") - if save: - with tempfile.TemporaryDirectory(prefix="detectron2_viz") as d: - path = os.path.join(d, "output.png") - o.save(path) - o = cv2.imread(path)[:, :, ::-1] - else: - o = o.get_image().astype("float32") - # red color is drawn on the image - self.assertTrue(o[:, :, 0].sum() > 0) - - def test_draw_soft_mask(self): - img = np.random.rand(100, 100, 3) * 255 - img[:, :, 0] = 0 # remove red color - mask = np.zeros((100, 100), dtype=np.float32) - mask[30:50, 40:50] = 1.0 - cv2.GaussianBlur(mask, (21, 21), 10) - - 
v = Visualizer(img) - o = v.draw_soft_mask(mask, color="red", text="test") - o = o.get_image().astype("float32") - # red color is drawn on the image - self.assertTrue(o[:, :, 0].sum() > 0) - - # test draw empty mask - v = Visualizer(img) - o = v.draw_soft_mask(np.zeros((100, 100), dtype=np.float32), color="red", text="test") - o = o.get_image().astype("float32") - - def test_border_mask_with_holes(self): - H, W = 200, 200 - img = np.zeros((H, W, 3)) - img[:, :, 0] = 255.0 - v = Visualizer(img, scale=3) - - mask = np.zeros((H, W)) - mask[:, 100:150] = 1 - # create a hole, to trigger imshow - mask = cv2.rectangle(mask, (110, 110), (130, 130), 0, thickness=-1) - output = v.draw_binary_mask(mask, color="blue") - output = output.get_image()[:, :, ::-1] - - first_row = {tuple(x.tolist()) for x in output[0]} - last_row = {tuple(x.tolist()) for x in output[-1]} - # Check quantization / off-by-1 error: the first and last row must have two colors - self.assertEqual(len(last_row), 2) - self.assertEqual(len(first_row), 2) - self.assertIn((0, 0, 255), last_row) - self.assertIn((0, 0, 255), first_row) - - def test_border_polygons(self): - H, W = 200, 200 - img = np.zeros((H, W, 3)) - img[:, :, 0] = 255.0 - v = Visualizer(img, scale=3) - mask = np.zeros((H, W)) - mask[:, 100:150] = 1 - - output = v.draw_binary_mask(mask, color="blue") - output = output.get_image()[:, :, ::-1] - - first_row = {tuple(x.tolist()) for x in output[0]} - last_row = {tuple(x.tolist()) for x in output[-1]} - # Check quantization / off-by-1 error: - # the first and last row must have >=2 colors, because the polygon - # touches both rows - self.assertGreaterEqual(len(last_row), 2) - self.assertGreaterEqual(len(first_row), 2) - self.assertIn((0, 0, 255), last_row) - self.assertIn((0, 0, 255), first_row) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/tracking/__init__.py b/detectron2/tests/tracking/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/tests/tracking/test_bbox_iou_tracker.py b/detectron2/tests/tracking/test_bbox_iou_tracker.py deleted file mode 100644 index e720b2eb98788670c7daf2a694eff1fdc7b9f1bd..0000000000000000000000000000000000000000 --- a/detectron2/tests/tracking/test_bbox_iou_tracker.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
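
The tracker tests that follow all build their trackers from a plain `_target_` dict through detectron2's `instantiate`. As a minimal illustrative sketch (not part of the deleted files; the values simply mirror the test fixtures below), the pattern resolves the dotted `_target_` string to a class and passes the remaining keys as constructor arguments:

```python
# Illustrative sketch of the config pattern used by the tracker tests below.
# Values mirror the test fixtures; they are not a required configuration.
from detectron2.config import instantiate

cfg = {
    "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker",
    "video_height": 600,
    "video_width": 800,
    "max_num_instances": 200,
    "max_lost_frame_count": 0,
    "min_box_rel_dim": 0.02,
    "min_instance_period": 1,
    "track_iou_threshold": 0.5,
}
# instantiate() imports the class named in "_target_" and calls it with the
# remaining keys as keyword arguments, i.e. BBoxIOUTracker(video_height=600, ...).
tracker = instantiate(cfg)
```
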
-import numpy as np -import unittest -from copy import deepcopy -from typing import Dict -import torch - -from detectron2.config import CfgNode as CfgNode_ -from detectron2.config import instantiate -from detectron2.structures import Boxes, Instances -from detectron2.tracking.base_tracker import build_tracker_head -from detectron2.tracking.bbox_iou_tracker import BBoxIOUTracker # noqa - - -class TestBBoxIOUTracker(unittest.TestCase): - def setUp(self): - self._img_size = np.array([600, 800]) - self._prev_boxes = np.array( - [ - [101, 101, 200, 200], - [301, 301, 450, 450], - ] - ).astype(np.float32) - self._prev_scores = np.array([0.9, 0.9]) - self._prev_classes = np.array([1, 1]) - self._prev_masks = np.ones((2, 600, 800)).astype("uint8") - self._curr_boxes = np.array( - [ - [302, 303, 451, 452], - [101, 102, 201, 203], - ] - ).astype(np.float32) - self._curr_scores = np.array([0.95, 0.85]) - self._curr_classes = np.array([1, 1]) - self._curr_masks = np.ones((2, 600, 800)).astype("uint8") - - self._prev_instances = { - "image_size": self._img_size, - "pred_boxes": self._prev_boxes, - "scores": self._prev_scores, - "pred_classes": self._prev_classes, - "pred_masks": self._prev_masks, - } - self._prev_instances = self._convertDictPredictionToInstance(self._prev_instances) - self._curr_instances = { - "image_size": self._img_size, - "pred_boxes": self._curr_boxes, - "scores": self._curr_scores, - "pred_classes": self._curr_classes, - "pred_masks": self._curr_masks, - } - self._curr_instances = self._convertDictPredictionToInstance(self._curr_instances) - - self._max_num_instances = 200 - self._max_lost_frame_count = 0 - self._min_box_rel_dim = 0.02 - self._min_instance_period = 1 - self._track_iou_threshold = 0.5 - - def _convertDictPredictionToInstance(self, prediction: Dict) -> Instances: - """ - convert prediction from Dict to D2 Instances format - """ - res = Instances( - image_size=torch.IntTensor(prediction["image_size"]), - pred_boxes=Boxes(torch.FloatTensor(prediction["pred_boxes"])), - pred_masks=torch.IntTensor(prediction["pred_masks"]), - pred_classes=torch.IntTensor(prediction["pred_classes"]), - scores=torch.FloatTensor(prediction["scores"]), - ) - return res - - def test_init(self): - cfg = { - "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker", - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_from_config(self): - cfg = CfgNode_() - cfg.TRACKER_HEADS = CfgNode_() - cfg.TRACKER_HEADS.TRACKER_NAME = "BBoxIOUTracker" - cfg.TRACKER_HEADS.VIDEO_HEIGHT = int(self._img_size[0]) - cfg.TRACKER_HEADS.VIDEO_WIDTH = int(self._img_size[1]) - cfg.TRACKER_HEADS.MAX_NUM_INSTANCES = self._max_num_instances - cfg.TRACKER_HEADS.MAX_LOST_FRAME_COUNT = self._max_lost_frame_count - cfg.TRACKER_HEADS.MIN_BOX_REL_DIM = self._min_box_rel_dim - cfg.TRACKER_HEADS.MIN_INSTANCE_PERIOD = self._min_instance_period - cfg.TRACKER_HEADS.TRACK_IOU_THRESHOLD = self._track_iou_threshold - tracker = build_tracker_head(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_initialize_extra_fields(self): - cfg = { - "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker", - "video_height": 
self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - instances = tracker._initialize_extra_fields(self._curr_instances) - self.assertTrue(instances.has("ID")) - self.assertTrue(instances.has("ID_period")) - self.assertTrue(instances.has("lost_frame_count")) - - def test_assign_new_id(self): - cfg = { - "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker", - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - instances = deepcopy(self._curr_instances) - instances = tracker._initialize_extra_fields(instances) - instances = tracker._assign_new_id(instances) - self.assertTrue(len(instances.ID) == 2) - self.assertTrue(instances.ID[0] == 2) - self.assertTrue(instances.ID[1] == 3) - - def test_update(self): - cfg = { - "_target_": "detectron2.tracking.bbox_iou_tracker.BBoxIOUTracker", - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker.update(self._prev_instances) - self.assertTrue(len(prev_instances.ID) == 2) - self.assertTrue(prev_instances.ID[0] == 0) - self.assertTrue(prev_instances.ID[1] == 1) - curr_instances = tracker.update(self._curr_instances) - self.assertTrue(len(curr_instances.ID) == 2) - self.assertTrue(curr_instances.ID[0] == 1) - self.assertTrue(curr_instances.ID[1] == 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/tracking/test_hungarian_tracker.py b/detectron2/tests/tracking/test_hungarian_tracker.py deleted file mode 100644 index 660c635990a3370945e7f14422dcd978320e4782..0000000000000000000000000000000000000000 --- a/detectron2/tests/tracking/test_hungarian_tracker.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import numpy as np -import unittest -from typing import Dict -import torch - -from detectron2.config import instantiate -from detectron2.structures import Boxes, Instances - - -class TestBaseHungarianTracker(unittest.TestCase): - def setUp(self): - self._img_size = np.array([600, 800]) - self._prev_boxes = np.array( - [ - [101, 101, 200, 200], - [301, 301, 450, 450], - ] - ).astype(np.float32) - self._prev_scores = np.array([0.9, 0.9]) - self._prev_classes = np.array([1, 1]) - self._prev_masks = np.ones((2, 600, 800)).astype("uint8") - self._curr_boxes = np.array( - [ - [302, 303, 451, 452], - [101, 102, 201, 203], - ] - ).astype(np.float32) - self._curr_scores = np.array([0.95, 0.85]) - self._curr_classes = np.array([1, 1]) - self._curr_masks = np.ones((2, 600, 800)).astype("uint8") - - self._prev_instances = { - "image_size": self._img_size, - "pred_boxes": self._prev_boxes, - "scores": self._prev_scores, - "pred_classes": self._prev_classes, - "pred_masks": self._prev_masks, - } - self._prev_instances = self._convertDictPredictionToInstance(self._prev_instances) - self._curr_instances = { - "image_size": self._img_size, - "pred_boxes": self._curr_boxes, - "scores": self._curr_scores, - "pred_classes": self._curr_classes, - "pred_masks": self._curr_masks, - } - self._curr_instances = self._convertDictPredictionToInstance(self._curr_instances) - - self._max_num_instances = 200 - self._max_lost_frame_count = 0 - self._min_box_rel_dim = 0.02 - self._min_instance_period = 1 - self._track_iou_threshold = 0.5 - - def _convertDictPredictionToInstance(self, prediction: Dict) -> Instances: - """ - convert prediction from Dict to D2 Instances format - """ - res = Instances( - image_size=torch.IntTensor(prediction["image_size"]), - pred_boxes=Boxes(torch.FloatTensor(prediction["pred_boxes"])), - pred_masks=torch.IntTensor(prediction["pred_masks"]), - pred_classes=torch.IntTensor(prediction["pred_classes"]), - scores=torch.FloatTensor(prediction["scores"]), - ) - return res - - def test_init(self): - cfg = { - "_target_": "detectron2.tracking.hungarian_tracker.BaseHungarianTracker", - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_initialize_extra_fields(self): - cfg = { - "_target_": "detectron2.tracking.hungarian_tracker.BaseHungarianTracker", - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - instances = tracker._initialize_extra_fields(self._curr_instances) - self.assertTrue(instances.has("ID")) - self.assertTrue(instances.has("ID_period")) - self.assertTrue(instances.has("lost_frame_count")) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/tracking/test_iou_weighted_hungarian_bbox_iou_tracker.py b/detectron2/tests/tracking/test_iou_weighted_hungarian_bbox_iou_tracker.py deleted file mode 100644 index 6947399fc4bd356a5c0e8168334e490ab651ae27..0000000000000000000000000000000000000000 --- 
a/detectron2/tests/tracking/test_iou_weighted_hungarian_bbox_iou_tracker.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import copy -import numpy as np -import unittest -from typing import Dict -import torch - -from detectron2.config import CfgNode as CfgNode_ -from detectron2.config import instantiate -from detectron2.structures import Boxes, Instances -from detectron2.tracking.base_tracker import build_tracker_head -from detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker import ( # noqa - IOUWeightedHungarianBBoxIOUTracker, -) - - -class TestIOUWeightedHungarianBBoxIOUTracker(unittest.TestCase): - def setUp(self): - self._img_size = np.array([600, 800]) - self._prev_boxes = np.array( - [ - [101, 101, 200, 200], - [301, 301, 450, 450], - ] - ).astype(np.float32) - self._prev_scores = np.array([0.9, 0.9]) - self._prev_classes = np.array([1, 1]) - self._prev_masks = np.ones((2, 600, 800)).astype("uint8") - self._curr_boxes = np.array( - [ - [302, 303, 451, 452], - [101, 102, 201, 203], - ] - ).astype(np.float32) - self._curr_scores = np.array([0.95, 0.85]) - self._curr_classes = np.array([1, 1]) - self._curr_masks = np.ones((2, 600, 800)).astype("uint8") - - self._prev_instances = { - "image_size": self._img_size, - "pred_boxes": self._prev_boxes, - "scores": self._prev_scores, - "pred_classes": self._prev_classes, - "pred_masks": self._prev_masks, - } - self._prev_instances = self._convertDictPredictionToInstance(self._prev_instances) - self._curr_instances = { - "image_size": self._img_size, - "pred_boxes": self._curr_boxes, - "scores": self._curr_scores, - "pred_classes": self._curr_classes, - "pred_masks": self._curr_masks, - } - self._curr_instances = self._convertDictPredictionToInstance(self._curr_instances) - - self._max_num_instances = 10 - self._max_lost_frame_count = 3 - self._min_box_rel_dim = 0.02 - self._min_instance_period = 1 - self._track_iou_threshold = 0.5 - - def _convertDictPredictionToInstance(self, prediction: Dict) -> Instances: - """ - convert prediction from Dict to D2 Instances format - """ - res = Instances( - image_size=torch.IntTensor(prediction["image_size"]), - pred_boxes=Boxes(torch.FloatTensor(prediction["pred_boxes"])), - pred_masks=torch.IntTensor(prediction["pred_masks"]), - pred_classes=torch.IntTensor(prediction["pred_classes"]), - scores=torch.FloatTensor(prediction["scores"]), - ) - return res - - def test_init(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_from_config(self): - cfg = CfgNode_() - cfg.TRACKER_HEADS = CfgNode_() - cfg.TRACKER_HEADS.TRACKER_NAME = "IOUWeightedHungarianBBoxIOUTracker" - cfg.TRACKER_HEADS.VIDEO_HEIGHT = int(self._img_size[0]) - cfg.TRACKER_HEADS.VIDEO_WIDTH = int(self._img_size[1]) - cfg.TRACKER_HEADS.MAX_NUM_INSTANCES = self._max_num_instances - cfg.TRACKER_HEADS.MAX_LOST_FRAME_COUNT = self._max_lost_frame_count - cfg.TRACKER_HEADS.MIN_BOX_REL_DIM = self._min_box_rel_dim - cfg.TRACKER_HEADS.MIN_INSTANCE_PERIOD = self._min_instance_period - 
cfg.TRACKER_HEADS.TRACK_IOU_THRESHOLD = self._track_iou_threshold - tracker = build_tracker_head(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_initialize_extra_fields(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - instances = tracker._initialize_extra_fields(self._curr_instances) - self.assertTrue(instances.has("ID")) - self.assertTrue(instances.has("ID_period")) - self.assertTrue(instances.has("lost_frame_count")) - - def test_process_matched_idx(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker._initialize_extra_fields(self._prev_instances) - tracker._prev_instances = prev_instances - curr_instances = tracker._initialize_extra_fields(self._curr_instances) - matched_idx = np.array([0]) - matched_prev_idx = np.array([1]) - curr_instances = tracker._process_matched_idx(curr_instances, matched_idx, matched_prev_idx) - self.assertTrue(curr_instances.ID[0] == 1) - - def test_process_unmatched_idx(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker._initialize_extra_fields(self._prev_instances) - tracker._prev_instances = prev_instances - curr_instances = tracker._initialize_extra_fields(self._curr_instances) - matched_idx = np.array([0]) - matched_prev_idx = np.array([1]) - curr_instances = tracker._process_matched_idx(curr_instances, matched_idx, matched_prev_idx) - curr_instances = tracker._process_unmatched_idx(curr_instances, matched_idx) - self.assertTrue(curr_instances.ID[1] == 2) - - def test_process_unmatched_prev_idx(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker._initialize_extra_fields(self._prev_instances) - prev_instances.ID_period = [3, 3] - tracker._prev_instances = prev_instances - curr_instances = 
tracker._initialize_extra_fields(self._curr_instances) - matched_idx = np.array([0]) - matched_prev_idx = np.array([1]) - curr_instances = tracker._process_matched_idx(curr_instances, matched_idx, matched_prev_idx) - curr_instances = tracker._process_unmatched_idx(curr_instances, matched_idx) - curr_instances = tracker._process_unmatched_prev_idx(curr_instances, matched_prev_idx) - self.assertTrue(curr_instances.ID[2] == 0) - - def test_assign_cost_matrix_values(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - pair1 = {"idx": 0, "prev_idx": 1, "IoU": 0.6} - pair2 = {"idx": 1, "prev_idx": 0, "IoU": 0.8} - bbox_pairs = [pair1, pair2] - cost_matrix = np.full((2, 2), np.inf) - target_matrix = copy.deepcopy(cost_matrix) - target_matrix[0, 1] = -0.6 - target_matrix[1, 0] = -0.8 - cost_matrix = tracker.assign_cost_matrix_values(cost_matrix, bbox_pairs) - self.assertTrue(np.allclose(cost_matrix, target_matrix)) - - def test_update(self): - cfg = { - "_target_": "detectron2.tracking.iou_weighted_hungarian_bbox_iou_tracker.IOUWeightedHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - _ = tracker.update(self._prev_instances) - curr_instances = tracker.update(self._curr_instances) - self.assertTrue(curr_instances.ID[0] == 1) - self.assertTrue(curr_instances.ID[1] == 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/tracking/test_vanilla_hungarian_bbox_iou_tracker.py b/detectron2/tests/tracking/test_vanilla_hungarian_bbox_iou_tracker.py deleted file mode 100644 index c33e3d971583c52e29284ab9538e4a2ba4e5d8d5..0000000000000000000000000000000000000000 --- a/detectron2/tests/tracking/test_vanilla_hungarian_bbox_iou_tracker.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-import copy -import numpy as np -import unittest -from typing import Dict -import torch - -from detectron2.config import CfgNode as CfgNode_ -from detectron2.config import instantiate -from detectron2.structures import Boxes, Instances -from detectron2.tracking.base_tracker import build_tracker_head -from detectron2.tracking.vanilla_hungarian_bbox_iou_tracker import ( # noqa - VanillaHungarianBBoxIOUTracker, -) - - -class TestVanillaHungarianBBoxIOUTracker(unittest.TestCase): - def setUp(self): - self._img_size = np.array([600, 800]) - self._prev_boxes = np.array( - [ - [101, 101, 200, 200], - [301, 301, 450, 450], - ] - ).astype(np.float32) - self._prev_scores = np.array([0.9, 0.9]) - self._prev_classes = np.array([1, 1]) - self._prev_masks = np.ones((2, 600, 800)).astype("uint8") - self._curr_boxes = np.array( - [ - [302, 303, 451, 452], - [101, 102, 201, 203], - ] - ).astype(np.float32) - self._curr_scores = np.array([0.95, 0.85]) - self._curr_classes = np.array([1, 1]) - self._curr_masks = np.ones((2, 600, 800)).astype("uint8") - - self._prev_instances = { - "image_size": self._img_size, - "pred_boxes": self._prev_boxes, - "scores": self._prev_scores, - "pred_classes": self._prev_classes, - "pred_masks": self._prev_masks, - } - self._prev_instances = self._convertDictPredictionToInstance(self._prev_instances) - self._curr_instances = { - "image_size": self._img_size, - "pred_boxes": self._curr_boxes, - "scores": self._curr_scores, - "pred_classes": self._curr_classes, - "pred_masks": self._curr_masks, - } - self._curr_instances = self._convertDictPredictionToInstance(self._curr_instances) - - self._max_num_instances = 10 - self._max_lost_frame_count = 3 - self._min_box_rel_dim = 0.02 - self._min_instance_period = 1 - self._track_iou_threshold = 0.5 - - def _convertDictPredictionToInstance(self, prediction: Dict) -> Instances: - """ - convert prediction from Dict to D2 Instances format - """ - res = Instances( - image_size=torch.IntTensor(prediction["image_size"]), - pred_boxes=Boxes(torch.FloatTensor(prediction["pred_boxes"])), - pred_masks=torch.IntTensor(prediction["pred_masks"]), - pred_classes=torch.IntTensor(prediction["pred_classes"]), - scores=torch.FloatTensor(prediction["scores"]), - ) - return res - - def test_init(self): - cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_from_config(self): - cfg = CfgNode_() - cfg.TRACKER_HEADS = CfgNode_() - cfg.TRACKER_HEADS.TRACKER_NAME = "VanillaHungarianBBoxIOUTracker" - cfg.TRACKER_HEADS.VIDEO_HEIGHT = int(self._img_size[0]) - cfg.TRACKER_HEADS.VIDEO_WIDTH = int(self._img_size[1]) - cfg.TRACKER_HEADS.MAX_NUM_INSTANCES = self._max_num_instances - cfg.TRACKER_HEADS.MAX_LOST_FRAME_COUNT = self._max_lost_frame_count - cfg.TRACKER_HEADS.MIN_BOX_REL_DIM = self._min_box_rel_dim - cfg.TRACKER_HEADS.MIN_INSTANCE_PERIOD = self._min_instance_period - cfg.TRACKER_HEADS.TRACK_IOU_THRESHOLD = self._track_iou_threshold - tracker = build_tracker_head(cfg) - self.assertTrue(tracker._video_height == self._img_size[0]) - - def test_initialize_extra_fields(self): 
- cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - instances = tracker._initialize_extra_fields(self._curr_instances) - self.assertTrue(instances.has("ID")) - self.assertTrue(instances.has("ID_period")) - self.assertTrue(instances.has("lost_frame_count")) - - def test_process_matched_idx(self): - cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker._initialize_extra_fields(self._prev_instances) - tracker._prev_instances = prev_instances - curr_instances = tracker._initialize_extra_fields(self._curr_instances) - matched_idx = np.array([0]) - matched_prev_idx = np.array([1]) - curr_instances = tracker._process_matched_idx(curr_instances, matched_idx, matched_prev_idx) - self.assertTrue(curr_instances.ID[0] == 1) - - def test_process_unmatched_idx(self): - cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker._initialize_extra_fields(self._prev_instances) - tracker._prev_instances = prev_instances - curr_instances = tracker._initialize_extra_fields(self._curr_instances) - matched_idx = np.array([0]) - matched_prev_idx = np.array([1]) - curr_instances = tracker._process_matched_idx(curr_instances, matched_idx, matched_prev_idx) - curr_instances = tracker._process_unmatched_idx(curr_instances, matched_idx) - self.assertTrue(curr_instances.ID[1] == 2) - - def test_process_unmatched_prev_idx(self): - cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - prev_instances = tracker._initialize_extra_fields(self._prev_instances) - prev_instances.ID_period = [3, 3] - tracker._prev_instances = prev_instances - curr_instances = tracker._initialize_extra_fields(self._curr_instances) - matched_idx = np.array([0]) - matched_prev_idx = np.array([1]) - curr_instances = tracker._process_matched_idx(curr_instances, matched_idx, matched_prev_idx) - curr_instances = 
tracker._process_unmatched_idx(curr_instances, matched_idx) - curr_instances = tracker._process_unmatched_prev_idx(curr_instances, matched_prev_idx) - self.assertTrue(curr_instances.ID[2] == 0) - - def test_assign_cost_matrix_values(self): - cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - pair1 = {"idx": 0, "prev_idx": 1} - pair2 = {"idx": 1, "prev_idx": 0} - bbox_pairs = [pair1, pair2] - cost_matrix = np.full((2, 2), np.inf) - target_matrix = copy.deepcopy(cost_matrix) - target_matrix[0, 1] = -1 - target_matrix[1, 0] = -1 - cost_matrix = tracker.assign_cost_matrix_values(cost_matrix, bbox_pairs) - self.assertTrue(np.allclose(cost_matrix, target_matrix)) - - def test_update(self): - cfg = { - "_target_": "detectron2.tracking.vanilla_hungarian_bbox_iou_tracker.VanillaHungarianBBoxIOUTracker", # noqa - "video_height": self._img_size[0], - "video_width": self._img_size[1], - "max_num_instances": self._max_num_instances, - "max_lost_frame_count": self._max_lost_frame_count, - "min_box_rel_dim": self._min_box_rel_dim, - "min_instance_period": self._min_instance_period, - "track_iou_threshold": self._track_iou_threshold, - } - tracker = instantiate(cfg) - _ = tracker.update(self._prev_instances) - curr_instances = tracker.update(self._curr_instances) - self.assertTrue(curr_instances.ID[0] == 1) - self.assertTrue(curr_instances.ID[1] == 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/detectron2/tests/utils/test_tensorboardx.py b/detectron2/tests/utils/test_tensorboardx.py deleted file mode 100644 index 885fb8d3576ff598b988427137c421bc17a41aaf..0000000000000000000000000000000000000000 --- a/detectron2/tests/utils/test_tensorboardx.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import tempfile -import unittest - -from detectron2.utils.events import TensorboardXWriter - - -# TODO Fix up capitalization -class TestTensorboardXWriter(unittest.TestCase): - def test_no_files_created(self) -> None: - with tempfile.TemporaryDirectory() as tmp_dir: - writer = TensorboardXWriter(tmp_dir) - writer.close() - - self.assertFalse(os.listdir(tmp_dir)) - - def test_single_write(self) -> None: - with tempfile.TemporaryDirectory() as tmp_dir: - writer = TensorboardXWriter(tmp_dir) - writer._writer.add_scalar("testing", 1, 1) - writer.close() - - self.assertTrue(os.listdir(tmp_dir)) diff --git a/detectron2/tools/README.md b/detectron2/tools/README.md deleted file mode 100644 index 0b40d5319c0838fdaa22bc6a10ef0d88bc6578ed..0000000000000000000000000000000000000000 --- a/detectron2/tools/README.md +++ /dev/null @@ -1,49 +0,0 @@ - -This directory contains a few example scripts that demonstrate features of detectron2. - - -* `train_net.py` - -An example training script that's made to train builtin models of detectron2. - -For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md). - -* `plain_train_net.py` - -Similar to `train_net.py`, but implements a training loop instead of using `Trainer`. -This script includes fewer features but it may be more friendly to hackers. - -* `benchmark.py` - -Benchmark the training speed, inference speed or data loading speed of a given config. 
- -Usage: -``` -python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags] -``` - -* `analyze_model.py` - -Analyze FLOPs, parameters, activations of a detectron2 model. See its `--help` for usage. - -* `visualize_json_results.py` - -Visualize the json instance detection/segmentation results dumped by `COCOEvalutor` or `LVISEvaluator` - -Usage: -``` -python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val -``` -If not using a builtin dataset, you'll need your own script or modify this script. - -* `visualize_data.py` - -Visualize ground truth raw annotations or training data (after preprocessing/augmentations). - -Usage: -``` -python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show] -``` - -NOTE: the script does not stop by itself when using `--source dataloader` because a training -dataloader is usually infinite. diff --git a/detectron2/tools/__init__.py b/detectron2/tools/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/detectron2/tools/analyze_model.py b/detectron2/tools/analyze_model.py deleted file mode 100644 index ba177d7cd97050e413f12a70e85c4fb760ff8029..0000000000000000000000000000000000000000 --- a/detectron2/tools/analyze_model.py +++ /dev/null @@ -1,164 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. - -import logging -import numpy as np -from collections import Counter -import tqdm -from fvcore.nn import flop_count_table # can also try flop_count_str - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import CfgNode, LazyConfig, get_cfg, instantiate -from detectron2.data import build_detection_test_loader -from detectron2.engine import default_argument_parser -from detectron2.modeling import build_model -from detectron2.utils.analysis import ( - FlopCountAnalysis, - activation_count_operators, - parameter_count_table, -) -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger("detectron2") - - -def setup(args): - if args.config_file.endswith(".yaml"): - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.DATALOADER.NUM_WORKERS = 0 - cfg.merge_from_list(args.opts) - cfg.freeze() - else: - cfg = LazyConfig.load(args.config_file) - cfg = LazyConfig.apply_overrides(cfg, args.opts) - setup_logger(name="fvcore") - setup_logger() - return cfg - - -def do_flop(cfg): - if isinstance(cfg, CfgNode): - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - else: - data_loader = instantiate(cfg.dataloader.test) - model = instantiate(cfg.model) - model.to(cfg.train.device) - DetectionCheckpointer(model).load(cfg.train.init_checkpoint) - model.eval() - - counts = Counter() - total_flops = [] - for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa - flops = FlopCountAnalysis(model, data) - if idx > 0: - flops.unsupported_ops_warnings(False).uncalled_modules_warnings(False) - counts += flops.by_operator() - total_flops.append(flops.total()) - - logger.info("Flops table computed from only one input sample:\n" + flop_count_table(flops)) - logger.info( - "Average GFlops for each type of operators:\n" - + str([(k, v / (idx + 1) / 1e9) for k, v in counts.items()]) - ) - logger.info( - "Total GFlops: {:.1f}Β±{:.1f}".format(np.mean(total_flops) / 1e9, np.std(total_flops) / 1e9) - ) - - 
-def do_activation(cfg): - if isinstance(cfg, CfgNode): - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - else: - data_loader = instantiate(cfg.dataloader.test) - model = instantiate(cfg.model) - model.to(cfg.train.device) - DetectionCheckpointer(model).load(cfg.train.init_checkpoint) - model.eval() - - counts = Counter() - total_activations = [] - for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa - count = activation_count_operators(model, data) - counts += count - total_activations.append(sum(count.values())) - logger.info( - "(Million) Activations for Each Type of Operators:\n" - + str([(k, v / idx) for k, v in counts.items()]) - ) - logger.info( - "Total (Million) Activations: {}Β±{}".format( - np.mean(total_activations), np.std(total_activations) - ) - ) - - -def do_parameter(cfg): - if isinstance(cfg, CfgNode): - model = build_model(cfg) - else: - model = instantiate(cfg.model) - logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5)) - - -def do_structure(cfg): - if isinstance(cfg, CfgNode): - model = build_model(cfg) - else: - model = instantiate(cfg.model) - logger.info("Model Structure:\n" + str(model)) - - -def main() -> None: - global cfg, args - parser = default_argument_parser( - epilog=""" -Examples: - -To show parameters of a model: -$ ./analyze_model.py --tasks parameter \\ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml - -Flops and activations are data-dependent, therefore inputs and model weights -are needed to count them: - -$ ./analyze_model.py --num-inputs 100 --tasks flop \\ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\ - MODEL.WEIGHTS /path/to/model.pkl -""" - ) - parser.add_argument( - "--tasks", - choices=["flop", "activation", "parameter", "structure"], - required=True, - nargs="+", - ) - parser.add_argument( - "-n", - "--num-inputs", - default=100, - type=int, - help="number of inputs used to compute statistics for flops/activations, " - "both are data dependent.", - ) - args = parser.parse_args() - assert not args.eval_only - assert args.num_gpus == 1 - - cfg = setup(args) - - for task in args.tasks: - { - "flop": do_flop, - "activation": do_activation, - "parameter": do_parameter, - "structure": do_structure, - }[task](cfg) - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/tools/benchmark.py b/detectron2/tools/benchmark.py deleted file mode 100644 index c80c8ca76fe24fc1d424dd9d02453d1e0fefaeec..0000000000000000000000000000000000000000 --- a/detectron2/tools/benchmark.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -""" -A script to benchmark builtin models. - -Note: this script has an extra dependency of psutil. 
-""" - -import itertools -import logging -import psutil -import torch -import tqdm -from fvcore.common.timer import Timer -from torch.nn.parallel import DistributedDataParallel - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import LazyConfig, get_cfg, instantiate -from detectron2.data import ( - DatasetFromList, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.data.benchmark import DataLoaderBenchmark -from detectron2.engine import AMPTrainer, SimpleTrainer, default_argument_parser, hooks, launch -from detectron2.modeling import build_model -from detectron2.solver import build_optimizer -from detectron2.utils import comm -from detectron2.utils.collect_env import collect_env_info -from detectron2.utils.events import CommonMetricPrinter -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger("detectron2") - - -def setup(args): - if args.config_file.endswith(".yaml"): - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. - cfg.merge_from_list(args.opts) - cfg.freeze() - else: - cfg = LazyConfig.load(args.config_file) - cfg = LazyConfig.apply_overrides(cfg, args.opts) - setup_logger(distributed_rank=comm.get_rank()) - return cfg - - -def create_data_benchmark(cfg, args): - if args.config_file.endswith(".py"): - dl_cfg = cfg.dataloader.train - dl_cfg._target_ = DataLoaderBenchmark - return instantiate(dl_cfg) - else: - kwargs = build_detection_train_loader.from_config(cfg) - kwargs.pop("aspect_ratio_grouping", None) - kwargs["_target_"] = DataLoaderBenchmark - return instantiate(kwargs) - - -def RAM_msg(): - vram = psutil.virtual_memory() - return "RAM Usage: {:.2f}/{:.2f} GB".format( - (vram.total - vram.available) / 1024**3, vram.total / 1024**3 - ) - - -def benchmark_data(args): - cfg = setup(args) - logger.info("After spawning " + RAM_msg()) - - benchmark = create_data_benchmark(cfg, args) - benchmark.benchmark_distributed(250, 10) - # test for a few more rounds - for k in range(10): - logger.info(f"Iteration {k} " + RAM_msg()) - benchmark.benchmark_distributed(250, 1) - - -def benchmark_data_advanced(args): - # benchmark dataloader with more details to help analyze performance bottleneck - cfg = setup(args) - benchmark = create_data_benchmark(cfg, args) - - if comm.get_rank() == 0: - benchmark.benchmark_dataset(100) - benchmark.benchmark_mapper(100) - benchmark.benchmark_workers(100, warmup=10) - benchmark.benchmark_IPC(100, warmup=10) - if comm.get_world_size() > 1: - benchmark.benchmark_distributed(100) - logger.info("Rerun ...") - benchmark.benchmark_distributed(100) - - -def benchmark_train(args): - cfg = setup(args) - model = build_model(cfg) - logger.info("Model:\n{}".format(model)) - if comm.get_world_size() > 1: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - optimizer = build_optimizer(cfg, model) - checkpointer = DetectionCheckpointer(model, optimizer=optimizer) - checkpointer.load(cfg.MODEL.WEIGHTS) - - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 2 - data_loader = build_detection_train_loader(cfg) - dummy_data = list(itertools.islice(data_loader, 100)) - - def f(): - data = DatasetFromList(dummy_data, copy=False, serialize=False) - while True: - yield from data - - max_iter = 400 - trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(model, f(), optimizer) - trainer.register_hooks( - [ - hooks.IterationTimer(), - 
hooks.PeriodicWriter([CommonMetricPrinter(max_iter)]), - hooks.TorchProfiler( - lambda trainer: trainer.iter == max_iter - 1, - cfg.OUTPUT_DIR, - save_tensorboard=True, - ), - ] - ) - trainer.train(1, max_iter) - - -@torch.no_grad() -def benchmark_eval(args): - cfg = setup(args) - if args.config_file.endswith(".yaml"): - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - else: - model = instantiate(cfg.model) - model.to(cfg.train.device) - DetectionCheckpointer(model).load(cfg.train.init_checkpoint) - - cfg.dataloader.num_workers = 0 - data_loader = instantiate(cfg.dataloader.test) - - model.eval() - logger.info("Model:\n{}".format(model)) - dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)), copy=False) - - def f(): - while True: - yield from dummy_data - - for k in range(5): # warmup - model(dummy_data[k]) - - max_iter = 300 - timer = Timer() - with tqdm.tqdm(total=max_iter) as pbar: - for idx, d in enumerate(f()): - if idx == max_iter: - break - model(d) - pbar.update() - logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) - - -def main() -> None: - parser = default_argument_parser() - parser.add_argument("--task", choices=["train", "eval", "data", "data_advanced"], required=True) - args = parser.parse_args() - assert not args.eval_only - - logger.info("Environment info:\n" + collect_env_info()) - if "data" in args.task: - print("Initial " + RAM_msg()) - if args.task == "data": - f = benchmark_data - if args.task == "data_advanced": - f = benchmark_data_advanced - elif args.task == "train": - """ - Note: training speed may not be representative. - The training cost of a R-CNN model varies with the content of the data - and the quality of the model. - """ - f = benchmark_train - elif args.task == "eval": - f = benchmark_eval - # only benchmark single-GPU inference. - assert args.num_gpus == 1 and args.num_machines == 1 - launch( - f, - args.num_gpus, - args.num_machines, - args.machine_rank, - args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/tools/convert-torchvision-to-d2.py b/detectron2/tools/convert-torchvision-to-d2.py deleted file mode 100644 index 4b827d960cca69657e98bd89a9aa5623a847099d..0000000000000000000000000000000000000000 --- a/detectron2/tools/convert-torchvision-to-d2.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. - -import pickle as pkl -import sys -import torch - -""" -Usage: - # download one of the ResNet{18,34,50,101,152} models from torchvision: - wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth - # run the conversion - ./convert-torchvision-to-d2.py r50.pth r50.pkl - - # Then, use r50.pkl with the following changes in config: - -MODEL: - WEIGHTS: "/path/to/r50.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - RESNETS: - DEPTH: 50 - STRIDE_IN_1X1: False -INPUT: - FORMAT: "RGB" - - These models typically produce slightly worse results than the - pre-trained ResNets we use in official configs, which are the - original ResNet models released by MSRA. -""" - -if __name__ == "__main__": - input = sys.argv[1] - - obj = torch.load(input, map_location="cpu") - - newmodel = {} - for k in list(obj.keys()): - old_k = k - if "layer" not in k: - k = "stem." 
+ k - for t in [1, 2, 3, 4]: - k = k.replace("layer{}".format(t), "res{}".format(t + 1)) - for t in [1, 2, 3]: - k = k.replace("bn{}".format(t), "conv{}.norm".format(t)) - k = k.replace("downsample.0", "shortcut") - k = k.replace("downsample.1", "shortcut.norm") - print(old_k, "->", k) - newmodel[k] = obj.pop(old_k).detach().numpy() - - res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True} - - with open(sys.argv[2], "wb") as f: - pkl.dump(res, f) - if obj: - print("Unconverted keys:", obj.keys()) diff --git a/detectron2/tools/deploy/CMakeLists.txt b/detectron2/tools/deploy/CMakeLists.txt deleted file mode 100644 index 80dae12500af4c7e7e6cfc5b7b3a5800782956c3..0000000000000000000000000000000000000000 --- a/detectron2/tools/deploy/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# See https://pytorch.org/tutorials/advanced/cpp_frontend.html -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) -project(torchscript_mask_rcnn) - -find_package(Torch REQUIRED) -find_package(OpenCV REQUIRED) -find_package(TorchVision REQUIRED) # needed by export-method=tracing/scripting - -add_executable(torchscript_mask_rcnn torchscript_mask_rcnn.cpp) -target_link_libraries( - torchscript_mask_rcnn - -Wl,--no-as-needed TorchVision::TorchVision -Wl,--as-needed - "${TORCH_LIBRARIES}" ${OpenCV_LIBS}) -set_property(TARGET torchscript_mask_rcnn PROPERTY CXX_STANDARD 14) diff --git a/detectron2/tools/deploy/README.md b/detectron2/tools/deploy/README.md deleted file mode 100644 index e33cbeb54c003a5738da68c838fdaa4e0d218501..0000000000000000000000000000000000000000 --- a/detectron2/tools/deploy/README.md +++ /dev/null @@ -1,66 +0,0 @@ -See [deployment tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html) -for some high-level background about deployment. - -This directory contains the following examples: - -1. An example script `export_model.py` - that exports a detectron2 model for deployment using different methods and formats. - -2. A C++ example that runs inference with Mask R-CNN model in TorchScript format. - -## Build -Deployment depends on libtorch and OpenCV. Some require more dependencies: - -* Running TorchScript-format models produced by `--export-method=caffe2_tracing` requires libtorch - to be built with caffe2 enabled. -* Running TorchScript-format models produced by `--export-method=tracing/scripting` requires libtorchvision (C++ library of torchvision). - -All methods are supported in one C++ file that requires all the above dependencies. -Adjust it and remove code you don't need. -As a reference, we provide a [Dockerfile](../../docker/deploy.Dockerfile) that installs all the above dependencies and builds the C++ example. - -## Use - -We show a few example commands to export and execute a Mask R-CNN model in C++. 
- -* `export-method=tracing, format=torchscript`: -``` -./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --output ./output --export-method tracing --format torchscript \ - MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ - MODEL.DEVICE cuda - -./build/torchscript_mask_rcnn output/model.ts input.jpg tracing -``` - -* `export-method=scripting, format=torchscript`: -``` -./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --output ./output --export-method scripting --format torchscript \ - MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ - -./build/torchscript_mask_rcnn output/model.ts input.jpg scripting -``` - -* `export-method=caffe2_tracing, format=torchscript`: - -``` -./export_model.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --output ./output --export-method caffe2_tracing --format torchscript \ - MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ - -./build/torchscript_mask_rcnn output/model.ts input.jpg caffe2_tracing -``` - - -## Notes: - -1. Tracing/Caffe2-tracing requires valid weights & sample inputs. - Therefore the above commands require pre-trained models and [COCO dataset](https://detectron2.readthedocs.io/tutorials/builtin_datasets.html). - You can modify the script to obtain sample inputs in other ways instead of from COCO. - -2. `--run-eval` is implemented only for tracing mode - to evaluate the exported model using the dataset in the config. - It's recommended to always verify the accuracy in case the conversion is not successful. - Evaluation can be slow if model is exported to CPU or dataset is too large ("coco_2017_val_100" is a small subset of COCO useful for evaluation). - `caffe2_tracing` accuracy may be slightly different (within 0.1 AP) from original model due to numerical precisions between different runtime. diff --git a/detectron2/tools/deploy/export_model.py b/detectron2/tools/deploy/export_model.py deleted file mode 100644 index 560143d77716315f5ebd541007332c0b52f4b3f1..0000000000000000000000000000000000000000 --- a/detectron2/tools/deploy/export_model.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. 
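
The C++ commands above consume the `model.ts` file written by `export_model.py`. As an illustrative aside (not part of the deleted files), the same TorchScript artifact can also be loaded from Python with the standard `torch.jit` API; the sketch below assumes the tracing export, which traces the adapter on a single preprocessed image tensor, and uses a dummy input for demonstration:

```python
# Illustrative only: consume the TorchScript model produced by
#   ./export_model.py --export-method tracing --format torchscript --output ./output ...
# The exact input/output layout is logged as "Inputs schema" / "Outputs schema"
# during export; a CHW uint8 image tensor is assumed here.
import torch

ts_model = torch.jit.load("output/model.ts")  # path from --output ./output
ts_model.eval()

image = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8)  # dummy CHW image
with torch.no_grad():
    outputs = ts_model(image)  # flattened tensors; see the export log for their schema
```
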
-import argparse -import os -from typing import Dict, List, Tuple -import torch -from torch import Tensor, nn - -import detectron2.data.transforms as T -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import build_detection_test_loader, detection_utils -from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format -from detectron2.export import ( - STABLE_ONNX_OPSET_VERSION, - TracingAdapter, - dump_torchscript_IR, - scripting_with_instances, -) -from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model -from detectron2.modeling.postprocessing import detector_postprocess -from detectron2.projects.point_rend import add_pointrend_config -from detectron2.structures import Boxes -from detectron2.utils.env import TORCH_VERSION -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import setup_logger - - -def setup_cfg(args): - cfg = get_cfg() - # cuda context is initialized before creating dataloader, so we don't fork anymore - cfg.DATALOADER.NUM_WORKERS = 0 - add_pointrend_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - return cfg - - -def export_caffe2_tracing(cfg, torch_model, inputs): - from detectron2.export import Caffe2Tracer - - tracer = Caffe2Tracer(cfg, torch_model, inputs) - if args.format == "caffe2": - caffe2_model = tracer.export_caffe2() - caffe2_model.save_protobuf(args.output) - # draw the caffe2 graph - caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=inputs) - return caffe2_model - elif args.format == "onnx": - import onnx - - onnx_model = tracer.export_onnx() - onnx.save(onnx_model, os.path.join(args.output, "model.onnx")) - elif args.format == "torchscript": - ts_model = tracer.export_torchscript() - with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: - torch.jit.save(ts_model, f) - dump_torchscript_IR(ts_model, args.output) - - -# experimental. API not yet final -def export_scripting(torch_model): - assert TORCH_VERSION >= (1, 8) - fields = { - "proposal_boxes": Boxes, - "objectness_logits": Tensor, - "pred_boxes": Boxes, - "scores": Tensor, - "pred_classes": Tensor, - "pred_masks": Tensor, - "pred_keypoints": torch.Tensor, - "pred_keypoint_heatmaps": torch.Tensor, - } - assert args.format == "torchscript", "Scripting only supports torchscript format." - - class ScriptableAdapterBase(nn.Module): - # Use this adapter to workaround https://github.com/pytorch/pytorch/issues/46944 - # by not retuning instances but dicts. 
Otherwise the exported model is not deployable - def __init__(self): - super().__init__() - self.model = torch_model - self.eval() - - if isinstance(torch_model, GeneralizedRCNN): - - class ScriptableAdapter(ScriptableAdapterBase): - def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]: - instances = self.model.inference(inputs, do_postprocess=False) - return [i.get_fields() for i in instances] - - else: - - class ScriptableAdapter(ScriptableAdapterBase): - def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]: - instances = self.model(inputs) - return [i.get_fields() for i in instances] - - ts_model = scripting_with_instances(ScriptableAdapter(), fields) - with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: - torch.jit.save(ts_model, f) - dump_torchscript_IR(ts_model, args.output) - # TODO inference in Python now missing postprocessing glue code - return None - - -# experimental. API not yet final -def export_tracing(torch_model, inputs): - assert TORCH_VERSION >= (1, 8) - image = inputs[0]["image"] - inputs = [{"image": image}] # remove other unused keys - - if isinstance(torch_model, GeneralizedRCNN): - - def inference(model, inputs): - # use do_postprocess=False so it returns ROI mask - inst = model.inference(inputs, do_postprocess=False)[0] - return [{"instances": inst}] - - else: - inference = None # assume that we just call the model directly - - traceable_model = TracingAdapter(torch_model, inputs, inference) - - if args.format == "torchscript": - ts_model = torch.jit.trace(traceable_model, (image,)) - with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f: - torch.jit.save(ts_model, f) - dump_torchscript_IR(ts_model, args.output) - elif args.format == "onnx": - with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f: - torch.onnx.export(traceable_model, (image,), f, opset_version=STABLE_ONNX_OPSET_VERSION) - logger.info("Inputs schema: " + str(traceable_model.inputs_schema)) - logger.info("Outputs schema: " + str(traceable_model.outputs_schema)) - - if args.format != "torchscript": - return None - if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)): - return None - - def eval_wrapper(inputs): - """ - The exported model does not contain the final resize step, which is typically - unused in deployment but needed for evaluation. We add it manually here. 
- """ - input = inputs[0] - instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"] - postprocessed = detector_postprocess(instances, input["height"], input["width"]) - return [{"instances": postprocessed}] - - return eval_wrapper - - -def get_sample_inputs(args): - - if args.sample_image is None: - # get a first batch from dataset - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - first_batch = next(iter(data_loader)) - return first_batch - else: - # get a sample data - original_image = detection_utils.read_image(args.sample_image, format=cfg.INPUT.FORMAT) - # Do same preprocessing as DefaultPredictor - aug = T.ResizeShortestEdge( - [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST - ) - height, width = original_image.shape[:2] - image = aug.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) - - inputs = {"image": image, "height": height, "width": width} - - # Sample ready - sample_inputs = [inputs] - return sample_inputs - - -def main() -> None: - global logger, cfg, args - parser = argparse.ArgumentParser(description="Export a model for deployment.") - parser.add_argument( - "--format", - choices=["caffe2", "onnx", "torchscript"], - help="output format", - default="torchscript", - ) - parser.add_argument( - "--export-method", - choices=["caffe2_tracing", "tracing", "scripting"], - help="Method to export models", - default="tracing", - ) - parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") - parser.add_argument("--sample-image", default=None, type=str, help="sample image for input") - parser.add_argument("--run-eval", action="store_true") - parser.add_argument("--output", help="output directory for the converted model") - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - args = parser.parse_args() - logger = setup_logger() - logger.info("Command line arguments: " + str(args)) - PathManager.mkdirs(args.output) - # Disable re-specialization on new shapes. Otherwise --run-eval will be slow - torch._C._jit_set_bailout_depth(1) - - cfg = setup_cfg(args) - - # create a torch model - torch_model = build_model(cfg) - DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) - torch_model.eval() - - # convert and save model - if args.export_method == "caffe2_tracing": - sample_inputs = get_sample_inputs(args) - exported_model = export_caffe2_tracing(cfg, torch_model, sample_inputs) - elif args.export_method == "scripting": - exported_model = export_scripting(torch_model) - elif args.export_method == "tracing": - sample_inputs = get_sample_inputs(args) - exported_model = export_tracing(torch_model, sample_inputs) - - # run evaluation with the converted model - if args.run_eval: - assert exported_model is not None, ( - "Python inference is not yet implemented for " - f"export_method={args.export_method}, format={args.format}." - ) - logger.info("Running evaluation ... this takes a long time if you export to CPU.") - dataset = cfg.DATASETS.TEST[0] - data_loader = build_detection_test_loader(cfg, dataset) - # NOTE: hard-coded evaluator. 
change to the evaluator for your dataset - evaluator = COCOEvaluator(dataset, output_dir=args.output) - metrics = inference_on_dataset(exported_model, data_loader, evaluator) - print_csv_format(metrics) - logger.info("Success.") - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/tools/deploy/torchscript_mask_rcnn.cpp b/detectron2/tools/deploy/torchscript_mask_rcnn.cpp deleted file mode 100644 index fd6e1e9f82652a1d4d221447cd140ab675f312b2..0000000000000000000000000000000000000000 --- a/detectron2/tools/deploy/torchscript_mask_rcnn.cpp +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// @lint-ignore-every CLANGTIDY -// This is an example code that demonstrates how to run inference -// with a torchscript format Mask R-CNN model exported by ./export_model.py -// using export method=tracing, caffe2_tracing & scripting. - -#include -#include -#include - -#include -#include -#include -#include - -// only needed for export_method=tracing -#include // @oss-only -// @fb-only: #include - -using namespace std; - -c10::IValue get_caffe2_tracing_inputs(cv::Mat& img, c10::Device device) { - const int height = img.rows; - const int width = img.cols; - // FPN models require divisibility of 32. - // Tracing mode does padding inside the graph, but caffe2_tracing does not. - assert(height % 32 == 0 && width % 32 == 0); - const int channels = 3; - - auto input = - torch::from_blob(img.data, {1, height, width, channels}, torch::kUInt8); - // NHWC to NCHW - input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous(); - - std::array im_info_data{height * 1.0f, width * 1.0f, 1.0f}; - auto im_info = - torch::from_blob(im_info_data.data(), {1, 3}).clone().to(device); - return std::make_tuple(input, im_info); -} - -c10::IValue get_tracing_inputs(cv::Mat& img, c10::Device device) { - const int height = img.rows; - const int width = img.cols; - const int channels = 3; - - auto input = - torch::from_blob(img.data, {height, width, channels}, torch::kUInt8); - // HWC to CHW - input = input.to(device, torch::kFloat).permute({2, 0, 1}).contiguous(); - return input; -} - -// create a Tuple[Dict[str, Tensor]] which is the input type of scripted model -c10::IValue get_scripting_inputs(cv::Mat& img, c10::Device device) { - const int height = img.rows; - const int width = img.cols; - const int channels = 3; - - auto img_tensor = - torch::from_blob(img.data, {height, width, channels}, torch::kUInt8); - // HWC to CHW - img_tensor = - img_tensor.to(device, torch::kFloat).permute({2, 0, 1}).contiguous(); - auto dic = c10::Dict(); - dic.insert("image", img_tensor); - return std::make_tuple(dic); -} - -c10::IValue -get_inputs(std::string export_method, cv::Mat& img, c10::Device device) { - // Given an image, create inputs in the format required by the model. - if (export_method == "tracing") - return get_tracing_inputs(img, device); - if (export_method == "caffe2_tracing") - return get_caffe2_tracing_inputs(img, device); - if (export_method == "scripting") - return get_scripting_inputs(img, device); - abort(); -} - -struct MaskRCNNOutputs { - at::Tensor pred_boxes, pred_classes, pred_masks, scores; - int num_instances() const { - return pred_boxes.sizes()[0]; - } -}; - -MaskRCNNOutputs get_outputs(std::string export_method, c10::IValue outputs) { - // Given outputs of the model, extract tensors from it to turn into a - // common MaskRCNNOutputs format. 
- if (export_method == "tracing") { - auto out_tuple = outputs.toTuple()->elements(); - // They are ordered alphabetically by their field name in Instances - return MaskRCNNOutputs{ - out_tuple[0].toTensor(), - out_tuple[1].toTensor(), - out_tuple[2].toTensor(), - out_tuple[3].toTensor()}; - } - if (export_method == "caffe2_tracing") { - auto out_tuple = outputs.toTuple()->elements(); - // A legacy order used by caffe2 models - return MaskRCNNOutputs{ - out_tuple[0].toTensor(), - out_tuple[2].toTensor(), - out_tuple[3].toTensor(), - out_tuple[1].toTensor()}; - } - if (export_method == "scripting") { - // With the ScriptableAdapter defined in export_model.py, the output is - // List[Dict[str, Any]]. - auto out_dict = outputs.toList().get(0).toGenericDict(); - return MaskRCNNOutputs{ - out_dict.at("pred_boxes").toTensor(), - out_dict.at("pred_classes").toTensor(), - out_dict.at("pred_masks").toTensor(), - out_dict.at("scores").toTensor()}; - } - abort(); -} - -int main(int argc, const char* argv[]) { - if (argc != 4) { - cerr << R"xx( -Usage: - ./torchscript_mask_rcnn model.ts input.jpg EXPORT_METHOD - - EXPORT_METHOD can be "tracing", "caffe2_tracing" or "scripting". -)xx"; - return 1; - } - std::string image_file = argv[2]; - std::string export_method = argv[3]; - assert( - export_method == "caffe2_tracing" || export_method == "tracing" || - export_method == "scripting"); - - torch::jit::FusionStrategy strat = {{torch::jit::FusionBehavior::DYNAMIC, 1}}; - torch::jit::setFusionStrategy(strat); - torch::autograd::AutoGradMode guard(false); - auto module = torch::jit::load(argv[1]); - - assert(module.buffers().size() > 0); - // Assume that the entire model is on the same device. - // We just put input to this device. - auto device = (*begin(module.buffers())).device(); - - cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR); - auto inputs = get_inputs(export_method, input_img, device); - - // Run the network - auto output = module.forward({inputs}); - if (device.is_cuda()) - c10::cuda::getCurrentCUDAStream().synchronize(); - - // run 3 more times to benchmark - int N_benchmark = 3, N_warmup = 1; - auto start_time = chrono::high_resolution_clock::now(); - for (int i = 0; i < N_benchmark + N_warmup; ++i) { - if (i == N_warmup) - start_time = chrono::high_resolution_clock::now(); - output = module.forward({inputs}); - if (device.is_cuda()) - c10::cuda::getCurrentCUDAStream().synchronize(); - } - auto end_time = chrono::high_resolution_clock::now(); - auto ms = chrono::duration_cast(end_time - start_time) - .count(); - cout << "Latency (should vary with different inputs): " - << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; - - // Parse Mask R-CNN outputs - auto rcnn_outputs = get_outputs(export_method, output); - cout << "Number of detected objects: " << rcnn_outputs.num_instances() - << endl; - - cout << "pred_boxes: " << rcnn_outputs.pred_boxes.toString() << " " - << rcnn_outputs.pred_boxes.sizes() << endl; - cout << "scores: " << rcnn_outputs.scores.toString() << " " - << rcnn_outputs.scores.sizes() << endl; - cout << "pred_classes: " << rcnn_outputs.pred_classes.toString() << " " - << rcnn_outputs.pred_classes.sizes() << endl; - cout << "pred_masks: " << rcnn_outputs.pred_masks.toString() << " " - << rcnn_outputs.pred_masks.sizes() << endl; - - cout << rcnn_outputs.pred_boxes << endl; - return 0; -} diff --git a/detectron2/tools/lazyconfig_train_net.py b/detectron2/tools/lazyconfig_train_net.py deleted file mode 100644 index 
59ae5c887abd8d4ad3cbddca7461c3ebd19ec424..0000000000000000000000000000000000000000 --- a/detectron2/tools/lazyconfig_train_net.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Training script using the new "LazyConfig" python config files. - -This scripts reads a given python config file and runs the training or evaluation. -It can be used to train any models or dataset as long as they can be -instantiated by the recursive construction defined in the given config file. - -Besides lazy construction of models, dataloader, etc., this scripts expects a -few common configuration parameters currently defined in "configs/common/train.py". -To add more complicated training logic, you can easily add other configs -in the config file and implement a new train_net.py to handle them. -""" -import logging - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import LazyConfig, instantiate -from detectron2.engine import ( - AMPTrainer, - SimpleTrainer, - default_argument_parser, - default_setup, - default_writers, - hooks, - launch, -) -from detectron2.engine.defaults import create_ddp_model -from detectron2.evaluation import inference_on_dataset, print_csv_format -from detectron2.utils import comm - -logger = logging.getLogger("detectron2") - - -def do_test(cfg, model): - if "evaluator" in cfg.dataloader: - ret = inference_on_dataset( - model, - instantiate(cfg.dataloader.test), - instantiate(cfg.dataloader.evaluator), - ) - print_csv_format(ret) - return ret - - -def do_train(args, cfg): - """ - Args: - cfg: an object with the following attributes: - model: instantiate to a module - dataloader.{train,test}: instantiate to dataloaders - dataloader.evaluator: instantiate to evaluator for test set - optimizer: instantaite to an optimizer - lr_multiplier: instantiate to a fvcore scheduler - train: other misc config defined in `configs/common/train.py`, including: - output_dir (str) - init_checkpoint (str) - amp.enabled (bool) - max_iter (int) - eval_period, log_period (int) - device (str) - checkpointer (dict) - ddp (dict) - """ - model = instantiate(cfg.model) - logger = logging.getLogger("detectron2") - logger.info("Model:\n{}".format(model)) - model.to(cfg.train.device) - - cfg.optimizer.params.model = model - optim = instantiate(cfg.optimizer) - - train_loader = instantiate(cfg.dataloader.train) - - model = create_ddp_model(model, **cfg.train.ddp) - trainer = (AMPTrainer if cfg.train.amp.enabled else SimpleTrainer)(model, train_loader, optim) - checkpointer = DetectionCheckpointer( - model, - cfg.train.output_dir, - trainer=trainer, - ) - trainer.register_hooks( - [ - hooks.IterationTimer(), - hooks.LRScheduler(scheduler=instantiate(cfg.lr_multiplier)), - ( - hooks.PeriodicCheckpointer(checkpointer, **cfg.train.checkpointer) - if comm.is_main_process() - else None - ), - hooks.EvalHook(cfg.train.eval_period, lambda: do_test(cfg, model)), - ( - hooks.PeriodicWriter( - default_writers(cfg.train.output_dir, cfg.train.max_iter), - period=cfg.train.log_period, - ) - if comm.is_main_process() - else None - ), - ] - ) - - checkpointer.resume_or_load(cfg.train.init_checkpoint, resume=args.resume) - if args.resume and checkpointer.has_checkpoint(): - # The checkpoint stores the training iteration that just finished, thus we start - # at the next iteration - start_iter = trainer.iter + 1 - else: - start_iter = 0 - trainer.train(start_iter, cfg.train.max_iter) - - -def main(args): - cfg = 
LazyConfig.load(args.config_file) - cfg = LazyConfig.apply_overrides(cfg, args.opts) - default_setup(cfg, args) - - if args.eval_only: - model = instantiate(cfg.model) - model.to(cfg.train.device) - model = create_ddp_model(model) - DetectionCheckpointer(model).load(cfg.train.init_checkpoint) - print(do_test(cfg, model)) - else: - do_train(args, cfg) - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/tools/lightning_train_net.py b/detectron2/tools/lightning_train_net.py deleted file mode 100644 index 87cfe84feb780b023f6f354fdc3e3f5bce801025..0000000000000000000000000000000000000000 --- a/detectron2/tools/lightning_train_net.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# Lightning Trainer should be considered beta at this point -# We have confirmed that training and validation run correctly and produce correct results -# Depending on how you launch the trainer, there are issues with processes terminating correctly -# This module is still dependent on D2 logging, but could be transferred to use Lightning logging - -import logging -import os -import time -import weakref -from collections import OrderedDict -from typing import Any, Dict, List -import pytorch_lightning as pl # type: ignore -from pytorch_lightning import LightningDataModule, LightningModule - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import build_detection_test_loader, build_detection_train_loader -from detectron2.engine import ( - DefaultTrainer, - SimpleTrainer, - default_argument_parser, - default_setup, - default_writers, - hooks, -) -from detectron2.evaluation import print_csv_format -from detectron2.evaluation.testing import flatten_results_dict -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils.events import EventStorage -from detectron2.utils.logger import setup_logger - -from train_net import build_evaluator - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger("detectron2") - - -class TrainingModule(LightningModule): - def __init__(self, cfg): - super().__init__() - if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 - setup_logger() - self.cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) - self.storage: EventStorage = None - self.model = build_model(self.cfg) - - self.start_iter = 0 - self.max_iter = cfg.SOLVER.MAX_ITER - - def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None: - checkpoint["iteration"] = self.storage.iter - - def on_load_checkpoint(self, checkpointed_state: Dict[str, Any]) -> None: - self.start_iter = checkpointed_state["iteration"] - self.storage.iter = self.start_iter - - def setup(self, stage: str): - if self.cfg.MODEL.WEIGHTS: - self.checkpointer = DetectionCheckpointer( - # Assume you want to save checkpoints together with logs/statistics - self.model, - self.cfg.OUTPUT_DIR, - ) - logger.info(f"Load model weights from checkpoint: {self.cfg.MODEL.WEIGHTS}.") - # Only load weights, use lightning checkpointing if you want to resume - self.checkpointer.load(self.cfg.MODEL.WEIGHTS) - - 
self.iteration_timer = hooks.IterationTimer() - self.iteration_timer.before_train() - self.data_start = time.perf_counter() - self.writers = None - - def training_step(self, batch, batch_idx): - data_time = time.perf_counter() - self.data_start - # Need to manually enter/exit since trainer may launch processes - # This ideally belongs in setup, but setup seems to run before processes are spawned - if self.storage is None: - self.storage = EventStorage(0) - self.storage.__enter__() - self.iteration_timer.trainer = weakref.proxy(self) - self.iteration_timer.before_step() - self.writers = ( - default_writers(self.cfg.OUTPUT_DIR, self.max_iter) - if comm.is_main_process() - else {} - ) - - loss_dict = self.model(batch) - SimpleTrainer.write_metrics(loss_dict, data_time) - - opt = self.optimizers() - self.storage.put_scalar( - "lr", - opt.param_groups[self._best_param_group_id]["lr"], - smoothing_hint=False, - ) - self.iteration_timer.after_step() - self.storage.step() - # A little odd to put before step here, but it's the best way to get a proper timing - self.iteration_timer.before_step() - - if self.storage.iter % 20 == 0: - for writer in self.writers: - writer.write() - return sum(loss_dict.values()) - - def training_step_end(self, training_step_outpus): - self.data_start = time.perf_counter() - return training_step_outpus - - def training_epoch_end(self, training_step_outputs): - self.iteration_timer.after_train() - if comm.is_main_process(): - self.checkpointer.save("model_final") - for writer in self.writers: - writer.write() - writer.close() - self.storage.__exit__(None, None, None) - - def _process_dataset_evaluation_results(self) -> OrderedDict: - results = OrderedDict() - for idx, dataset_name in enumerate(self.cfg.DATASETS.TEST): - results[dataset_name] = self._evaluators[idx].evaluate() - if comm.is_main_process(): - print_csv_format(results[dataset_name]) - - if len(results) == 1: - results = list(results.values())[0] - return results - - def _reset_dataset_evaluators(self): - self._evaluators = [] - for dataset_name in self.cfg.DATASETS.TEST: - evaluator = build_evaluator(self.cfg, dataset_name) - evaluator.reset() - self._evaluators.append(evaluator) - - def on_validation_epoch_start(self, _outputs): - self._reset_dataset_evaluators() - - def validation_epoch_end(self, _outputs): - results = self._process_dataset_evaluation_results(_outputs) - - flattened_results = flatten_results_dict(results) - for k, v in flattened_results.items(): - try: - v = float(v) - except Exception as e: - raise ValueError( - "[EvalHook] eval_function should return a nested dict of float. 
" - "Got '{}: {}' instead.".format(k, v) - ) from e - self.storage.put_scalars(**flattened_results, smoothing_hint=False) - - def validation_step(self, batch, batch_idx: int, dataloader_idx: int = 0) -> None: - if not isinstance(batch, List): - batch = [batch] - outputs = self.model(batch) - self._evaluators[dataloader_idx].process(batch, outputs) - - def configure_optimizers(self): - optimizer = build_optimizer(self.cfg, self.model) - self._best_param_group_id = hooks.LRScheduler.get_best_param_group_id(optimizer) - scheduler = build_lr_scheduler(self.cfg, optimizer) - return [optimizer], [{"scheduler": scheduler, "interval": "step"}] - - -class DataModule(LightningDataModule): - def __init__(self, cfg): - super().__init__() - self.cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) - - def train_dataloader(self): - return build_detection_train_loader(self.cfg) - - def val_dataloader(self): - dataloaders = [] - for dataset_name in self.cfg.DATASETS.TEST: - dataloaders.append(build_detection_test_loader(self.cfg, dataset_name)) - return dataloaders - - -def main(args): - cfg = setup(args) - train(cfg, args) - - -def train(cfg, args): - trainer_params = { - # training loop is bounded by max steps, use a large max_epochs to make - # sure max_steps is met first - "max_epochs": 10**8, - "max_steps": cfg.SOLVER.MAX_ITER, - "val_check_interval": cfg.TEST.EVAL_PERIOD if cfg.TEST.EVAL_PERIOD > 0 else 10**8, - "num_nodes": args.num_machines, - "gpus": args.num_gpus, - "num_sanity_val_steps": 0, - } - if cfg.SOLVER.AMP.ENABLED: - trainer_params["precision"] = 16 - - last_checkpoint = os.path.join(cfg.OUTPUT_DIR, "last.ckpt") - if args.resume: - # resume training from checkpoint - trainer_params["resume_from_checkpoint"] = last_checkpoint - logger.info(f"Resuming training from checkpoint: {last_checkpoint}.") - - trainer = pl.Trainer(**trainer_params) - logger.info(f"start to train with {args.num_machines} nodes and {args.num_gpus} GPUs") - - module = TrainingModule(cfg) - data_module = DataModule(cfg) - if args.eval_only: - logger.info("Running inference") - trainer.validate(module, data_module) - else: - logger.info("Running training") - trainer.fit(module, data_module) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def invoke_main() -> None: - parser = default_argument_parser() - args = parser.parse_args() - logger.info("Command Line Args:", args) - main(args) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/tools/plain_train_net.py b/detectron2/tools/plain_train_net.py deleted file mode 100644 index 0c373368891bbdf02388800452b11800cc6dbcdf..0000000000000000000000000000000000000000 --- a/detectron2/tools/plain_train_net.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -""" -Detectron2 training script with a plain training loop. - -This script reads a given config file and runs the training or evaluation. -It is an entry point that is able to train standard models in detectron2. - -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". 
- -Therefore, we recommend you to use detectron2 as a library and take -this file as an example of how to use the library. -You may want to write your own script with your datasets and other customizations. - -Compared to "train_net.py", this script supports fewer default features. -It also includes fewer abstraction, therefore is easier to add custom logic. -""" - -import logging -import os -from collections import OrderedDict -import torch -from torch.nn.parallel import DistributedDataParallel - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer -from detectron2.config import get_cfg -from detectron2.data import ( - MetadataCatalog, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.engine import default_argument_parser, default_setup, default_writers, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - inference_on_dataset, - print_csv_format, -) -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils.events import EventStorage - -logger = logging.getLogger("detectron2") - - -def get_evaluator(cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - return CityscapesSemSegEvaluator(dataset_name) - if evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - -def do_test(cfg, model): - results = OrderedDict() - for dataset_name in cfg.DATASETS.TEST: - data_loader = build_detection_test_loader(cfg, dataset_name) - evaluator = get_evaluator( - cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) - ) - results_i = inference_on_dataset(model, data_loader, evaluator) - results[dataset_name] = results_i - if comm.is_main_process(): - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - if len(results) == 1: - results = list(results.values())[0] - return results - - -def do_train(cfg, model, 
resume=False): - model.train() - optimizer = build_optimizer(cfg, model) - scheduler = build_lr_scheduler(cfg, optimizer) - - checkpointer = DetectionCheckpointer( - model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler - ) - start_iter = ( - checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 - ) - max_iter = cfg.SOLVER.MAX_ITER - - periodic_checkpointer = PeriodicCheckpointer( - checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter - ) - - writers = default_writers(cfg.OUTPUT_DIR, max_iter) if comm.is_main_process() else [] - - # compared to "train_net.py", we do not support accurate timing and - # precise BN here, because they are not trivial to implement in a small training loop - data_loader = build_detection_train_loader(cfg) - logger.info("Starting training from iteration {}".format(start_iter)) - with EventStorage(start_iter) as storage: - for data, iteration in zip(data_loader, range(start_iter, max_iter)): - storage.iter = iteration - - loss_dict = model(data) - losses = sum(loss_dict.values()) - assert torch.isfinite(losses).all(), loss_dict - - loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} - losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - if comm.is_main_process(): - storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) - - optimizer.zero_grad() - losses.backward() - optimizer.step() - storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) - scheduler.step() - - if ( - cfg.TEST.EVAL_PERIOD > 0 - and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0 - and iteration != max_iter - 1 - ): - do_test(cfg, model) - # Compared to "train_net.py", the test results are not dumped to EventStorage - comm.synchronize() - - if iteration - start_iter > 5 and ( - (iteration + 1) % 20 == 0 or iteration == max_iter - 1 - ): - for writer in writers: - writer.write() - periodic_checkpointer.step(iteration) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup( - cfg, args - ) # if you don't like any of the default setup, write your own setup code - return cfg - - -def main(args): - cfg = setup(args) - - model = build_model(cfg) - logger.info("Model:\n{}".format(model)) - if args.eval_only: - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - return do_test(cfg, model) - - distributed = comm.get_world_size() > 1 - if distributed: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - - do_train(cfg, model, resume=args.resume) - return do_test(cfg, model) - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/tools/train_net.py b/detectron2/tools/train_net.py deleted file mode 100644 index a82a8dfb5ff6a60f1f51a461319f642aaaba5bce..0000000000000000000000000000000000000000 --- a/detectron2/tools/train_net.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. -""" -A main training script. 
- -This script reads a given config file and runs the training or evaluation. -It is an entry point that is made to train standard models in detectron2. - -In order to let one script support training of many models, -this script contains logic that is specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you use detectron2 as a library and take -this file as an example of how to use the library. -You may want to write your own script with your datasets and other customizations. -""" - -import logging -import os -from collections import OrderedDict - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - verify_results, -) -from detectron2.modeling import GeneralizedRCNNWithTTA - - -def build_evaluator(cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - return CityscapesSemSegEvaluator(dataset_name) - elif evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - elif evaluator_type == "lvis": - return LVISEvaluator(dataset_name, output_dir=output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains pre-defined default logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can write your - own training loop. You can use "tools/plain_train_net.py" as an example. 
- """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - return build_evaluator(cfg, dataset_name, output_folder) - - @classmethod - def test_with_TTA(cls, cfg, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. - logger.info("Running inference with test-time augmentation ...") - model = GeneralizedRCNNWithTTA(cfg, model) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - """ - If you'd like to do anything fancier than the standard training logic, - consider writing your own training loop (see plain_train_net.py) or - subclassing the trainer. - """ - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -def invoke_main() -> None: - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) - - -if __name__ == "__main__": - invoke_main() # pragma: no cover diff --git a/detectron2/tools/visualize_data.py b/detectron2/tools/visualize_data.py deleted file mode 100644 index 25d569cd95f3e4e85fa175e4f96107b55ae3f02d..0000000000000000000000000000000000000000 --- a/detectron2/tools/visualize_data.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. 
-import argparse -import os -from itertools import chain -import cv2 -import tqdm - -from detectron2.config import get_cfg -from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader -from detectron2.data import detection_utils as utils -from detectron2.data.build import filter_images_with_few_keypoints -from detectron2.utils.logger import setup_logger -from detectron2.utils.visualizer import Visualizer - - -def setup(args): - cfg = get_cfg() - if args.config_file: - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.DATALOADER.NUM_WORKERS = 0 - cfg.freeze() - return cfg - - -def parse_args(in_args=None): - parser = argparse.ArgumentParser(description="Visualize ground-truth data") - parser.add_argument( - "--source", - choices=["annotation", "dataloader"], - required=True, - help="visualize the annotations or the data loader (with pre-processing)", - ) - parser.add_argument("--config-file", metavar="FILE", help="path to config file") - parser.add_argument("--output-dir", default="./", help="path to output directory") - parser.add_argument("--show", action="store_true", help="show output in a window") - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - return parser.parse_args(in_args) - - -def main() -> None: - global img - args = parse_args() - logger = setup_logger() - logger.info("Arguments: " + str(args)) - cfg = setup(args) - - dirname = args.output_dir - os.makedirs(dirname, exist_ok=True) - metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) - - def output(vis, fname): - if args.show: - print(fname) - cv2.imshow("window", vis.get_image()[:, :, ::-1]) - cv2.waitKey() - else: - filepath = os.path.join(dirname, fname) - print("Saving to {} ...".format(filepath)) - vis.save(filepath) - - scale = 1.0 - if args.source == "dataloader": - train_data_loader = build_detection_train_loader(cfg) - for batch in train_data_loader: - for per_image in batch: - # Pytorch tensor is in (C, H, W) format - img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy() - img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT) - - visualizer = Visualizer(img, metadata=metadata, scale=scale) - target_fields = per_image["instances"].get_fields() - labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] - vis = visualizer.overlay_instances( - labels=labels, - boxes=target_fields.get("gt_boxes", None), - masks=target_fields.get("gt_masks", None), - keypoints=target_fields.get("gt_keypoints", None), - ) - output(vis, str(per_image["image_id"]) + ".jpg") - else: - dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) - if cfg.MODEL.KEYPOINT_ON: - dicts = filter_images_with_few_keypoints(dicts, 1) - for dic in tqdm.tqdm(dicts): - img = utils.read_image(dic["file_name"], "RGB") - visualizer = Visualizer(img, metadata=metadata, scale=scale) - vis = visualizer.draw_dataset_dict(dic) - output(vis, os.path.basename(dic["file_name"])) - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/detectron2/tools/visualize_json_results.py b/detectron2/tools/visualize_json_results.py deleted file mode 100644 index e32d80e35b447c79b7ef617a14641f1460110622..0000000000000000000000000000000000000000 --- a/detectron2/tools/visualize_json_results.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. 
- -import argparse -import json -import numpy as np -import os -from collections import defaultdict -import cv2 -import tqdm - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import Boxes, BoxMode, Instances -from detectron2.utils.file_io import PathManager -from detectron2.utils.logger import setup_logger -from detectron2.utils.visualizer import Visualizer - - -def create_instances(predictions, image_size): - ret = Instances(image_size) - - score = np.asarray([x["score"] for x in predictions]) - chosen = (score > args.conf_threshold).nonzero()[0] - score = score[chosen] - bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4) - bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - - labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) - - ret.scores = score - ret.pred_boxes = Boxes(bbox) - ret.pred_classes = labels - - try: - ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] - except KeyError: - pass - return ret - - -def main() -> None: - global args, dataset_id_map - parser = argparse.ArgumentParser( - description="A script that visualizes the json predictions from COCO or LVIS dataset." - ) - parser.add_argument("--input", required=True, help="JSON file produced by the model") - parser.add_argument("--output", required=True, help="output directory") - parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") - parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") - args = parser.parse_args() - - setup_logger() - - with PathManager.open(args.input, "r") as f: - predictions = json.load(f) - - pred_by_image = defaultdict(list) - for p in predictions: - pred_by_image[p["image_id"]].append(p) - - dicts = list(DatasetCatalog.get(args.dataset)) - metadata = MetadataCatalog.get(args.dataset) - if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): - - def dataset_id_map(ds_id): - return metadata.thing_dataset_id_to_contiguous_id[ds_id] - - elif "lvis" in args.dataset: - # LVIS results are in the same format as COCO results, but have a different - # mapping from dataset category id to contiguous category id in [0, #categories - 1] - def dataset_id_map(ds_id): - return ds_id - 1 - - else: - raise ValueError("Unsupported dataset: {}".format(args.dataset)) - - os.makedirs(args.output, exist_ok=True) - - for dic in tqdm.tqdm(dicts): - img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] - basename = os.path.basename(dic["file_name"]) - - predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) - vis = Visualizer(img, metadata) - vis_pred = vis.draw_instance_predictions(predictions).get_image() - - vis = Visualizer(img, metadata) - vis_gt = vis.draw_dataset_dict(dic).get_image() - - concat = np.concatenate((vis_pred, vis_gt), axis=1) - cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) - - -if __name__ == "__main__": - main() # pragma: no cover diff --git a/pytorch3d/.circleci/build_count.py b/pytorch3d/.circleci/build_count.py deleted file mode 100644 index aecb54d1bfeeecfcc90570a68552f97011780cec..0000000000000000000000000000000000000000 --- a/pytorch3d/.circleci/build_count.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -""" -Print the number of nightly builds -""" - -from collections import Counter - -import yaml - - -conf = yaml.safe_load(open("config.yml")) -jobs = conf["workflows"]["build_and_test"]["jobs"] - - -def jobtype(job): - if isinstance(job, str): - return job - if len(job) == 1: - [name] = job.keys() - return name - return "MULTIPLE PARTS" - - -for i, j in Counter(map(jobtype, jobs)).items(): - print(i, j) -print() -print(len(jobs)) diff --git a/pytorch3d/.circleci/check.sh b/pytorch3d/.circleci/check.sh deleted file mode 100644 index bea4614153227769e0ab676ba857a473e32bfc8c..0000000000000000000000000000000000000000 --- a/pytorch3d/.circleci/check.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Run this script before committing config.yml to verify it is valid yaml. - -python -c 'import yaml; yaml.safe_load(open("config.yml"))' && echo OK - valid yaml - -msg="circleci not installed so can't check schema" -command -v circleci > /dev/null && (cd ..; circleci config validate) || echo "$msg" diff --git a/pytorch3d/.circleci/config.in.yml b/pytorch3d/.circleci/config.in.yml deleted file mode 100644 index b32ba66a68c3a8e3bc232e5d2c36024bca799c5e..0000000000000000000000000000000000000000 --- a/pytorch3d/.circleci/config.in.yml +++ /dev/null @@ -1,171 +0,0 @@ -version: 2.1 - -#examples: -#https://github.com/facebookresearch/ParlAI/blob/master/.circleci/config.yml -#https://github.com/facebookresearch/hydra/blob/master/.circleci/config.yml -#https://github.com/facebookresearch/habitat-api/blob/master/.circleci/config.yml - -#drive tests with nox or tox or pytest? - -# ------------------------------------------------------------------------------------- -# environments where we run our jobs -# ------------------------------------------------------------------------------------- - - -setupcuda: &setupcuda - run: - name: Setup CUDA - working_directory: ~/ - command: | - # download and install nvidia drivers, cuda, etc - wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run - sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent - echo "Done installing CUDA." 
- pyenv versions - nvidia-smi - pyenv global 3.9.1 - -binary_common: &binary_common - parameters: - # Edit these defaults to do a release` - build_version: - description: "version number of release binary; by default, build a nightly" - type: string - default: "" - pytorch_version: - description: "PyTorch version to build against; by default, use a nightly" - type: string - default: "" - # Don't edit these - python_version: - description: "Python version to build against (e.g., 3.7)" - type: string - cu_version: - description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" - type: string - wheel_docker_image: - description: "Wheel only: what docker image to use" - type: string - default: "pytorch/manylinux-cuda101" - conda_docker_image: - description: "what docker image to use for docker" - type: string - default: "pytorch/conda-cuda" - environment: - PYTHON_VERSION: << parameters.python_version >> - BUILD_VERSION: << parameters.build_version >> - PYTORCH_VERSION: << parameters.pytorch_version >> - CU_VERSION: << parameters.cu_version >> - TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >> - -jobs: - main: - environment: - CUDA_VERSION: "11.3" - resource_class: gpu.nvidia.small.multi - machine: - image: linux-cuda-11:default - steps: - - checkout - - <<: *setupcuda - - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7' - - run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html - # - run: conda create -p ~/conda_env python=3.7 numpy - # - run: conda activate ~/conda_env - # - run: conda install -c pytorch pytorch torchvision - - - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore' - - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath' - - run: - name: build - command: | - export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64 - python3 setup.py build_ext --inplace - - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests -t . - - run: python3 setup.py bdist_wheel - - binary_linux_wheel: - <<: *binary_common - docker: - - image: << parameters.wheel_docker_image >> - auth: - username: $DOCKERHUB_USERNAME - password: $DOCKERHUB_TOKEN - resource_class: 2xlarge+ - steps: - - checkout - - run: MAX_JOBS=15 packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_linux_conda: - <<: *binary_common - docker: - - image: "<< parameters.conda_docker_image >>" - auth: - username: $DOCKERHUB_USERNAME - password: $DOCKERHUB_TOKEN - resource_class: 2xlarge+ - steps: - - checkout - # This is building with cuda but no gpu present, - # so we aren't running the tests. 
- - run: - name: build - no_output_timeout: 40m - command: MAX_JOBS=15 TEST_FLAG=--no-test python3 packaging/build_conda.py - - store_artifacts: - path: /opt/conda/conda-bld/linux-64 - - persist_to_workspace: - root: /opt/conda/conda-bld/linux-64 - paths: - - "*" - - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: linux-cuda-11:default - resource_class: gpu.nvidia.small.multi - steps: - - checkout - - - run: - name: Pull docker image - command: | - nvidia-smi - set -e - - { docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null - - echo Pulling docker image $TESTRUN_DOCKER_IMAGE - docker pull $TESTRUN_DOCKER_IMAGE - - run: - name: Build and run tests - no_output_timeout: 40m - command: | - set -e - - cd ${HOME}/project/ - - export JUST_TESTRUN=1 - VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} python3 ./packaging/build_conda.py - -workflows: - version: 2 - build_and_test: - jobs: - # - main: - # context: DOCKERHUB_TOKEN - {{workflows()}} - - binary_linux_conda_cuda: - name: testrun_conda_cuda_py310_cu117_pyt201 - context: DOCKERHUB_TOKEN - python_version: "3.10" - pytorch_version: '2.0.1' - cu_version: "cu117" diff --git a/pytorch3d/.circleci/config.yml b/pytorch3d/.circleci/config.yml deleted file mode 100644 index 17e36959555e8cc6252d40c5771dccf5bf57a27e..0000000000000000000000000000000000000000 --- a/pytorch3d/.circleci/config.yml +++ /dev/null @@ -1,660 +0,0 @@ -version: 2.1 - -#examples: -#https://github.com/facebookresearch/ParlAI/blob/master/.circleci/config.yml -#https://github.com/facebookresearch/hydra/blob/master/.circleci/config.yml -#https://github.com/facebookresearch/habitat-api/blob/master/.circleci/config.yml - -#drive tests with nox or tox or pytest? - -# ------------------------------------------------------------------------------------- -# environments where we run our jobs -# ------------------------------------------------------------------------------------- - - -setupcuda: &setupcuda - run: - name: Setup CUDA - working_directory: ~/ - command: | - # download and install nvidia drivers, cuda, etc - wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run - sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent - echo "Done installing CUDA." 
- pyenv versions - nvidia-smi - pyenv global 3.9.1 - -binary_common: &binary_common - parameters: - # Edit these defaults to do a release` - build_version: - description: "version number of release binary; by default, build a nightly" - type: string - default: "" - pytorch_version: - description: "PyTorch version to build against; by default, use a nightly" - type: string - default: "" - # Don't edit these - python_version: - description: "Python version to build against (e.g., 3.7)" - type: string - cu_version: - description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" - type: string - wheel_docker_image: - description: "Wheel only: what docker image to use" - type: string - default: "pytorch/manylinux-cuda101" - conda_docker_image: - description: "what docker image to use for docker" - type: string - default: "pytorch/conda-cuda" - environment: - PYTHON_VERSION: << parameters.python_version >> - BUILD_VERSION: << parameters.build_version >> - PYTORCH_VERSION: << parameters.pytorch_version >> - CU_VERSION: << parameters.cu_version >> - TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >> - -jobs: - main: - environment: - CUDA_VERSION: "11.3" - resource_class: gpu.nvidia.small.multi - machine: - image: linux-cuda-11:default - steps: - - checkout - - <<: *setupcuda - - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7' - - run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html - # - run: conda create -p ~/conda_env python=3.7 numpy - # - run: conda activate ~/conda_env - # - run: conda install -c pytorch pytorch torchvision - - - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore' - - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath' - - run: - name: build - command: | - export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64 - python3 setup.py build_ext --inplace - - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests -t . - - run: python3 setup.py bdist_wheel - - binary_linux_wheel: - <<: *binary_common - docker: - - image: << parameters.wheel_docker_image >> - auth: - username: $DOCKERHUB_USERNAME - password: $DOCKERHUB_TOKEN - resource_class: 2xlarge+ - steps: - - checkout - - run: MAX_JOBS=15 packaging/build_wheel.sh - - store_artifacts: - path: dist - - persist_to_workspace: - root: dist - paths: - - "*" - - binary_linux_conda: - <<: *binary_common - docker: - - image: "<< parameters.conda_docker_image >>" - auth: - username: $DOCKERHUB_USERNAME - password: $DOCKERHUB_TOKEN - resource_class: 2xlarge+ - steps: - - checkout - # This is building with cuda but no gpu present, - # so we aren't running the tests. 
- - run: - name: build - no_output_timeout: 40m - command: MAX_JOBS=15 TEST_FLAG=--no-test python3 packaging/build_conda.py - - store_artifacts: - path: /opt/conda/conda-bld/linux-64 - - persist_to_workspace: - root: /opt/conda/conda-bld/linux-64 - paths: - - "*" - - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: linux-cuda-11:default - resource_class: gpu.nvidia.small.multi - steps: - - checkout - - - run: - name: Pull docker image - command: | - nvidia-smi - set -e - - { docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null - - echo Pulling docker image $TESTRUN_DOCKER_IMAGE - docker pull $TESTRUN_DOCKER_IMAGE - - run: - name: Build and run tests - no_output_timeout: 40m - command: | - set -e - - cd ${HOME}/project/ - - export JUST_TESTRUN=1 - VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} python3 ./packaging/build_conda.py - -workflows: - version: 2 - build_and_test: - jobs: - # - main: - # context: DOCKERHUB_TOKEN - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda113 - context: DOCKERHUB_TOKEN - cu_version: cu113 - name: linux_conda_py38_cu113_pyt1120 - python_version: '3.8' - pytorch_version: 1.12.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py38_cu116_pyt1120 - python_version: '3.8' - pytorch_version: 1.12.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda113 - context: DOCKERHUB_TOKEN - cu_version: cu113 - name: linux_conda_py38_cu113_pyt1121 - python_version: '3.8' - pytorch_version: 1.12.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py38_cu116_pyt1121 - python_version: '3.8' - pytorch_version: 1.12.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py38_cu116_pyt1130 - python_version: '3.8' - pytorch_version: 1.13.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py38_cu117_pyt1130 - python_version: '3.8' - pytorch_version: 1.13.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py38_cu116_pyt1131 - python_version: '3.8' - pytorch_version: 1.13.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py38_cu117_pyt1131 - python_version: '3.8' - pytorch_version: 1.13.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py38_cu117_pyt200 - python_version: '3.8' - pytorch_version: 2.0.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py38_cu118_pyt200 - python_version: '3.8' - pytorch_version: 2.0.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py38_cu117_pyt201 - python_version: '3.8' - pytorch_version: 2.0.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: 
DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py38_cu118_pyt201 - python_version: '3.8' - pytorch_version: 2.0.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py38_cu118_pyt210 - python_version: '3.8' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py38_cu121_pyt210 - python_version: '3.8' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py38_cu118_pyt211 - python_version: '3.8' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py38_cu121_pyt211 - python_version: '3.8' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py38_cu118_pyt212 - python_version: '3.8' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py38_cu121_pyt212 - python_version: '3.8' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py38_cu118_pyt220 - python_version: '3.8' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py38_cu121_pyt220 - python_version: '3.8' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda113 - context: DOCKERHUB_TOKEN - cu_version: cu113 - name: linux_conda_py39_cu113_pyt1120 - python_version: '3.9' - pytorch_version: 1.12.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py39_cu116_pyt1120 - python_version: '3.9' - pytorch_version: 1.12.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda113 - context: DOCKERHUB_TOKEN - cu_version: cu113 - name: linux_conda_py39_cu113_pyt1121 - python_version: '3.9' - pytorch_version: 1.12.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py39_cu116_pyt1121 - python_version: '3.9' - pytorch_version: 1.12.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py39_cu116_pyt1130 - python_version: '3.9' - pytorch_version: 1.13.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py39_cu117_pyt1130 - python_version: '3.9' - pytorch_version: 1.13.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py39_cu116_pyt1131 - python_version: '3.9' - pytorch_version: 1.13.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py39_cu117_pyt1131 - python_version: '3.9' - pytorch_version: 1.13.1 - - binary_linux_conda: - conda_docker_image: 
pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py39_cu117_pyt200 - python_version: '3.9' - pytorch_version: 2.0.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py39_cu118_pyt200 - python_version: '3.9' - pytorch_version: 2.0.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py39_cu117_pyt201 - python_version: '3.9' - pytorch_version: 2.0.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py39_cu118_pyt201 - python_version: '3.9' - pytorch_version: 2.0.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py39_cu118_pyt210 - python_version: '3.9' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py39_cu121_pyt210 - python_version: '3.9' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py39_cu118_pyt211 - python_version: '3.9' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py39_cu121_pyt211 - python_version: '3.9' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py39_cu118_pyt212 - python_version: '3.9' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py39_cu121_pyt212 - python_version: '3.9' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py39_cu118_pyt220 - python_version: '3.9' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py39_cu121_pyt220 - python_version: '3.9' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda113 - context: DOCKERHUB_TOKEN - cu_version: cu113 - name: linux_conda_py310_cu113_pyt1120 - python_version: '3.10' - pytorch_version: 1.12.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py310_cu116_pyt1120 - python_version: '3.10' - pytorch_version: 1.12.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda113 - context: DOCKERHUB_TOKEN - cu_version: cu113 - name: linux_conda_py310_cu113_pyt1121 - python_version: '3.10' - pytorch_version: 1.12.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py310_cu116_pyt1121 - python_version: '3.10' - pytorch_version: 1.12.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py310_cu116_pyt1130 - python_version: '3.10' - pytorch_version: 1.13.0 - - 
binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py310_cu117_pyt1130 - python_version: '3.10' - pytorch_version: 1.13.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda116 - context: DOCKERHUB_TOKEN - cu_version: cu116 - name: linux_conda_py310_cu116_pyt1131 - python_version: '3.10' - pytorch_version: 1.13.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py310_cu117_pyt1131 - python_version: '3.10' - pytorch_version: 1.13.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py310_cu117_pyt200 - python_version: '3.10' - pytorch_version: 2.0.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py310_cu118_pyt200 - python_version: '3.10' - pytorch_version: 2.0.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda117 - context: DOCKERHUB_TOKEN - cu_version: cu117 - name: linux_conda_py310_cu117_pyt201 - python_version: '3.10' - pytorch_version: 2.0.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py310_cu118_pyt201 - python_version: '3.10' - pytorch_version: 2.0.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py310_cu118_pyt210 - python_version: '3.10' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py310_cu121_pyt210 - python_version: '3.10' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py310_cu118_pyt211 - python_version: '3.10' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py310_cu121_pyt211 - python_version: '3.10' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py310_cu118_pyt212 - python_version: '3.10' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py310_cu121_pyt212 - python_version: '3.10' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py310_cu118_pyt220 - python_version: '3.10' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py310_cu121_pyt220 - python_version: '3.10' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py311_cu118_pyt210 - python_version: '3.11' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: 
linux_conda_py311_cu121_pyt210 - python_version: '3.11' - pytorch_version: 2.1.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py311_cu118_pyt211 - python_version: '3.11' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py311_cu121_pyt211 - python_version: '3.11' - pytorch_version: 2.1.1 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py311_cu118_pyt212 - python_version: '3.11' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py311_cu121_pyt212 - python_version: '3.11' - pytorch_version: 2.1.2 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py311_cu118_pyt220 - python_version: '3.11' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py311_cu121_pyt220 - python_version: '3.11' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda118 - context: DOCKERHUB_TOKEN - cu_version: cu118 - name: linux_conda_py312_cu118_pyt220 - python_version: '3.12' - pytorch_version: 2.2.0 - - binary_linux_conda: - conda_docker_image: pytorch/conda-builder:cuda121 - context: DOCKERHUB_TOKEN - cu_version: cu121 - name: linux_conda_py312_cu121_pyt220 - python_version: '3.12' - pytorch_version: 2.2.0 - - binary_linux_conda_cuda: - name: testrun_conda_cuda_py310_cu117_pyt201 - context: DOCKERHUB_TOKEN - python_version: "3.10" - pytorch_version: '2.0.1' - cu_version: "cu117" diff --git a/pytorch3d/.circleci/regenerate.py b/pytorch3d/.circleci/regenerate.py deleted file mode 100644 index 6fe9e5bfe9dd288be7dee8312f254d16ba2fb0f6..0000000000000000000000000000000000000000 --- a/pytorch3d/.circleci/regenerate.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -""" -This script is adapted from the torchvision one. -""" - -import os.path - -import jinja2 -import yaml -from packaging import version - - -# The CUDA versions which have pytorch conda packages available for linux for each -# version of pytorch. 
-CONDA_CUDA_VERSIONS = { - "1.12.0": ["cu113", "cu116"], - "1.12.1": ["cu113", "cu116"], - "1.13.0": ["cu116", "cu117"], - "1.13.1": ["cu116", "cu117"], - "2.0.0": ["cu117", "cu118"], - "2.0.1": ["cu117", "cu118"], - "2.1.0": ["cu118", "cu121"], - "2.1.1": ["cu118", "cu121"], - "2.1.2": ["cu118", "cu121"], - "2.2.0": ["cu118", "cu121"], -} - - -def conda_docker_image_for_cuda(cuda_version): - if len(cuda_version) != 5: - raise ValueError("Unknown cuda version") - return "pytorch/conda-builder:cuda" + cuda_version[2:] - - -def pytorch_versions_for_python(python_version): - if python_version in ["3.8", "3.9"]: - return list(CONDA_CUDA_VERSIONS) - if python_version == "3.10": - return [ - i - for i in CONDA_CUDA_VERSIONS - if version.Version(i) >= version.Version("1.11.0") - ] - if python_version == "3.11": - return [ - i - for i in CONDA_CUDA_VERSIONS - if version.Version(i) >= version.Version("2.1.0") - ] - if python_version == "3.12": - return [ - i - for i in CONDA_CUDA_VERSIONS - if version.Version(i) >= version.Version("2.2.0") - ] - - -def workflows(prefix="", filter_branch=None, upload=False, indentation=6): - w = [] - for btype in ["conda"]: - for python_version in ["3.8", "3.9", "3.10", "3.11", "3.12"]: - for pytorch_version in pytorch_versions_for_python(python_version): - for cu_version in CONDA_CUDA_VERSIONS[pytorch_version]: - w += workflow_pair( - btype=btype, - python_version=python_version, - pytorch_version=pytorch_version, - cu_version=cu_version, - prefix=prefix, - upload=upload, - filter_branch=filter_branch, - ) - - return indent(indentation, w) - - -def workflow_pair( - *, - btype, - python_version, - pytorch_version, - cu_version, - prefix="", - upload=False, - filter_branch, -): - - w = [] - py = python_version.replace(".", "") - pyt = pytorch_version.replace(".", "") - base_workflow_name = f"{prefix}linux_{btype}_py{py}_{cu_version}_pyt{pyt}" - - w.append( - generate_base_workflow( - base_workflow_name=base_workflow_name, - python_version=python_version, - pytorch_version=pytorch_version, - cu_version=cu_version, - btype=btype, - filter_branch=filter_branch, - ) - ) - - if upload: - w.append( - generate_upload_workflow( - base_workflow_name=base_workflow_name, - btype=btype, - cu_version=cu_version, - filter_branch=filter_branch, - ) - ) - - return w - - -def generate_base_workflow( - *, - base_workflow_name, - python_version, - cu_version, - pytorch_version, - btype, - filter_branch=None, -): - - d = { - "name": base_workflow_name, - "python_version": python_version, - "cu_version": cu_version, - "pytorch_version": pytorch_version, - "context": "DOCKERHUB_TOKEN", - } - - conda_docker_image = conda_docker_image_for_cuda(cu_version) - if conda_docker_image is not None: - d["conda_docker_image"] = conda_docker_image - - if filter_branch is not None: - d["filters"] = {"branches": {"only": filter_branch}} - - return {f"binary_linux_{btype}": d} - - -def generate_upload_workflow(*, base_workflow_name, btype, cu_version, filter_branch): - d = { - "name": f"{base_workflow_name}_upload", - "context": "org-member", - "requires": [base_workflow_name], - } - - if btype == "wheel": - d["subfolder"] = cu_version + "/" - - if filter_branch is not None: - d["filters"] = {"branches": {"only": filter_branch}} - - return {f"binary_{btype}_upload": d} - - -def indent(indentation, data_list): - if len(data_list) == 0: - return "" - return ("\n" + " " * indentation).join( - yaml.dump(data_list, default_flow_style=False).splitlines() - ) - - -if __name__ == "__main__": - d = 
os.path.dirname(__file__) - env = jinja2.Environment( - loader=jinja2.FileSystemLoader(d), - lstrip_blocks=True, - autoescape=False, - keep_trailing_newline=True, - ) - - with open(os.path.join(d, "config.yml"), "w") as f: - f.write(env.get_template("config.in.yml").render(workflows=workflows)) diff --git a/pytorch3d/.clang-format b/pytorch3d/.clang-format deleted file mode 100644 index 39b1b3d603ed0cf6b7f94c9c08067f148f35613f..0000000000000000000000000000000000000000 --- a/pytorch3d/.clang-format +++ /dev/null @@ -1,85 +0,0 @@ -AccessModifierOffset: -1 -AlignAfterOpenBracket: AlwaysBreak -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: false -AlignTrailingComments: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: false -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ FOR_EACH, FOR_EACH_R, FOR_EACH_RANGE, ] -IncludeCategories: - - Regex: '^<.*\.h(pp)?>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IndentCaseLabels: true -IndentWidth: 2 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp11 -TabWidth: 8 -UseTab: Never diff --git a/pytorch3d/.flake8 b/pytorch3d/.flake8 deleted file mode 100644 index 7c9b9bd73cf4fc16b2454ee7c04640533453a7df..0000000000000000000000000000000000000000 --- a/pytorch3d/.flake8 +++ /dev/null @@ -1,9 +0,0 @@ -[flake8] -# B028 No explicit stacklevel argument found. -# B907 'foo' is manually surrounded by quotes, consider using the `!r` conversion flag. -# B905 `zip()` without an explicit `strict=` parameter. 
-ignore = E203, E266, E501, W503, E221, B028, B905, B907 -max-line-length = 88 -max-complexity = 18 -select = B,C,E,F,W,T4,B9 -exclude = build,__init__.py diff --git a/pytorch3d/.gitignore b/pytorch3d/.gitignore deleted file mode 100644 index 66444c655cac3bbc12f0ff0e90f2b3a643f7b16d..0000000000000000000000000000000000000000 --- a/pytorch3d/.gitignore +++ /dev/null @@ -1,20 +0,0 @@ -build/ -*.egg-info/ -**/__pycache__/ -*-checkpoint.ipynb -**/.ipynb_checkpoints -**/.ipynb_checkpoints/** - - -# Docusaurus site -website/yarn.lock -website/build/ -website/i18n/ -website/node_modules/* -website/npm-debug.log - -## Generated for tutorials -website/_tutorials/ -website/static/files/ -website/pages/tutorials/* -!website/pages/tutorials/index.js diff --git a/pytorch3d/INSTALL.md b/pytorch3d/INSTALL.md deleted file mode 100644 index 5439a4edce0c2193bf1d068428b9ccfa7957b2d8..0000000000000000000000000000000000000000 --- a/pytorch3d/INSTALL.md +++ /dev/null @@ -1,156 +0,0 @@ -# Installation - - -## Requirements - -### Core library - -The core library is written in PyTorch. Several components have underlying implementation in CUDA for improved performance. A subset of these components have CPU implementations in C++/PyTorch. It is advised to use PyTorch3D with GPU support in order to use all the features. - -- Linux or macOS or Windows -- Python 3.8, 3.9 or 3.10 -- PyTorch 1.12.0, 1.12.1, 1.13.0, 2.0.0, 2.0.1, 2.1.0, 2.1.1, 2.1.2 or 2.2.0. -- torchvision that matches the PyTorch installation. You can install them together as explained at pytorch.org to make sure of this. -- gcc & g++ β‰₯ 4.9 -- [fvcore](https://github.com/facebookresearch/fvcore) -- [ioPath](https://github.com/facebookresearch/iopath) -- If CUDA is to be used, use a version which is supported by the corresponding pytorch version and at least version 9.2. -- If CUDA older than 11.7 is to be used and you are building from source, the CUB library must be available. We recommend version 1.10.0. - -The runtime dependencies can be installed by running: -``` -conda create -n pytorch3d python=3.9 -conda activate pytorch3d -conda install pytorch=1.13.0 torchvision pytorch-cuda=11.6 -c pytorch -c nvidia -conda install -c fvcore -c iopath -c conda-forge fvcore iopath -``` - -For the CUB build time dependency, which you only need if you have CUDA older than 11.7, if you are using conda, you can continue with -``` -conda install -c bottler nvidiacub -``` -Otherwise download the CUB library from https://github.com/NVIDIA/cub/releases and unpack it to a folder of your choice. -Define the environment variable CUB_HOME before building and point it to the directory that contains `CMakeLists.txt` for CUB. -For example on Linux/Mac, -``` -curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz -tar xzf 1.10.0.tar.gz -export CUB_HOME=$PWD/cub-1.10.0 -``` - -### Tests/Linting and Demos - -For developing on top of PyTorch3D or contributing, you will need to run the linter and tests. If you want to run any of the notebook tutorials as `docs/tutorials` or the examples in `docs/examples` you will also need matplotlib and OpenCV. 
-- scikit-image - black - usort - flake8 - matplotlib - tqdm - jupyter - imageio - plotly - opencv-python - -These can be installed by running: -``` -# Demos and examples -conda install jupyter -pip install scikit-image matplotlib imageio plotly opencv-python - -# Tests/Linting -pip install black usort flake8 flake8-bugbear flake8-comprehensions -``` - -## Installing prebuilt binaries for PyTorch3D -After installing the above dependencies, run one of the following commands: - -### 1. Install with CUDA support from Anaconda Cloud, on Linux only - -``` -# Anaconda Cloud -conda install pytorch3d -c pytorch3d -``` - -Or, to install a nightly (non-official, alpha) build: -``` -# Anaconda Cloud -conda install pytorch3d -c pytorch3d-nightly -``` - -### 2. Install wheels for Linux -We have prebuilt wheels with CUDA for Linux for PyTorch 1.11.0, for each of the supported CUDA versions, -for Python 3.8 and 3.9. This is for ease of use on Google Colab. -These are installed in a special way. -For example, to install for Python 3.8, PyTorch 1.11.0 and CUDA 11.3 -``` -pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1110/download.html -``` - -In general, from inside IPython, or in Google Colab or a jupyter notebook, you can install with -``` -import sys -import torch -pyt_version_str=torch.__version__.split("+")[0].replace(".", "") -version_str="".join([ - f"py3{sys.version_info.minor}_cu", - torch.version.cuda.replace(".",""), - f"_pyt{pyt_version_str}" -]) -!pip install fvcore iopath -!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html -``` - -## Building / installing from source. -CUDA support will be included if CUDA is available in pytorch or if the environment variable -`FORCE_CUDA` is set to `1`. - -### 1. Install from GitHub -``` -pip install "git+https://github.com/facebookresearch/pytorch3d.git" -``` -To install using the code of the released version instead of from the main branch, use the following instead. -``` -pip install "git+https://github.com/facebookresearch/pytorch3d.git@stable" -``` - -For CUDA builds with versions earlier than CUDA 11, set `CUB_HOME` before building as described above. - -**Install from GitHub on macOS:** -Some environment variables should be provided, like this. -``` -MACOSX_DEPLOYMENT_TARGET=10.14 CC=clang CXX=clang++ pip install "git+https://github.com/facebookresearch/pytorch3d.git" -``` - -### 2. Install from a local clone -``` -git clone https://github.com/facebookresearch/pytorch3d.git -cd pytorch3d && pip install -e . -``` -To rebuild after installing from a local clone, run `rm -rf build/ **/*.so` then `pip install -e .`. You often need to rebuild pytorch3d after reinstalling PyTorch. For CUDA builds with versions earlier than CUDA 11, set `CUB_HOME` before building as described above. - -**Install from local clone on macOS:** -``` -MACOSX_DEPLOYMENT_TARGET=10.14 CC=clang CXX=clang++ pip install -e . -``` - -**Install from local clone on Windows:** - -Depending on the version of PyTorch, changes to some PyTorch headers may be needed before compilation. These are often discussed in issues in this repository. - -After any necessary patching, you can go to "x64 Native Tools Command Prompt for VS 2019" to compile and install -``` -cd pytorch3d -python3 setup.py install -``` - -After installing, you can run **unit tests** -``` -python3 -m unittest discover -v -s tests -t .
-``` - -# FAQ - -### Can I use Docker? - -We don't provide a docker file but see [#113](https://github.com/facebookresearch/pytorch3d/issues/113) for a docker file shared by a user (NOTE: this has not been tested by the PyTorch3D team). diff --git a/pytorch3d/LICENSE b/pytorch3d/LICENSE deleted file mode 100644 index c55382ff0992d90ae5ecb2cd9ac624ccd20bda4d..0000000000000000000000000000000000000000 --- a/pytorch3d/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -BSD License - -For PyTorch3D software - -Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name Meta nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pytorch3d/LICENSE-3RD-PARTY b/pytorch3d/LICENSE-3RD-PARTY deleted file mode 100644 index f55b7dce68b9ba07e56d43a1d5e7134aa5fd4b6f..0000000000000000000000000000000000000000 --- a/pytorch3d/LICENSE-3RD-PARTY +++ /dev/null @@ -1,71 +0,0 @@ -SRN license ( https://github.com/vsitzmann/scene-representation-networks/ ): - -MIT License - -Copyright (c) 2019 Vincent Sitzmann - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- - -IDR license ( github.com/lioryariv/idr ): - -MIT License - -Copyright (c) 2020 Lior Yariv - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -NeRF https://github.com/bmild/nerf/ - -Copyright (c) 2020 bmild - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/pytorch3d/README.md b/pytorch3d/README.md deleted file mode 100644 index 9bf32fe3790ed34f5b8f3878ed8666423cd4b067..0000000000000000000000000000000000000000 --- a/pytorch3d/README.md +++ /dev/null @@ -1,183 +0,0 @@ - - -[![CircleCI](https://circleci.com/gh/facebookresearch/pytorch3d.svg?style=svg)](https://circleci.com/gh/facebookresearch/pytorch3d) -[![Anaconda-Server Badge](https://anaconda.org/pytorch3d/pytorch3d/badges/version.svg)](https://anaconda.org/pytorch3d/pytorch3d) - -# Introduction - -PyTorch3D provides efficient, reusable components for 3D Computer Vision research with [PyTorch](https://pytorch.org). - -Key features include: - -- Data structure for storing and manipulating triangle meshes -- Efficient operations on triangle meshes (projective transformations, graph convolution, sampling, loss functions) -- A differentiable mesh renderer -- Implicitron, see [its README](projects/implicitron_trainer), a framework for new-view synthesis via implicit representations. ([blog post](https://ai.facebook.com/blog/implicitron-a-new-modular-extensible-framework-for-neural-implicit-representations-in-pytorch3d/)) - -PyTorch3D is designed to integrate smoothly with deep learning methods for predicting and manipulating 3D data. 
-For this reason, all operators in PyTorch3D: - -- Are implemented using PyTorch tensors -- Can handle minibatches of hetereogenous data -- Can be differentiated -- Can utilize GPUs for acceleration - -Within FAIR, PyTorch3D has been used to power research projects such as [Mesh R-CNN](https://arxiv.org/abs/1906.02739). - -See our [blog post](https://ai.facebook.com/blog/-introducing-pytorch3d-an-open-source-library-for-3d-deep-learning/) to see more demos and learn about PyTorch3D. - -## Installation - -For detailed instructions refer to [INSTALL.md](INSTALL.md). - -## License - -PyTorch3D is released under the [BSD License](LICENSE). - -## Tutorials - -Get started with PyTorch3D by trying one of the tutorial notebooks. - -||| -|:-----------------------------------------------------------------------------------------------------------:|:--------------------------------------------------:| -| [Deform a sphere mesh to dolphin](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb)| [Bundle adjustment](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/bundle_adjustment.ipynb) | - -| | -|:------------------------------------------------------------:|:--------------------------------------------------:| -| [Render textured meshes](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_textured_meshes.ipynb)| [Camera position optimization](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb)| - -| | -|:------------------------------------------------------------:|:--------------------------------------------------:| -| [Render textured pointclouds](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_colored_points.ipynb)| [Fit a mesh with texture](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/fit_textured_mesh.ipynb)| - -| | -|:------------------------------------------------------------:|:--------------------------------------------------:| -| [Render DensePose data](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/render_densepose.ipynb)| [Load & Render ShapeNet data](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/dataloaders_ShapeNetCore_R2N2.ipynb)| - -| | -|:------------------------------------------------------------:|:--------------------------------------------------:| -| [Fit Textured Volume](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/fit_textured_volume.ipynb)| [Fit A Simple Neural Radiance Field](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/fit_simple_neural_radiance_field.ipynb)| - -| | -|:------------------------------------------------------------:|:--------------------------------------------------:| -| [Fit Textured Volume in Implicitron](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/implicitron_volumes.ipynb)| [Implicitron Config System](https://github.com/facebookresearch/pytorch3d/blob/main/docs/tutorials/implicitron_config_system.ipynb)| - - - - - -## Documentation - -Learn more about the API by reading the PyTorch3D [documentation](https://pytorch3d.readthedocs.org/). 
- -We also have deep dive notes on several API components: - -- [Heterogeneous Batching](https://github.com/facebookresearch/pytorch3d/tree/main/docs/notes/batching.md) -- [Mesh IO](https://github.com/facebookresearch/pytorch3d/tree/main/docs/notes/meshes_io.md) -- [Differentiable Rendering](https://github.com/facebookresearch/pytorch3d/tree/main/docs/notes/renderer_getting_started.md) - -### Overview Video - -We have created a short (~14 min) video tutorial providing an overview of the PyTorch3D codebase including several code examples. Click on the image below to watch the video on YouTube: - - - -## Development - -We welcome new contributions to PyTorch3D and we will be actively maintaining this library! Please refer to [CONTRIBUTING.md](./.github/CONTRIBUTING.md) for full instructions on how to run the code, tests and linter, and submit your pull requests. - -## Development and Compatibility - -- `main` branch: actively developed, without any guarantee, Anything can be broken at any time - - REMARK: this includes nightly builds which are built from `main` - - HINT: the commit history can help locate regressions or changes -- backward-compatibility between releases: no guarantee. Best efforts to communicate breaking changes and facilitate migration of code or data (incl. models). - -## Contributors - -PyTorch3D is written and maintained by the Facebook AI Research Computer Vision Team. - -In alphabetical order: - -* Amitav Baruah -* Steve Branson -* Krzysztof Chalupka -* Jiali Duan -* Luya Gao -* Georgia Gkioxari -* Taylor Gordon -* Justin Johnson -* Patrick Labatut -* Christoph Lassner -* Wan-Yen Lo -* David Novotny -* Nikhila Ravi -* Jeremy Reizenstein -* Dave Schnizlein -* Roman Shapovalov -* Olivia Wiles - -## Citation - -If you find PyTorch3D useful in your research, please cite our tech report: - -```bibtex -@article{ravi2020pytorch3d, - author = {Nikhila Ravi and Jeremy Reizenstein and David Novotny and Taylor Gordon - and Wan-Yen Lo and Justin Johnson and Georgia Gkioxari}, - title = {Accelerating 3D Deep Learning with PyTorch3D}, - journal = {arXiv:2007.08501}, - year = {2020}, -} -``` - -If you are using the pulsar backend for sphere-rendering (the `PulsarPointRenderer` or `pytorch3d.renderer.points.pulsar.Renderer`), please cite the tech report: - -```bibtex -@article{lassner2020pulsar, - author = {Christoph Lassner and Michael Zollh\"ofer}, - title = {Pulsar: Efficient Sphere-based Neural Rendering}, - journal = {arXiv:2004.07484}, - year = {2020}, -} -``` - -## News - -Please see below for a timeline of the codebase updates in reverse chronological order. We are sharing updates on the releases as well as research projects which are built with PyTorch3D. The changelogs for the releases are available under [`Releases`](https://github.com/facebookresearch/pytorch3d/releases), and the builds can be installed using `conda` as per the instructions in [INSTALL.md](INSTALL.md). - -**[Oct 31st 2023]:** PyTorch3D [v0.7.5](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.5) released. - -**[May 10th 2023]:** PyTorch3D [v0.7.4](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.4) released. - -**[Apr 5th 2023]:** PyTorch3D [v0.7.3](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.3) released. - -**[Dec 19th 2022]:** PyTorch3D [v0.7.2](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.2) released. - -**[Oct 23rd 2022]:** PyTorch3D [v0.7.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.1) released. 
- -**[Aug 10th 2022]:** PyTorch3D [v0.7.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.0) released with Implicitron and MeshRasterizerOpenGL. - -**[Apr 28th 2022]:** PyTorch3D [v0.6.2](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.2) released - -**[Dec 16th 2021]:** PyTorch3D [v0.6.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.1) released - -**[Oct 6th 2021]:** PyTorch3D [v0.6.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.0) released - -**[Aug 5th 2021]:** PyTorch3D [v0.5.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.5.0) released - -**[Feb 9th 2021]:** PyTorch3D [v0.4.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.4.0) released with support for implicit functions, volume rendering and a [reimplementation of NeRF](https://github.com/facebookresearch/pytorch3d/tree/main/projects/nerf). - -**[November 2nd 2020]:** PyTorch3D [v0.3.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.3.0) released, integrating the pulsar backend. - -**[Aug 28th 2020]:** PyTorch3D [v0.2.5](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.2.5) released - -**[July 17th 2020]:** PyTorch3D tech report published on ArXiv: https://arxiv.org/abs/2007.08501 - -**[April 24th 2020]:** PyTorch3D [v0.2.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.2.0) released - -**[March 25th 2020]:** [SynSin](https://arxiv.org/abs/1912.08804) codebase released using PyTorch3D: https://github.com/facebookresearch/synsin - -**[March 8th 2020]:** PyTorch3D [v0.1.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.1.1) bug fix release - -**[Jan 23rd 2020]:** PyTorch3D [v0.1.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.1.0) released. [Mesh R-CNN](https://arxiv.org/abs/1906.02739) codebase released: https://github.com/facebookresearch/meshrcnn diff --git a/pytorch3d/dev/linter.sh b/pytorch3d/dev/linter.sh deleted file mode 100644 index 43c64084d31229ba18560997735637030cdd2a2f..0000000000000000000000000000000000000000 --- a/pytorch3d/dev/linter.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Run this script at project root by "./dev/linter.sh" before you commit - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -DIR=$(dirname "${DIR}") - -if [[ -f "${DIR}/TARGETS" ]] -then - pyfmt "${DIR}" -else -# run usort externally only - echo "Running usort..." - usort "${DIR}" -fi - -echo "Running black..." -black "${DIR}" - -echo "Running flake..." -flake8 "${DIR}" || true - -echo "Running clang-format ..." -clangformat=$(command -v clang-format-8 || echo clang-format) -find "${DIR}" -regex ".*\.\(cpp\|c\|cc\|cu\|cuh\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 "${clangformat}" -i - -# Run arc and pyre internally only. -if [[ -f "${DIR}/TARGETS" ]] -then - (cd "${DIR}"; command -v arc > /dev/null && arc lint) || true - - echo "Running pyre..." 
- echo "To restart/kill pyre server, run 'pyre restart' or 'pyre kill' in fbcode/" - ( cd ~/fbsource/fbcode; pyre -l vision/fair/pytorch3d/ ) -fi diff --git a/pytorch3d/dev/run_tutorials.sh b/pytorch3d/dev/run_tutorials.sh deleted file mode 100644 index 304e471b252cd707ba77e87cfab4fbc0afb1ac56..0000000000000000000000000000000000000000 --- a/pytorch3d/dev/run_tutorials.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# This script is for running some of the tutorials using the nightly build in -# an isolated environment. It is designed to be run in docker. - -# If you run this script in this directory with -# sudo docker run --runtime=nvidia -it --rm -v $PWD/../docs/tutorials:/notebooks -v $PWD:/loc pytorch/conda-cuda bash /loc/run_tutorials.sh | tee log.txt -# it should execute some tutorials with the nightly build and resave them, and -# save a log in the current directory. - -# We use nbconvert. runipy would be an alternative but it currently doesn't -# work well with plotly. - -set -e - -conda init bash -# shellcheck source=/dev/null -source ~/.bashrc -conda create -y -n myenv python=3.8 matplotlib ipython ipywidgets nbconvert -conda activate myenv -conda install -y -c fvcore -c iopath -c conda-forge fvcore iopath -conda install -y -c pytorch pytorch=1.6.0 cudatoolkit=10.1 torchvision -conda install -y -c pytorch3d-nightly pytorch3d -pip install plotly scikit-image - -for notebook in /notebooks/*.ipynb -do - name=$(basename "$notebook") - - if [[ "$name" == "dataloaders_ShapeNetCore_R2N2.ipynb" ]] - then - #skip as data not easily available - continue - fi - if [[ "$name" == "render_densepose.ipynb" ]] - then - #skip as data not easily available - continue - fi - - #comment the lines which install torch, torchvision and pytorch3d - sed -Ei '/(torchvision)|(pytorch3d)/ s/!pip/!#pip/' "$notebook" - #Don't let tqdm use widgets - sed -i 's/from tqdm.notebook import tqdm/from tqdm import tqdm/' "$notebook" - - echo - echo "### ### ###" - echo "starting $name" - time jupyter nbconvert --to notebook --inplace --ExecutePreprocessor.kernel_name=python3 --execute "$notebook" || true - echo "ending $name" -done diff --git a/pytorch3d/dev/test_list.py b/pytorch3d/dev/test_list.py deleted file mode 100644 index f00facd8ddae2414c431cbae9ba5f9a810a0e15a..0000000000000000000000000000000000000000 --- a/pytorch3d/dev/test_list.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import ast -from pathlib import Path -from typing import List - - -""" -This module outputs a list of tests for completion. -It has no dependencies. -""" - - -def get_test_files() -> List[Path]: - root = Path(__file__).parent.parent - dirs = ["tests", "projects/implicitron_trainer"] - return [i for dir in dirs for i in (root / dir).glob("**/test*.py")] - - -def tests_from_file(path: Path, base: str) -> List[str]: - """ - Returns all the tests in the given file, in format - expected as arguments when running the tests. - e.g. 
- file_stem - file_stem.TestFunctionality - file_stem.TestFunctionality.test_f - file_stem.TestFunctionality.test_g - """ - with open(path) as f: - node = ast.parse(f.read()) - out = [base] - for cls in node.body: - if not isinstance(cls, ast.ClassDef): - continue - if not cls.name.startswith("Test"): - continue - class_base = base + "." + cls.name - out.append(class_base) - for method in cls.body: - if not isinstance(method, ast.FunctionDef): - continue - if not method.name.startswith("test"): - continue - out.append(class_base + "." + method.name) - return out - - -def main() -> None: - files = get_test_files() - test_root = Path(__file__).parent.parent - all_tests = [] - for f in files: - file_base = str(f.relative_to(test_root))[:-3].replace("/", ".") - all_tests.extend(tests_from_file(f, file_base)) - for test in sorted(all_tests): - print(test) - - -if __name__ == "__main__": - main() diff --git a/pytorch3d/dist/pytorch3d-0.7.6-cp311-cp311-manylinux1_x86_64.whl b/pytorch3d/dist/pytorch3d-0.7.6-cp311-cp311-manylinux1_x86_64.whl deleted file mode 100644 index 89f0ff808c0c96119718a29bbbb773e542a7b4f5..0000000000000000000000000000000000000000 --- a/pytorch3d/dist/pytorch3d-0.7.6-cp311-cp311-manylinux1_x86_64.whl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a4896a170cd3b23c65b9263fac1cc63c5377ba1554e19983b210233c466984e -size 868463 diff --git a/pytorch3d/packaging/build_conda.py b/pytorch3d/packaging/build_conda.py deleted file mode 100644 index 3ef730b239e6352997fae8cf187704f874896533..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/build_conda.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os.path -import runpy -import subprocess -from typing import List - -# required env vars: -# CU_VERSION: E.g. cu112 -# JUST_TESTRUN: 1 to not set nvcc flags -# PYTORCH_VERSION: e.g. 1.12.0 -# PYTHON_VERSION: e.g. 
3.9 - -# should be run from pytorch3d root - -CU_VERSION = os.environ["CU_VERSION"] -PYTORCH_VERSION = os.environ["PYTORCH_VERSION"] -pytorch_major_minor = tuple(int(i) for i in PYTORCH_VERSION.split(".")[:2]) -source_root_dir = os.environ["PWD"] - - -def version_constraint(version): - """ - Given version "11.3" returns " >=11.3,<11.4" - """ - last_part = version.rindex(".") + 1 - upper = version[:last_part] + str(1 + int(version[last_part:])) - return f" >={version},<{upper}" - - -def get_cuda_major_minor(): - if CU_VERSION == "cpu": - raise ValueError("fn only for cuda builds") - if len(CU_VERSION) != 5 or CU_VERSION[:2] != "cu": - raise ValueError(f"Bad CU_VERSION {CU_VERSION}") - major = CU_VERSION[2:4] - minor = CU_VERSION[4] - return major, minor - - -def setup_cuda(): - if CU_VERSION == "cpu": - return - major, minor = get_cuda_major_minor() - os.environ["CUDA_HOME"] = f"/usr/local/cuda-{major}.{minor}/" - os.environ["FORCE_CUDA"] = "1" - - basic_nvcc_flags = ( - "-gencode=arch=compute_50,code=sm_50 " - "-gencode=arch=compute_60,code=sm_60 " - "-gencode=arch=compute_70,code=sm_70 " - "-gencode=arch=compute_75,code=sm_75 " - "-gencode=arch=compute_50,code=compute_50" - ) - if CU_VERSION == "cu102": - nvcc_flags = "-gencode=arch=compute_35,code=sm_35 " + basic_nvcc_flags - elif CU_VERSION < ("cu118"): - nvcc_flags = ( - "-gencode=arch=compute_35,code=sm_35 " - + "-gencode=arch=compute_80,code=sm_80 " - + "-gencode=arch=compute_86,code=sm_86 " - + basic_nvcc_flags - ) - else: - nvcc_flags = ( - "-gencode=arch=compute_80,code=sm_80 " - + "-gencode=arch=compute_86,code=sm_86 " - + "-gencode=arch=compute_90,code=sm_90 " - + basic_nvcc_flags - ) - - if os.environ.get("JUST_TESTRUN", "0") != "1": - os.environ["NVCC_FLAGS"] = nvcc_flags - - -def setup_conda_pytorch_constraint() -> List[str]: - pytorch_constraint = f"- pytorch=={PYTORCH_VERSION}" - os.environ["CONDA_PYTORCH_CONSTRAINT"] = pytorch_constraint - os.environ["CONDA_PYTORCH_BUILD_CONSTRAINT"] = pytorch_constraint - os.environ["PYTORCH_VERSION_NODOT"] = PYTORCH_VERSION.replace(".", "") - - if pytorch_major_minor < (1, 13): - return ["-c", "pytorch"] - else: - return ["-c", "pytorch", "-c", "nvidia"] - - -def setup_conda_cudatoolkit_constraint(): - if CU_VERSION == "cpu": - os.environ["CONDA_CPUONLY_FEATURE"] = "- cpuonly" - os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = "" - return - os.environ["CONDA_CPUONLY_FEATURE"] = "" - - if CU_VERSION in ("cu102", "cu110"): - os.environ["CONDA_CUB_CONSTRAINT"] = "- nvidiacub" - else: - os.environ["CONDA_CUB_CONSTRAINT"] = "" - - major, minor = get_cuda_major_minor() - version_clause = version_constraint(f"{major}.{minor}") - if pytorch_major_minor < (1, 13): - toolkit = f"- cudatoolkit {version_clause}" - else: - toolkit = f"- pytorch-cuda {version_clause}" - os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = toolkit - - -def do_build(start_args: List[str]): - args = start_args.copy() - - test_flag = os.environ.get("TEST_FLAG") - if test_flag is not None: - args.append(test_flag) - - args.extend(["-c", "bottler", "-c", "fvcore", "-c", "iopath", "-c", "conda-forge"]) - args.append("--no-anaconda-upload") - args.extend(["--python", os.environ["PYTHON_VERSION"]]) - args.append("packaging/pytorch3d") - print(args) - subprocess.check_call(args) - - -if __name__ == "__main__": - args = ["conda", "build"] - setup_cuda() - - init_path = source_root_dir + "/pytorch3d/__init__.py" - build_version = runpy.run_path(init_path)["__version__"] - os.environ["BUILD_VERSION"] = build_version - - 
os.environ["SOURCE_ROOT_DIR"] = source_root_dir - args += setup_conda_pytorch_constraint() - setup_conda_cudatoolkit_constraint() - do_build(args) diff --git a/pytorch3d/packaging/build_wheel.sh b/pytorch3d/packaging/build_wheel.sh deleted file mode 100644 index afe5a0e81359f681302628f670fb16f44cf65851..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/build_wheel.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -set -ex - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -VERSION=$(python -c "exec(open('${script_dir}/../pytorch3d/__init__.py').read()); print(__version__)") - -export BUILD_TYPE=wheel -setup_env "$VERSION" -setup_wheel_python -pip_install numpy -setup_pip_pytorch_version -download_nvidiacub_if_needed -python setup.py clean -IS_WHEEL=1 python setup.py bdist_wheel diff --git a/pytorch3d/packaging/conda/build_pytorch3d.sh b/pytorch3d/packaging/conda/build_pytorch3d.sh deleted file mode 100644 index 6be532fb84b3d36561cf015268b8e8f43c9228c8..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/conda/build_pytorch3d.sh +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -if [[ -x "/remote/anaconda_token" ]]; then - . /remote/anaconda_token || true -fi - -set -ex - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Parse arguments and determine version -########################################################### - -if [ "$#" -ne 3 ]; then - echo "Illegal number of parameters. Pass cuda version, pytorch3d version, pytorch3d build number" - echo "CUDA version should be Mm with no dot, e.g. '80'" - echo "DESIRED_PYTHON should be M.m, e.g. '2.7'" - exit 1 -fi - -desired_cuda="$1" -build_version="$2" -build_number="$3" - -if [[ "$desired_cuda" != cpu ]]; then - desired_cuda="$(echo $desired_cuda | tr -d cuda. 
)" -fi -echo "Building cuda version $desired_cuda and pytorch3d version: $build_version build_number: $build_number" - -if [[ "$desired_cuda" == 'cpu' ]]; then - cpu_only=1 - cuver="cpu" -else - # Switch desired_cuda to be M.m to be consistent with other scripts in - # pytorch/builder - export FORCE_CUDA=1 - cuda_nodot="$desired_cuda" - - if [[ ${#cuda_nodot} -eq 2 ]]; then - desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}" - elif [[ ${#cuda_nodot} -eq 3 ]]; then - desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}" - else - echo "unknown cuda version $cuda_nodot" - exit 1 - fi - - cuver="cu$cuda_nodot" -fi - -export PYTORCH3D_BUILD_VERSION=$build_version -export PYTORCH3D_BUILD_NUMBER=$build_number - -if [[ -z "$DESIRED_PYTHON" ]]; then - DESIRED_PYTHON=('3.5' '3.6' '3.7') -fi - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then - WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" -fi - -mkdir -p "$WIN_PACKAGE_WORK_DIR" || true -pytorch3d_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/pytorch3d-src" -git config --system core.longpaths true - -if [[ ! -d "$pytorch3d_rootdir" ]]; then - rm -rf "$pytorch3d_rootdir" - git clone SOURCE_DIR/../.. "$pytorch3d_rootdir" - -fi - -cd "$SOURCE_DIR" - -export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" -export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" -rm -rf "$tmp_conda" -rm -f "$miniconda_exe" -curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" -"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" -pushd $tmp_conda -export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" -popd -retry conda install -yq conda-build - -ANACONDA_USER=pytorch-nightly -conda config --set anaconda_upload no - - -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]" - export CUDA_VERSION="None" -else - export CONDA_CPUONLY_FEATURE="" - . 
./switch_cuda_version.sh $desired_cuda - if [[ "$desired_cuda" == "10.1" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - elif [[ "$desired_cuda" == "10.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - elif [[ "$desired_cuda" == "9.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - elif [[ "$desired_cuda" == "9.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]" - elif [[ "$desired_cuda" == "8.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]" - else - echo "unhandled desired_cuda: $desired_cuda" - exit 1 - fi -fi - -if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = '$cuver'; \ - cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver in x['fn']) \ - and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON" - exit 1 - fi -else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" -fi -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" -else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" -fi - -export PYTORCH_VERSION_NODOT=${PYTORCH_VERSION//./} - -# Loop through all Python versions to build a package for each -for py_ver in "${DESIRED_PYTHON[@]}"; do - build_string="py${py_ver}_${build_string_suffix}" - folder_tag="${build_string}_$(date +'%Y%m%d')" - - # Create the conda package into this temporary folder. 
This is so we can find - # the package afterwards, as there's no easy way to extract the final filename - # from conda-build - output_folder="out_$folder_tag" - rm -rf "$output_folder" - mkdir "$output_folder" - - export VSTOOLCHAIN_PACKAGE=vs2017 - - # We need to build the compiler activation scripts first on Windows - time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \ - conda build -c "$ANACONDA_USER" \ - --no-anaconda-upload \ - --output-folder "$output_folder" \ - ../$VSTOOLCHAIN_PACKAGE - - cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../pytorch3d/conda_build_config.yaml - - conda config --set anaconda_upload no - echo "Calling conda-build at $(date)" - if [[ "$desired_cuda" == "9.2" ]]; then - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$PYTORCH3D_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$pytorch3d_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - -c "numba/label/dev" \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../pytorch3d - else - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$PYTORCH3D_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$pytorch3d_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../pytorch3d - fi - echo "Finished conda-build at $(date)" - - # Extract the package for testing - ls -lah "$output_folder" - built_package="$(find $output_folder/ -name '*pytorch3d*.tar.bz2')" - - # Copy the built package to the host machine for persistence before testing - if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then - mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/" - fi -done - - -set +e diff --git a/pytorch3d/packaging/conda/install_conda.bat b/pytorch3d/packaging/conda/install_conda.bat deleted file mode 100644 index c9aebe988f4dac55189ac7ae81e390ba9388684f..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/conda/install_conda.bat +++ /dev/null @@ -1,7 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. -@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. - -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% diff --git a/pytorch3d/packaging/conda/switch_cuda_version.sh b/pytorch3d/packaging/conda/switch_cuda_version.sh deleted file mode 100644 index e30f2c7b7b765f8f7966d7c42f887c37c210a208..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/conda/switch_cuda_version.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -if [[ "$OSTYPE" == "msys" ]]; then - CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1" -else - CUDA_DIR="/usr/local/cuda-$1" -fi - -if ! 
ls "$CUDA_DIR" -then - echo "folder $CUDA_DIR not found to switch" -fi - -echo "Switching symlink to $CUDA_DIR" -mkdir -p /usr/local -rm -fr /usr/local/cuda -ln -s "$CUDA_DIR" /usr/local/cuda - -if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` - export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` -else - export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) - export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) -fi - -ls -alh /usr/local/cuda - -echo "CUDA_VERSION=$CUDA_VERSION" -echo "CUDNN_VERSION=$CUDNN_VERSION" diff --git a/pytorch3d/packaging/cub_conda/README.md b/pytorch3d/packaging/cub_conda/README.md deleted file mode 100644 index fbf71eb4fb126dde214ebf60f99aa80738155983..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/cub_conda/README.md +++ /dev/null @@ -1,26 +0,0 @@ -## For building conda package for NVIDIA CUB - -CUB is required for building PyTorch3D so it makes sense -to provide a conda package to make its header files available. -This directory is used to do that, it is independent of the rest -of this repo. - -Make sure you are in a conda environment with -anaconda-client and conda-build installed. - -From this directory, build the package with the following. -``` -mkdir -p ./out -conda build --no-anaconda-upload --output-folder ./out cub -``` - -You can then upload the package with the following. -``` -retry () { - # run a command, and try again if it fails - $* || (echo && sleep 8 && echo retrying && $*) -} - -file=out/linux-64/nvidiacub-1.10.0-0.tar.bz2 -retry anaconda --verbose -t ${TOKEN} upload -u pytorch3d --force ${file} --no-progress -``` diff --git a/pytorch3d/packaging/cub_conda/cub/meta.yaml b/pytorch3d/packaging/cub_conda/cub/meta.yaml deleted file mode 100644 index 7ebb038a58deeecfcabb662ad32d77129336604a..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/cub_conda/cub/meta.yaml +++ /dev/null @@ -1,12 +0,0 @@ -package: - name: nvidiacub - version: 1.10.0 -source: - url: https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz - folder: source -build: - script: mkdir $PREFIX/include && cp -r source/cub $PREFIX/include/cub - -about: - home: https://github.com/NVIDIA/cub - summary: CUB provides state-of-the-art, reusable software components for every layer of the CUDA programming model. diff --git a/pytorch3d/packaging/linux_wheels/README.md b/pytorch3d/packaging/linux_wheels/README.md deleted file mode 100644 index d8871ffb517e877eccaff6ff5a13cc087513d30d..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/linux_wheels/README.md +++ /dev/null @@ -1,31 +0,0 @@ -## Building Linux pip Packages - -1. Make sure this directory is on a filesystem which docker can -use - e.g. not NFS. If you are using a local hard drive there is -nothing to do here. - -2. You may want to `docker pull pytorch/conda-cuda:latest`. - -3. Run `bash go.sh` in this directory. This takes ages -and writes packages to `inside/output`. - -4. You can upload the packages to s3, along with basic html files -which enable them to be used, with `bash after.sh`. - - -In particular, if you are in a jupyter/colab notebook you can -then install using these wheels with the following series of -commands. 
- -``` -import sys -import torch -pyt_version_str=torch.__version__.split("+")[0].replace(".", "") -version_str="".join([ - f"py3{sys.version_info.minor}_cu", - torch.version.cuda.replace(".",""), - f"_pyt{pyt_version_str}" -]) -!pip install fvcore iopath -!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html -``` diff --git a/pytorch3d/packaging/linux_wheels/after.sh b/pytorch3d/packaging/linux_wheels/after.sh deleted file mode 100644 index ccfce6473e1f819244b2f3cc77bb45f66e6735f8..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/linux_wheels/after.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -set -ex -sudo chown -R "$USER" output -python publish.py diff --git a/pytorch3d/packaging/linux_wheels/go.sh b/pytorch3d/packaging/linux_wheels/go.sh deleted file mode 100644 index 1929a4671ad4f2abe28bf8f2fbc725d5b0d98cc9..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/linux_wheels/go.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Some directory to persist downloaded conda packages -conda_cache=/raid/$USER/building_conda_cache - -mkdir -p "$conda_cache" - -sudo docker run --rm -v "$conda_cache:/conda_cache" -v "$PWD/../../:/inside" -e SELECTED_CUDA=cu113 pytorch/conda-builder:cuda113 bash inside/packaging/linux_wheels/inside.sh -sudo docker run --rm -v "$conda_cache:/conda_cache" -v "$PWD/../../:/inside" -e SELECTED_CUDA=cu115 pytorch/conda-builder:cuda115 bash inside/packaging/linux_wheels/inside.sh -sudo docker run --rm -v "$conda_cache:/conda_cache" -v "$PWD/../../:/inside" -e SELECTED_CUDA=cu116 pytorch/conda-builder:cuda116 bash inside/packaging/linux_wheels/inside.sh -sudo docker run --rm -v "$conda_cache:/conda_cache" -v "$PWD/../../:/inside" -e SELECTED_CUDA=cu117 pytorch/conda-builder:cuda117 bash inside/packaging/linux_wheels/inside.sh -sudo docker run --rm -v "$conda_cache:/conda_cache" -v "$PWD/../../:/inside" -e SELECTED_CUDA=cu118 pytorch/conda-builder:cuda118 bash inside/packaging/linux_wheels/inside.sh diff --git a/pytorch3d/packaging/linux_wheels/inside.sh b/pytorch3d/packaging/linux_wheels/inside.sh deleted file mode 100644 index a7b8951a00ad60b3ebce16ba08993d8fd3abcf37..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/linux_wheels/inside.sh +++ /dev/null @@ -1,163 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -set -ex - -conda init bash -# shellcheck source=/dev/null -source ~/.bashrc - -cd /inside -VERSION=$(python -c "exec(open('pytorch3d/__init__.py').read()); print(__version__)") - -export BUILD_VERSION=$VERSION -export FORCE_CUDA=1 -export MAX_JOBS=8 -export CONDA_PKGS_DIRS=/conda_cache - -if false -then - # We used to have to do this for old versions of CUDA - wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz - tar xzf 1.10.0.tar.gz - CUB_HOME=$(realpath ./cub-1.10.0) - export CUB_HOME - echo "CUB_HOME is now $CUB_HOME" -fi - -# As a rule, we want to build for any combination of dependencies which is supported by -# PyTorch3D and not older than the current Google Colab set up. - -PYTHON_VERSIONS="3.8 3.9 3.10" -# the keys are pytorch versions -declare -A CONDA_CUDA_VERSIONS=( -# ["1.11.0"]="cu113" -# ["1.12.0"]="cu113" -# ["1.12.1"]="cu113" -# ["1.13.0"]="cu116" -# ["1.13.1"]="cu116 cu117" -# ["2.0.0"]="cu117 cu118" - ["2.0.1"]="cu117 cu118" -) - - - -for python_version in $PYTHON_VERSIONS -do - for pytorch_version in "${!CONDA_CUDA_VERSIONS[@]}" - do - if [[ "3.7 3.8 3.9" != *$python_version* ]] && [[ "1.7.0 1.7.1 1.8.0 1.8.1 1.9.0 1.9.1 1.10.0 1.10.1 1.10.2" == *$pytorch_version* ]] - then - #python 3.10 and later not supported by pytorch 1.10.2 and before - continue - fi - - extra_channel="-c nvidia" - cudatools="pytorch-cuda" - if [[ "1.11.0" == "$pytorch_version" ]] - then - extra_channel="" - cudatools="cudatoolkit" - fi - if [[ "1.12.0" == "$pytorch_version" ]] || [[ "1.12.1" == "$pytorch_version" ]] - then - extra_channel="-c conda-forge" - cudatools="cudatoolkit" - fi - - for cu_version in ${CONDA_CUDA_VERSIONS[$pytorch_version]} - do - if [[ $SELECTED_CUDA != "$cu_version" ]] - then - continue - fi - - case "$cu_version" in - cu118) - export CUDA_HOME=/usr/local/cuda-11.8/ - export CUDA_TAG=11.8 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu117) - export CUDA_HOME=/usr/local/cuda-11.7/ - export CUDA_TAG=11.7 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu116) - export CUDA_HOME=/usr/local/cuda-11.6/ - export CUDA_TAG=11.6 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu115) - export CUDA_HOME=/usr/local/cuda-11.5/ - export CUDA_TAG=11.5 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu113) - export CUDA_HOME=/usr/local/cuda-11.3/ - export CUDA_TAG=11.3 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 
-gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu112) - export CUDA_HOME=/usr/local/cuda-11.2/ - export CUDA_TAG=11.2 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu111) - export CUDA_HOME=/usr/local/cuda-11.1/ - export CUDA_TAG=11.1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu110) - export CUDA_HOME=/usr/local/cuda-11.0/ - export CUDA_TAG=11.0 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_50,code=compute_50" - ;; - cu102) - export CUDA_HOME=/usr/local/cuda-10.2/ - export CUDA_TAG=10.2 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu101) - export CUDA_HOME=/usr/local/cuda-10.1/ - export CUDA_TAG=10.1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - *) - echo "Unrecognized cu_version=$cu_version" - exit 1 - ;; - esac - tag=py"${python_version//./}"_"${cu_version}"_pyt"${pytorch_version//./}" - - outdir="/inside/packaging/linux_wheels/output/$tag" - if [[ -d "$outdir" ]] - then - continue - fi - - conda create -y -n "$tag" "python=$python_version" - conda activate "$tag" - # shellcheck disable=SC2086 - conda install -y -c pytorch $extra_channel "pytorch=$pytorch_version" "$cudatools=$CUDA_TAG" - pip install fvcore iopath - echo "python version" "$python_version" "pytorch version" "$pytorch_version" "cuda version" "$cu_version" "tag" "$tag" - - rm -rf dist - - python setup.py clean - python setup.py bdist_wheel - - rm -rf "$outdir" - mkdir -p "$outdir" - cp dist/*whl "$outdir" - - conda deactivate - done - done -done -echo "DONE" diff --git a/pytorch3d/packaging/linux_wheels/publish.py b/pytorch3d/packaging/linux_wheels/publish.py deleted file mode 100644 index 6d0da93c740bf7c2ae63b8c659c7d217346b5a76..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/linux_wheels/publish.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import subprocess -from pathlib import Path -from typing import List - - -dest = "s3://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/" - -output = Path("output") - - -def aws_s3_cmd(args) -> List[str]: - """ - This function returns the full args for subprocess to do a command - with aws. - """ - cmd_args = ["aws", "s3", "--profile", "saml"] + args - return cmd_args - - -def fs3_exists(path) -> bool: - """ - Returns True if the path exists inside dest on S3. - In fact, will also return True if there is a file which has the given - path as a prefix, but we are careful about this. - """ - out = subprocess.check_output(aws_s3_cmd(["ls", path])) - return len(out) != 0 - - -def get_html_wrappers() -> None: - for directory in sorted(output.iterdir()): - output_wrapper = directory / "download.html" - assert not output_wrapper.exists() - dest_wrapper = dest + directory.name + "/download.html" - if fs3_exists(dest_wrapper): - subprocess.check_call(aws_s3_cmd(["cp", dest_wrapper, str(output_wrapper)])) - - -def write_html_wrappers() -> None: - html = """ - $
- """ - - for directory in sorted(output.iterdir()): - files = list(directory.glob("*.whl")) - assert len(files) == 1, files - [wheel] = files - - this_html = html.replace("$", wheel.name) - output_wrapper = directory / "download.html" - if output_wrapper.exists(): - contents = output_wrapper.read_text() - if this_html not in contents: - with open(output_wrapper, "a") as f: - f.write(this_html) - else: - output_wrapper.write_text(this_html) - - -def to_aws() -> None: - for directory in output.iterdir(): - for file in directory.iterdir(): - print(file) - subprocess.check_call( - aws_s3_cmd(["cp", str(file), dest + str(file.relative_to(output))]) - ) - - -if __name__ == "__main__": - # Uncomment this for subsequent releases. - # get_html_wrappers() - write_html_wrappers() - to_aws() - - -# see all files with -# aws s3 --profile saml ls --recursive s3://dl.fbaipublicfiles.com/pytorch3d/ - -# empty current with -# aws s3 --profile saml rm --recursive -# s3://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/ diff --git a/pytorch3d/packaging/pkg_helpers.bash b/pytorch3d/packaging/pkg_helpers.bash deleted file mode 100644 index e22643ecc59c721edb72a19186d242d77781bdbc..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/pkg_helpers.bash +++ /dev/null @@ -1,390 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# shellcheck shell=bash -# A set of useful bash functions for common functionality we need to do in -# many build scripts - -# Setup CUDA environment variables, based on CU_VERSION -# -# Inputs: -# CU_VERSION (cu92, cu100, cu101, cu102) -# NO_CUDA_PACKAGE (bool) -# BUILD_TYPE (conda, wheel) -# -# Outputs: -# VERSION_SUFFIX (e.g., "") -# PYTORCH_VERSION_SUFFIX (e.g., +cpu) -# WHEEL_DIR (e.g., cu100/) -# CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension) -# FORCE_CUDA (respected by pytorch3d setup.py) -# NVCC_FLAGS (respected by pytorch3d setup.py) -# -# Precondition: CUDA versions are installed in their conventional locations in -# /usr/local/cuda-* -# -# NOTE: Why VERSION_SUFFIX versus PYTORCH_VERSION_SUFFIX? If you're building -# a package with CUDA on a platform we support CUDA on, VERSION_SUFFIX == -# PYTORCH_VERSION_SUFFIX and everyone is happy. However, if you are building a -# package with only CPU bits (e.g., torchaudio), then VERSION_SUFFIX is always -# empty, but PYTORCH_VERSION_SUFFIX is +cpu (because that's how you get a CPU -# version of a Python package. But that doesn't apply if you're on OS X, -# since the default CU_VERSION on OS X is cpu. -setup_cuda() { - - # First, compute version suffixes. 
By default, assume no version suffixes - export VERSION_SUFFIX="" - export PYTORCH_VERSION_SUFFIX="" - export WHEEL_DIR="" - # Wheel builds need suffixes (but not if they're on OS X, which never has suffix) - if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then - # The default CUDA has no suffix - if [[ "$CU_VERSION" != "cu102" ]]; then - export PYTORCH_VERSION_SUFFIX="+$CU_VERSION" - fi - # Match the suffix scheme of pytorch, unless this package does not have - # CUDA builds (in which case, use default) - if [[ -z "$NO_CUDA_PACKAGE" ]]; then - export VERSION_SUFFIX="$PYTORCH_VERSION_SUFFIX" - export WHEEL_DIR="$CU_VERSION/" - fi - fi - - # Now work out the CUDA settings - case "$CU_VERSION" in - cu116) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.6" - else - export CUDA_HOME=/usr/local/cuda-11.6/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu115) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.5" - else - export CUDA_HOME=/usr/local/cuda-11.5/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu113) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3" - else - export CUDA_HOME=/usr/local/cuda-11.3/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu112) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2" - else - export CUDA_HOME=/usr/local/cuda-11.2/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu111) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" - else - export CUDA_HOME=/usr/local/cuda-11.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # 
https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" - ;; - cu110) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.0" - else - export CUDA_HOME=/usr/local/cuda-11.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_50,code=compute_50" - ;; - cu102) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" - else - export CUDA_HOME=/usr/local/cuda-10.2/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu101) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1" - else - export CUDA_HOME=/usr/local/cuda-10.1/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu100) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.0" - else - export CUDA_HOME=/usr/local/cuda-10.0/ - fi - export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" - ;; - cu92) - if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2" - else - export CUDA_HOME=/usr/local/cuda-9.2/ - fi - export FORCE_CUDA=1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50" - ;; - cpu) - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -# Populate build version if necessary, and add version suffix -# -# Inputs: -# BUILD_VERSION (e.g., 0.2.0 or empty) -# VERSION_SUFFIX (e.g., +cpu) -# -# Outputs: -# BUILD_VERSION (e.g., 0.2.0.dev20190807+cpu) -# -# Fill BUILD_VERSION if it doesn't exist already with a nightly string -# Usage: setup_build_version 0.2.0 -setup_build_version() { - if [[ -z "$BUILD_VERSION" ]]; then 
- export BUILD_VERSION="$1.dev$(date "+%Y%m%d")$VERSION_SUFFIX" - else - export BUILD_VERSION="$BUILD_VERSION$VERSION_SUFFIX" - fi -} - -# Set some useful variables for OS X, if applicable -setup_macos() { - if [[ "$(uname)" == Darwin ]]; then - export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ - fi -} - -# Top-level entry point for things every package will need to do -# -# Usage: setup_env 0.2.0 -setup_env() { - setup_cuda - setup_build_version "$1" - setup_macos -} - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Inputs: -# PYTHON_VERSION (2.7, 3.5, 3.6, 3.7) -# UNICODE_ABI (bool) -# -# Outputs: -# PATH modified to put correct Python version in PATH -# -# Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image -setup_wheel_python() { - if [[ "$(uname)" == Darwin ]]; then - eval "$(conda shell.bash hook)" - conda env remove -n "env$PYTHON_VERSION" || true - conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" - conda activate "env$PYTHON_VERSION" - else - case "$PYTHON_VERSION" in - 2.7) - if [[ -n "$UNICODE_ABI" ]]; then - python_abi=cp27-cp27mu - else - python_abi=cp27-cp27m - fi - ;; - 3.5) python_abi=cp35-cp35m ;; - 3.6) python_abi=cp36-cp36m ;; - 3.7) python_abi=cp37-cp37m ;; - 3.8) python_abi=cp38-cp38 ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - export PATH="/opt/python/$python_abi/bin:$PATH" - fi -} - -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - -# Install torch with pip, respecting PYTORCH_VERSION, and record the installed -# version into PYTORCH_VERSION, if applicable -setup_pip_pytorch_version() { - if [[ -z "$PYTORCH_VERSION" ]]; then - # Install latest prerelease version of torch, per our nightlies, consistent - # with the requested cuda version - pip_install --pre torch -f "https://download.pytorch.org/whl/nightly/${WHEEL_DIR}torch_nightly.html" - if [[ "$CUDA_VERSION" == "cpu" ]]; then - # CUDA and CPU are ABI compatible on the CPU-only parts, so strip - # in this case - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//')" - fi - else - pip_install "torch==$PYTORCH_VERSION$CUDA_SUFFIX" \ - -f https://download.pytorch.org/whl/torch_stable.html \ - -f https://download.pytorch.org/whl/nightly/torch_nightly.html - fi -} - -# Fill PYTORCH_VERSION with the latest conda nightly version, and -# CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions -# -# You MUST have populated CUDA_SUFFIX before hand. -setup_conda_pytorch_constraint() { - if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \ - cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - cuver_2 = (cuver[:-1] + '.' 
+ cuver[-1]).replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver_1 in x['fn'] or cuver_2 in x['fn']) \ - and 'py' + os.environ['PYTHON_VERSION'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for CU_VERSION=$CU_VERSION and PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - fi - else - export CONDA_CHANNEL_FLAGS="-c pytorch" - fi - if [[ "$CU_VERSION" == cpu ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" - fi - export PYTORCH_VERSION_NODOT=${PYTORCH_VERSION//./} -} - -# Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT -setup_conda_cudatoolkit_constraint() { - export CONDA_CPUONLY_FEATURE="" - export CONDA_CUB_CONSTRAINT="" - if [[ "$(uname)" == Darwin ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - else - case "$CU_VERSION" in - cu116) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.6,<11.7 # [not osx]" - ;; - cu115) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.5,<11.6 # [not osx]" - ;; - cu113) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.3,<11.4 # [not osx]" - ;; - cu112) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]" - ;; - cu111) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]" - ;; - cu110) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]" - # Even though cudatoolkit 11.0 provides CUB we need our own, to control the - # version, because the built-in 1.9.9 in the cudatoolkit causes problems. 
- export CONDA_CUB_CONSTRAINT="- nvidiacub" - ;; - cu102) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" - export CONDA_CUB_CONSTRAINT="- nvidiacub" - ;; - cu101) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - export CONDA_CUB_CONSTRAINT="- nvidiacub" - ;; - cu100) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - export CONDA_CUB_CONSTRAINT="- nvidiacub" - ;; - cu92) - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - export CONDA_CUB_CONSTRAINT="- nvidiacub" - ;; - cpu) - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly" - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac - fi -} - -# Build the proper compiler package before building the final package -setup_visual_studio_constraint() { - if [[ "$OSTYPE" == "msys" ]]; then - export VSTOOLCHAIN_PACKAGE=vs2019 - export VSDEVCMD_ARGS='' - # shellcheck disable=SC2086 - conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE - cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/pytorch3d/conda_build_config.yaml - fi -} - -download_nvidiacub_if_needed() { - case "$CU_VERSION" in - cu110|cu102|cu101|cu100|cu92) - echo "Downloading cub" - wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz - tar xzf 1.10.0.tar.gz - CUB_HOME=$(realpath ./cub-1.10.0) - export CUB_HOME - echo "CUB_HOME is now $CUB_HOME" - ;; - esac - # We don't need CUB for a cpu build or if cuda is 11.1 or higher -} diff --git a/pytorch3d/packaging/pytorch3d/meta.yaml b/pytorch3d/packaging/pytorch3d/meta.yaml deleted file mode 100644 index 8604127d4104be38fa5d4153f15ee05e6bf6bb91..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/pytorch3d/meta.yaml +++ /dev/null @@ -1,59 +0,0 @@ -package: - name: pytorch3d - version: "{{ environ.get('BUILD_VERSION') }}" - -source: - path: "{{ environ.get('SOURCE_ROOT_DIR') }}" - -requirements: - build: - - {{ compiler('c') }} # [win] - {{ environ.get('CONDA_CUB_CONSTRAINT') }} - - host: - - python - - setuptools - {{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - {{ environ.get('CONDA_CPUONLY_FEATURE') }} - - run: - - python - - numpy >=1.11 - - torchvision >=0.5 - - fvcore - - iopath - {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} - -build: - string: py{{py}}_{{ environ['CU_VERSION'] }}_pyt{{ environ['PYTORCH_VERSION_NODOT']}} - script: python setup.py install --single-version-externally-managed --record=record.txt # [not win] - script_env: - - CUDA_HOME - - FORCE_CUDA - - NVCC_FLAGS - - MAX_JOBS - features: - {{ environ.get('CONDA_CPUONLY_FEATURE') }} - -test: - imports: - - pytorch3d - source_files: - - tests - - docs - requires: - - imageio - - hydra-core - - accelerate - commands: - #pytest . - python -m unittest discover -v -s tests -t . - - -about: - home: https://github.com/facebookresearch/pytorch3d - license: BSD - license_file: LICENSE - summary: '3d Geometry for pytorch' diff --git a/pytorch3d/packaging/vs2017/activate.bat b/pytorch3d/packaging/vs2017/activate.bat deleted file mode 100644 index 55928c1e141f12753275a8c7f1768fdc0432780a..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2017/activate.bat +++ /dev/null @@ -1,50 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. 
-@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. - -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=15.0" -SET "VS_MAJOR=15" -SET "VS_YEAR=2017" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/pytorch3d/packaging/vs2017/conda_build_config.yaml b/pytorch3d/packaging/vs2017/conda_build_config.yaml deleted file mode 100644 index 5188bb0ebecf72aefb1c2e779458998216e4d479..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2017/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2017 # [win] -cxx_compiler: - - vs2017 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/pytorch3d/packaging/vs2017/install_activate.bat b/pytorch3d/packaging/vs2017/install_activate.bat deleted file mode 100644 index 7d4e4cc31bac3cf0b168706b37c1faed703f6e57..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2017/install_activate.bat +++ /dev/null @@ -1,35 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. -@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. 
- -set YEAR=2017 -set VER=15 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) diff --git a/pytorch3d/packaging/vs2017/install_runtime.bat b/pytorch3d/packaging/vs2017/install_runtime.bat deleted file mode 100644 index 9e7806657fb926fabdc501754cf26e1862678f70..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2017/install_runtime.bat +++ /dev/null @@ -1,55 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. -@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. - -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2017 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2017 -set "VC_VER=141" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/pytorch3d/packaging/vs2017/meta.yaml b/pytorch3d/packaging/vs2017/meta.yaml deleted file mode 100644 index 34f4860ba850120f59f3f499e21e2a4b429e03cc..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2017/meta.yaml +++ /dev/null @@ -1,45 +0,0 @@ -{% set vcver="14.1" %} -{% set vcfeature="14" %} -{% set vsyear="2017" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". - strong: - - vc{{ vcfeature }} - run_exports: - - vc {{ vcver }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause - - name: vs{{ vsyear }}_runtime - script: install_runtime.bat - - name: vc - version: {{ vcver }} - track_features: - - vc{{ vcfeature }} - requirements: - run: - - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }} - about: - home: https://github.com/conda/conda/wiki/VC-features - license: Modified BSD License (3-clause) - license_family: BSD - summary: A meta-package to track VC features. - description: | - This metapackage is used to activate vc features without - depending on Python. - doc_url: https://github.com/conda/conda/wiki/VC-features - dev_url: https://github.com/conda/conda/wiki/VC-features diff --git a/pytorch3d/packaging/vs2019/activate.bat b/pytorch3d/packaging/vs2019/activate.bat deleted file mode 100644 index fd4f5706e339bb50442c6c3d63d484bf7628db72..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2019/activate.bat +++ /dev/null @@ -1,50 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. -@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. - -:: Set env vars that tell distutils to use the compiler that we put on path -SET DISTUTILS_USE_SDK=1 -SET MSSdk=1 - -SET "VS_VERSION=16.0" -SET "VS_MAJOR=16" -SET "VS_YEAR=2019" - -set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" -set "MSYS2_ENV_CONV_EXCL=CL" - -:: For Python 3.5+, ensure that we link with the dynamic runtime. 
See -:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info -set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VSINSTALLDIR=%%i\" - goto :vswhere - ) -) - -:vswhere - -:: Shorten PATH to avoid the `input line too long` error. -SET MyPath=%PATH% - -setlocal EnableDelayedExpansion - -SET TempPath="%MyPath:;=";"%" -SET var= -FOR %%a IN (%TempPath%) DO ( - IF EXIST %%~sa ( - SET "var=!var!;%%~sa" - ) -) - -set "TempPath=!var:~1!" -endlocal & set "PATH=%TempPath%" - -:: Shorten current directory too -FOR %%A IN (.) DO CD "%%~sA" - -:: other things added by install_activate.bat at package build time diff --git a/pytorch3d/packaging/vs2019/conda_build_config.yaml b/pytorch3d/packaging/vs2019/conda_build_config.yaml deleted file mode 100644 index 358052ec012940bb56778d167bcd69302d255846..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2019/conda_build_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -blas_impl: - - mkl # [x86_64] -c_compiler: - - vs2019 # [win] -cxx_compiler: - - vs2019 # [win] -python: - - 3.5 - - 3.6 -# This differs from target_platform in that it determines what subdir the compiler -# will target, not what subdir the compiler package will be itself. -# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 -# code on win-64 miniconda. -cross_compiler_target_platform: - - win-64 # [win] -target_platform: - - win-64 # [win] -vc: - - 14 -zip_keys: - - # [win] - - vc # [win] - - c_compiler # [win] - - cxx_compiler # [win] diff --git a/pytorch3d/packaging/vs2019/install_activate.bat b/pytorch3d/packaging/vs2019/install_activate.bat deleted file mode 100644 index ee7ccdc679777691de2ac9c883e4a01118a29ec3..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2019/install_activate.bat +++ /dev/null @@ -1,35 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. -@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. 
- -set YEAR=2019 -set VER=16 - -mkdir "%PREFIX%\etc\conda\activate.d" -COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - -IF "%cross_compiler_target_platform%" == "win-64" ( - set "target_platform=amd64" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - IF "%VSDEVCMD_ARGS%" == "" ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) ELSE ( - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) - echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - ) else ( - set "target_platform=x86" - echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" - echo popd - ) diff --git a/pytorch3d/packaging/vs2019/install_runtime.bat b/pytorch3d/packaging/vs2019/install_runtime.bat deleted file mode 100644 index 1c842cfe350db46f09c9ac79dcd3bc22965d3315..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2019/install_runtime.bat +++ /dev/null @@ -1,55 +0,0 @@ -@REM Copyright (c) Meta Platforms, Inc. and affiliates. -@REM All rights reserved. -@REM -@REM This source code is licensed under the BSD-style license found in the -@REM LICENSE file in the root directory of this source tree. - -set VC_PATH=x86 -if "%ARCH%"=="64" ( - set VC_PATH=x64 -) - -set MSC_VER=2019 - -rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 -rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( -rem set SP=%%A -rem ) - -rem if not "%SP%" == "%PKG_VERSION%" ( -rem echo "Version detected from registry: %SP%" -rem echo "does not match version of package being built (%PKG_VERSION%)" -rem echo "Do you have current updates for VS 2015 installed?" -rem exit 1 -rem ) - - -REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E -robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E -if %ERRORLEVEL% GEQ 8 exit 1 - -REM ========== This one comes from visual studio 2019 -set "VC_VER=142" - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto :eof - ) -) - -@setlocal -call "%VS15VARSALL%" x64 - -set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" - -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E -if %ERRORLEVEL% LSS 8 exit 0 -@endlocal diff --git a/pytorch3d/packaging/vs2019/meta.yaml b/pytorch3d/packaging/vs2019/meta.yaml deleted file mode 100644 index e3f8b4714818e1fe5754a30ceb2070ff000991fd..0000000000000000000000000000000000000000 --- a/pytorch3d/packaging/vs2019/meta.yaml +++ /dev/null @@ -1,45 +0,0 @@ -{% set vcver="14.2" %} -{% set vcfeature="14" %} -{% set vsyear="2019" %} -{% set fullver="15.4.27004.2010" %} - -package: - name: vs{{ vsyear }} - version: {{ fullver }} - -build: - skip: True [not win] - script_env: - - VSDEVCMD_ARGS # [win] - -outputs: - - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} - script: install_activate.bat - track_features: - # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14. Tools are "v142". - strong: - - vc{{ vcfeature }} - run_exports: - - vc {{ vcver }} - about: - summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler - license: BSD 3-clause - - name: vs{{ vsyear }}_runtime - script: install_runtime.bat - - name: vc - version: {{ vcver }} - track_features: - - vc{{ vcfeature }} - requirements: - run: - - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }} - about: - home: https://github.com/conda/conda/wiki/VC-features - license: Modified BSD License (3-clause) - license_family: BSD - summary: A meta-package to track VC features. - description: | - This metapackage is used to activate vc features without - depending on Python. - doc_url: https://github.com/conda/conda/wiki/VC-features - dev_url: https://github.com/conda/conda/wiki/VC-features diff --git a/pytorch3d/projects/__init__.py b/pytorch3d/projects/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
diff --git a/pytorch3d/projects/implicitron_trainer/README.md b/pytorch3d/projects/implicitron_trainer/README.md
deleted file mode 100644
index f8f875eb00ef2bdfd6504173c5e62f0c2d06151d..0000000000000000000000000000000000000000
--- a/pytorch3d/projects/implicitron_trainer/README.md
+++ /dev/null
@@ -1,455 +0,0 @@
-# Introduction
-
-Implicitron is a PyTorch3D-based framework for new-view synthesis based on neural-network scene representations.
-
-# License
-
-Implicitron is distributed as part of PyTorch3D under the [BSD license](https://github.com/facebookresearch/pytorch3d/blob/main/LICENSE).
-It includes code from the [NeRF](https://github.com/bmild/nerf), [SRN](http://github.com/vsitzmann/scene-representation-networks) and [IDR](http://github.com/lioryariv/idr) repos.
-See [LICENSE-3RD-PARTY](https://github.com/facebookresearch/pytorch3d/blob/main/LICENSE-3RD-PARTY) for their licenses.
-
-
-# Installation
-
-There are three ways to set up Implicitron, depending on the level of flexibility required.
-If you only want to train or evaluate models as they are implemented, changing only the parameters, you can simply install the package.
-Implicitron also provides a flexible API that supports user-defined plug-ins;
-if you want to re-implement some of the components without changing the high-level pipeline, you need to create a custom launcher script.
-The most flexible option, though, is cloning the PyTorch3D repo and building it from source, which allows changing the code in arbitrary ways.
-Below, we describe all three options in more detail.
-
-
-## [Option 1] Running an executable from the package
-
-This option allows you to use the code as is without changing the implementations.
-Only the configuration can be changed (see [Configuration system](#configuration-system)).
-
-For this setup, install the dependencies and PyTorch3D from conda following [the guide](https://github.com/facebookresearch/pytorch3d/blob/master/INSTALL.md#1-install-with-cuda-support-from-anaconda-cloud-on-linux-only). Then, install Implicitron-specific dependencies:
-
-```shell
-pip install "hydra-core>=1.1" visdom lpips matplotlib accelerate
-```
-
-The runner executable is available as the `pytorch3d_implicitron_runner` shell command.
-See the [Running](#running) section below for examples of training and evaluation commands.
-
-
-## [Option 2] Supporting custom implementations
-
-To plug in custom implementations, for example of renderer or implicit-function protocols, you need to create your own runner script and import the plug-in implementations there.
-First, install PyTorch3D and the Implicitron dependencies as described in the previous section.
-Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer` is a good place to start.
-See [Custom plugins](#custom-plugins) for more information on how to import implementations and enable them in the configs.
-
-
-## [Option 3] Cloning the PyTorch3D repo
-
-This is the most flexible way to set up Implicitron as it allows changing the code directly.
-It allows modifying the high-level rendering pipeline or implementing yet-unsupported loss functions.
-Please follow the instructions to [install PyTorch3D from a local clone](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md#2-install-from-a-local-clone).
-Then, install Implicitron-specific dependencies:
-
-```shell
-pip install "hydra-core>=1.1" visdom lpips matplotlib accelerate
-```
-
-You are still encouraged to implement custom plugins as above where possible, as it makes reusing the code easier.
-The executable is located in `pytorch3d/projects/implicitron_trainer`.
-
-> **_NOTE:_** Both `pytorch3d_implicitron_runner` and `pytorch3d_implicitron_visualizer`
-executables (mentioned below) are not available when using a local clone.
-Instead, users should use the python scripts `experiment.py` and `visualize_reconstruction.py` (see the [Running](#running) section below).
-
-
-# Running
-
-This section assumes that you use the executable provided by the installed package
-(Option 1 / Option 2 in [Installation](#installation) above),
-i.e. `pytorch3d_implicitron_runner` and `pytorch3d_implicitron_visualizer` are available.
-
-> **_NOTE:_** If the executables are not available (e.g. when using a local clone - Option 3 in [Installation](#installation)),
-users should directly use the `experiment.py` and `visualize_reconstruction.py` python scripts,
-which correspond to the executables as follows:
-- `pytorch3d_implicitron_runner` corresponds to `<pytorch3d_root>/projects/implicitron_trainer/experiment.py`
-- `pytorch3d_implicitron_visualizer` corresponds to `<pytorch3d_root>/projects/implicitron_trainer/visualize_reconstruction.py`
-
-For instance, in order to directly execute training with the python script, users can call:
-```shell
-cd <pytorch3d_root>/projects/
-python -m implicitron_trainer.experiment
-```
-
-If you have a custom `experiment.py` or `visualize_reconstruction.py` script
-(as in Option 2 [above](#installation)), replace the executable with the path to your script.
-
-## Training
-
-To run training, pass a yaml config file, followed by a list of overridden arguments.
-For example, to train NeRF on the first skateboard sequence from the CO3D dataset, you can run:
-```shell
-dataset_args=data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
-pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf \
-    $dataset_args.dataset_root=<DATASET_ROOT> $dataset_args.category='skateboard' \
-    $dataset_args.test_restrict_sequence_id=0 test_when_finished=True exp_dir=<CHECKPOINT_DIR>
-```
-
-Here, `--config-path` points to the config path relative to the `pytorch3d_implicitron_runner` location;
-`--config-name` picks the config (in this case, `repro_singleseq_nerf.yaml`);
-`test_when_finished` will launch the evaluation script once training is finished.
-Replace `<DATASET_ROOT>` with the location where the dataset in Implicitron format is stored
-and `<CHECKPOINT_DIR>` with a directory where checkpoints will be dumped during training.
-Other configuration parameters can be overridden in the same way.
-See the [Configuration system](#configuration-system) section for more information on this.
-
-### Visdom logging
-
-Note that the training script logs its progress to Visdom. Make sure to start a Visdom server before the training commences:
-```
-python -m visdom.server
-```
-> If a Visdom server is not started, the console will get flooded with `requests.exceptions.ConnectionError` errors signalling that a Visdom server is not available. These errors will NOT interrupt the program, and training will continue without issues.
-
-## Evaluation
-
-To run evaluation on the latest checkpoint after (or during) training, simply add `eval_only=True` to your training command.
-
-E.g. for executing the evaluation on the NeRF skateboard sequence, you can run:
-```shell
-dataset_args=data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
-pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf \
-    $dataset_args.dataset_root=<DATASET_ROOT> $dataset_args.category='skateboard' \
-    $dataset_args.test_restrict_sequence_id=0 exp_dir=<CHECKPOINT_DIR> eval_only=True
-```
-Evaluation prints the metrics to `stdout` and dumps them to a json file in `exp_dir`.
-
-## Visualisation
-
-The script produces a video of renders by a trained model, assuming a pre-defined camera trajectory.
-In order for it to work, `ffmpeg` needs to be installed:
-
-```shell
-conda install ffmpeg
-```
-
-Here is an example of calling the script:
-```shell
-pytorch3d_implicitron_visualizer exp_dir=<CHECKPOINT_DIR> \
-    visdom_show_preds=True n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
-```
-
-The argument `n_eval_cameras` sets the number of rendering viewpoints sampled on a trajectory, which defaults to a circular fly-around;
-`render_size` sets the size of a render passed to the model, which can be resized to `video_size` before writing the video.
-
-Rendered videos of images, masks, and depth maps will be saved to `<CHECKPOINT_DIR>/video`.
-
-
-# Configuration system
-
-We use Hydra and OmegaConf to parse the configs.
-The config schema and default values are defined by the dataclasses implementing the modules.
-More specifically, if a class derives from `Configurable`, its fields can be set in config yaml files or overridden on the CLI.
-For example, `GenericModel` has a field `render_image_width` with the default value 400.
-If it is specified in the yaml config file or in a CLI command, the new value will be used.
-
-Configurables can form hierarchies.
-For example, `GenericModel` has a field `raysampler: RaySampler`, which is also Configurable.
-In the config, inner parameters can be propagated using the `_args` postfix, e.g. to change `raysampler.n_pts_per_ray_training` (the number of sampled points per ray), the node `raysampler_args.n_pts_per_ray_training` should be specified.
-
-### Top-level configuration class: `Experiment`
-
-The root of the hierarchy is defined by the `Experiment` Configurable in `pytorch3d/projects/implicitron_trainer/experiment.py`.
-
-It has top-level fields like `seed`, which seeds the random number generator.
-Additionally, it has non-leaf nodes like `model_factory_ImplicitronModelFactory_args.model_GenericModel_args`, which dispatches the config parameters to `GenericModel`.
-Thus, changing the model parameters may be achieved in two ways: either by editing the config file, e.g.
-```yaml
-model_factory_ImplicitronModelFactory_args:
-  model_GenericModel_args:
-    render_image_width: 800
-    raysampler_args:
-      n_pts_per_ray_training: 128
-```
-
-or, equivalently, by adding the following to the `pytorch3d_implicitron_runner` arguments:
-
-```shell
-model_args=model_factory_ImplicitronModelFactory_args.model_GenericModel_args
-$model_args.render_image_width=800 $model_args.raysampler_args.n_pts_per_ray_training=128
-```
-
-See the documentation in `pytorch3d/implicitron/tools/config.py` for more details.
-
-## Replaceable implementations
-
-Sometimes changing the model parameters does not provide enough flexibility, and you want to provide a new implementation for a building block.
-The configuration system supports this too!
-Abstract classes like `BaseRenderer` derive from `ReplaceableBase` instead of `Configurable`.
-This means that other Configurables can refer to them using the base type, while the specific implementation is chosen in the config using a `_class_type`-postfixed node.
-In that case, the `_args` node name has to include the implementation type.
-More specifically, to change the renderer settings, the config will look like this:
-```yaml
-model_factory_ImplicitronModelFactory_args:
-  model_GenericModel_args:
-    renderer_class_type: LSTMRenderer
-    renderer_LSTMRenderer_args:
-      num_raymarch_steps: 10
-      hidden_size: 16
-```
-
-See the documentation in `pytorch3d/implicitron/tools/config.py` for more details on the configuration system.
-
-## Custom plugins
-
-If you have an idea for another implementation of a replaceable component, it can be plugged in without changing the core code.
-For that, you need to set up Implicitron through option 2 or 3 above.
-Let's say you want to implement a renderer that accumulates opacities similar to an X-ray machine.
-First, create a module `x_ray_renderer.py` with a class deriving from `BaseRenderer`:
-
-```python
-import torch
-
-from pytorch3d.implicitron.models.renderer.base import (
-    BaseRenderer,
-    EvaluationMode,
-    RendererOutput,
-)
-from pytorch3d.implicitron.tools.config import registry
-
-
-@registry.register
-class XRayRenderer(BaseRenderer, torch.nn.Module):
-    n_pts_per_ray: int = 64
-
-    def __post_init__(self):
-        # custom initialization
-        ...
-
-    def forward(
-        self,
-        ray_bundle,
-        implicit_functions=[],
-        evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION,
-        **kwargs,
-    ) -> RendererOutput:
-        ...
-```
-
-Please note the `@registry.register` decorator, which registers the plug-in as an implementation of `BaseRenderer`.
-IMPORTANT: In order for it to run, the class (or its enclosing module) has to be imported in your launch script.
-Additionally, this has to be done before parsing the root configuration class `Experiment`.
-Simply add `from . import x_ray_renderer` at the beginning of `experiment.py`.
-
-After that, you should be able to change the config with:
-```yaml
-model_factory_ImplicitronModelFactory_args:
-  model_GenericModel_args:
-    renderer_class_type: XRayRenderer
-    renderer_XRayRenderer_args:
-      n_pts_per_ray: 128
-```
-
-to replace the implementation and potentially override the parameters.
-
-# Code and config structure
-
-The main object for this trainer loop is `Experiment`. It has four top-level replaceable components:
-
-* `data_source`: This is a `DataSourceBase` which defaults to `ImplicitronDataSource`.
-It constructs the datasets and dataloaders.
-* `model_factory`: This is a `ModelFactoryBase` which defaults to `ImplicitronModelFactory`.
-It constructs the model, which is usually an instance of `OverfitModel` (for NeRF-style training that overfits to one scene) or `GenericModel` (which can generalize to multiple scenes via NeRFormer-style conditioning on other scene views), and can load its weights from a checkpoint.
-* `optimizer_factory`: This is an `OptimizerFactoryBase` which defaults to `ImplicitronOptimizerFactory`.
-It constructs the optimizer and can load its weights from a checkpoint.
-* `training_loop`: This is a `TrainingLoopBase` which defaults to `ImplicitronTrainingLoop` and defines the main training loop.
-
-As per above, the config structure is parsed automatically from the module hierarchy.
-In particular, for `ImplicitronModelFactory` with the generic model, the model parameters are contained in the `model_factory_ImplicitronModelFactory_args.model_GenericModel_args` node, and the dataset parameters in the `data_source_ImplicitronDataSource_args` node; a minimal skeleton of this hierarchy is sketched below.
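For orientation, these pieces combine into a config tree roughly like the following minimal sketch. The node names and values are taken from the `repro_*` configs included later in this diff (in particular `repro_base.yaml`); the exact set of keys available depends on the chosen `_class_type` values, so treat this as an illustration rather than a complete schema:

```yaml
# Minimal illustrative skeleton of an Experiment config
# (values borrowed from repro_base.yaml in this diff).
exp_dir: ./data/exps/base/
data_source_ImplicitronDataSource_args:
  dataset_map_provider_class_type: JsonIndexDatasetMapProvider
  dataset_map_provider_JsonIndexDatasetMapProvider_args:
    dataset_root: ${oc.env:CO3D_DATASET_ROOT}
model_factory_ImplicitronModelFactory_args:
  model_GenericModel_args:
    render_image_width: 400
    raysampler_AdaptiveRaySampler_args:
      n_pts_per_ray_training: 64
optimizer_factory_ImplicitronOptimizerFactory_args:
  lr: 0.0005
training_loop_ImplicitronTrainingLoop_args:
  max_epochs: 1000
```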
-
-Here is the class structure of GenericModel (single-line edges show aggregation, while double lines show available implementations):
-```
-model_GenericModel_args: GenericModel
-└-- global_encoder_*_args: GlobalEncoderBase
-    ╘== SequenceAutodecoder
-        └-- autodecoder_args: Autodecoder
-    ╘== HarmonicTimeEncoder
-└-- raysampler_*_args: RaySampler
-    ╘== AdaptiveRaysampler
-    ╘== NearFarRaysampler
-└-- renderer_*_args: BaseRenderer
-    ╘== MultiPassEmissionAbsorptionRenderer
-    ╘== LSTMRenderer
-    ╘== SignedDistanceFunctionRenderer
-        └-- ray_tracer_args: RayTracing
-        └-- ray_normal_coloring_network_args: RayNormalColoringNetwork
-└-- implicit_function_*_args: ImplicitFunctionBase
-    ╘== NeuralRadianceFieldImplicitFunction
-    ╘== SRNImplicitFunction
-        └-- raymarch_function_args: SRNRaymarchFunction
-        └-- pixel_generator_args: SRNPixelGenerator
-    ╘== SRNHyperNetImplicitFunction
-        └-- hypernet_args: SRNRaymarchHyperNet
-        └-- pixel_generator_args: SRNPixelGenerator
-    ╘== IdrFeatureField
-└-- image_feature_extractor_*_args: FeatureExtractorBase
-    ╘== ResNetFeatureExtractor
-└-- view_pooler_args: ViewPooler
-    └-- view_sampler_args: ViewSampler
-    └-- feature_aggregator_*_args: FeatureAggregatorBase
-        ╘== IdentityFeatureAggregator
-        ╘== AngleWeightedIdentityFeatureAggregator
-        ╘== AngleWeightedReductionFeatureAggregator
-        ╘== ReductionFeatureAggregator
-```
-
-Here is the class structure of OverfitModel:
-
-```
-model_OverfitModel_args: OverfitModel
-└-- raysampler_*_args: RaySampler
-    ╘== AdaptiveRaysampler
-    ╘== NearFarRaysampler
-└-- renderer_*_args: BaseRenderer
-    ╘== MultiPassEmissionAbsorptionRenderer
-    ╘== LSTMRenderer
-    ╘== SignedDistanceFunctionRenderer
-        └-- ray_tracer_args: RayTracing
-        └-- ray_normal_coloring_network_args: RayNormalColoringNetwork
-└-- implicit_function_*_args: ImplicitFunctionBase
-    ╘== NeuralRadianceFieldImplicitFunction
-    ╘== SRNImplicitFunction
-        └-- raymarch_function_args: SRNRaymarchFunction
-        └-- pixel_generator_args: SRNPixelGenerator
-    ╘== SRNHyperNetImplicitFunction
-        └-- hypernet_args: SRNRaymarchHyperNet
-        └-- pixel_generator_args: SRNPixelGenerator
-    ╘== IdrFeatureField
-└-- coarse_implicit_function_*_args: ImplicitFunctionBase
-    ╘== NeuralRadianceFieldImplicitFunction
-    ╘== SRNImplicitFunction
-        └-- raymarch_function_args: SRNRaymarchFunction
-        └-- pixel_generator_args: SRNPixelGenerator
-    ╘== SRNHyperNetImplicitFunction
-        └-- hypernet_args: SRNRaymarchHyperNet
-        └-- pixel_generator_args: SRNPixelGenerator
-    ╘== IdrFeatureField
-```
-
-OverfitModel was introduced as a simpler class that disentangles NeRF-style models, which follow the single-scene overfitting pattern, from GenericModel.
-
-
-Please look at the annotations of the respective classes or functions for the lists of hyperparameters.
-`tests/experiment.yaml` shows every possible option if you have no user-defined classes.
-
-
-# Implementations of existing methods
-
-We provide configuration files that implement several existing works.
-
-The configuration files live in `pytorch3d/projects/implicitron_trainer/configs`.
-
-
-## NeRF
-
-The following config file corresponds to training a vanilla NeRF on the Blender Synthetic dataset
-(see https://arxiv.org/abs/2003.08934 for details of the method):
-
-`./configs/repro_singleseq_nerf_blender.yaml`
-
-
-### Downloading Blender-Synthetic
-Training requires the Blender Synthetic dataset.
-To download the dataset, visit the [gdrive bucket](https://drive.google.com/file/d/18JxhpWD-4ZmuFKLzKlAw-w5PpzZxXOcG/view?usp=share_link)
-and click Download.
-Then unpack the downloaded .zip file to a folder which we call `<BLENDER_DATASET_ROOT>`.
-
-
-### Launching NeRF training
-In order to train NeRF on the "drums" scene, execute the following command:
-```shell
-export BLENDER_DATASET_ROOT="<BLENDER_DATASET_ROOT>"
-export BLENDER_SINGLESEQ_CLASS="drums"
-pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf_blender
-```
-
-Note that the training scene is selected by setting the environment variable `BLENDER_SINGLESEQ_CLASS`
-appropriately (one of `"chair"`, `"drums"`, `"ficus"`, `"hotdog"`, `"lego"`, `"materials"`, `"mic"`, `"ship"`).
-
-By default, the training outputs will be stored to `"./data/nerf_blender_repro/$BLENDER_SINGLESEQ_CLASS/"`.
-
-
-### Visualizing trained NeRF
-```shell
-pytorch3d_implicitron_visualizer exp_dir=<CHECKPOINT_DIR> \
-    visdom_show_preds=True n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
-```
-where `<CHECKPOINT_DIR>` corresponds to the directory with the training outputs (defaults to `"./data/nerf_blender_repro/$BLENDER_SINGLESEQ_CLASS/"`).
-
-The script will output a rendered video of the learned radiance field to `"./data/nerf_blender_repro/$BLENDER_SINGLESEQ_CLASS/"` (requires `ffmpeg`).
-
-> **_NOTE:_** Recall that, if `pytorch3d_implicitron_runner`/`pytorch3d_implicitron_visualizer` are not available, replace the calls
-with `cd <pytorch3d_root>/projects/; python -m implicitron_trainer.[experiment|visualize_reconstruction]`.
-
-
-## CO3D experiments
-
-Common Objects in 3D (CO3D) is a large-scale dataset of videos of rigid objects grouped into 50 common categories.
-Implicitron provides implementations and config files to reproduce the results from [the paper](https://arxiv.org/abs/2109.00512).
-Please follow [the link](https://github.com/facebookresearch/co3d#automatic-batch-download) for the instructions to download the dataset.
-In training and evaluation scripts, use the download location as `<DATASET_ROOT>`.
-It is also possible to define the environment variable `CO3D_DATASET_ROOT` instead of specifying it explicitly.
-To reproduce the experiments from the paper, use the following configs.
-
-For single-sequence experiments:
-
-| Method          | config file                         |
-|-----------------|-------------------------------------|
-| NeRF            | repro_singleseq_nerf.yaml           |
-| NeRF + WCE      | repro_singleseq_nerf_wce.yaml       |
-| NerFormer       | repro_singleseq_nerformer.yaml      |
-| IDR             | repro_singleseq_idr.yaml            |
-| SRN             | repro_singleseq_srn_noharm.yaml     |
-| SRN + γ         | repro_singleseq_srn.yaml            |
-| SRN + WCE       | repro_singleseq_srn_wce_noharm.yaml |
-| SRN + WCE + γ   | repro_singleseq_srn_wce.yaml        |
-
-For multi-sequence autodecoder experiments (without generalization to new sequences):
-
-| Method          | config file                                |
-|-----------------|--------------------------------------------|
-| NeRF + AD       | repro_multiseq_nerf_ad.yaml                |
-| SRN + AD        | repro_multiseq_srn_ad_hypernet_noharm.yaml |
-| SRN + γ + AD    | repro_multiseq_srn_ad_hypernet.yaml        |
-
-For multi-sequence experiments (with generalization to new sequences):
-
-| Method          | config file                          |
-|-----------------|--------------------------------------|
-| NeRF + WCE      | repro_multiseq_nerf_wce.yaml         |
-| NerFormer       | repro_multiseq_nerformer.yaml        |
-| SRN + WCE       | repro_multiseq_srn_wce_noharm.yaml   |
-| SRN + WCE + γ   | repro_multiseq_srn_wce.yaml          |
-
-
-## CO3Dv2 experiments
-
-The following config files implement training on the second version of CO3D, `CO3Dv2`.
- -In order to launch trainings, set the `CO3DV2_DATASET_ROOT` environment variable -to the root folder of the dataset (note that the name of the env. variable differs from the CO3Dv1 version). - -Single-sequence experiments: - -| Method | config file | -|-----------------|-------------------------------------| -| NeRF | repro_singleseq_v2_nerf.yaml | -| NerFormer | repro_singleseq_v2_nerformer.yaml | -| IDR | repro_singleseq_v2_idr.yaml | -| SRN | repro_singleseq_v2_srn_noharm.yaml | - -Multi-sequence autodecoder experiments (without generalization to new sequences): - -| Method | config file | -|-----------------|--------------------------------------------| -| NeRF + AD | repro_multiseq_v2_nerf_ad.yaml | -| SRN + Ξ³ + AD | repro_multiseq_v2_srn_ad_hypernet.yaml | - -Multi-sequence experiments (with generalization to new sequences): - -| Method | config file | -|-----------------|----------------------------------------| -| NeRF + WCE | repro_multiseq_v2_nerf_wce.yaml | -| NerFormer | repro_multiseq_v2_nerformer.yaml | -| SRN + WCE | repro_multiseq_v2_srn_wce_noharm.yaml | -| SRN + WCE + Ξ³ | repro_multiseq_v2_srn_wce.yaml | diff --git a/pytorch3d/projects/implicitron_trainer/__init__.py b/pytorch3d/projects/implicitron_trainer/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/projects/implicitron_trainer/configs/overfit_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/overfit_base.yaml deleted file mode 100644 index d5cc0cccbad843c750a4653c3b72fbe90a57547e..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/overfit_base.yaml +++ /dev/null @@ -1,79 +0,0 @@ -defaults: -- default_config -- _self_ -exp_dir: ./data/exps/overfit_base/ -training_loop_ImplicitronTrainingLoop_args: - visdom_port: 8097 - visualize_interval: 0 - max_epochs: 1000 -data_source_ImplicitronDataSource_args: - data_loader_map_provider_class_type: SequenceDataLoaderMapProvider - dataset_map_provider_class_type: JsonIndexDatasetMapProvider - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - dataset_length_train: 1000 - dataset_length_val: 1 - num_workers: 8 - dataset_map_provider_JsonIndexDatasetMapProvider_args: - dataset_root: ${oc.env:CO3D_DATASET_ROOT} - n_frames_per_sequence: -1 - test_on_train: true - test_restrict_sequence_id: 0 - dataset_JsonIndexDataset_args: - load_point_clouds: false - mask_depths: false - mask_images: false -model_factory_ImplicitronModelFactory_args: - model_class_type: "OverfitModel" - model_OverfitModel_args: - loss_weights: - loss_mask_bce: 1.0 - loss_prev_stage_mask_bce: 1.0 - loss_autodecoder_norm: 0.01 - loss_rgb_mse: 1.0 - loss_prev_stage_rgb_mse: 1.0 - output_rasterized_mc: false - chunk_size_grid: 102400 - render_image_height: 400 - render_image_width: 400 - share_implicit_function_across_passes: false - implicit_function_class_type: "NeuralRadianceFieldImplicitFunction" - implicit_function_NeuralRadianceFieldImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_xyz: 256 - n_hidden_neurons_dir: 128 - n_layers_xyz: 8 - append_xyz: - - 5 - coarse_implicit_function_class_type: 
"NeuralRadianceFieldImplicitFunction" - coarse_implicit_function_NeuralRadianceFieldImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_xyz: 256 - n_hidden_neurons_dir: 128 - n_layers_xyz: 8 - append_xyz: - - 5 - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 1024 - scene_extent: 8.0 - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - renderer_MultiPassEmissionAbsorptionRenderer_args: - n_pts_per_ray_fine_training: 64 - n_pts_per_ray_fine_evaluation: 64 - append_coarse_samples_to_fine: true - density_noise_std_train: 1.0 -optimizer_factory_ImplicitronOptimizerFactory_args: - breed: Adam - weight_decay: 0.0 - lr_policy: MultiStepLR - multistep_lr_milestones: [] - lr: 0.0005 - gamma: 0.1 - momentum: 0.9 - betas: - - 0.9 - - 0.999 diff --git a/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_base.yaml deleted file mode 100644 index 0349fd27a1ab25d7155f1d05c6258545acd6a5f7..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_base.yaml +++ /dev/null @@ -1,42 +0,0 @@ -defaults: -- overfit_base -- _self_ -data_source_ImplicitronDataSource_args: - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - batch_size: 1 - dataset_length_train: 1000 - dataset_length_val: 1 - num_workers: 8 - dataset_map_provider_JsonIndexDatasetMapProvider_args: - assert_single_seq: true - n_frames_per_sequence: -1 - test_restrict_sequence_id: 0 - test_on_train: false -model_factory_ImplicitronModelFactory_args: - model_class_type: "OverfitModel" - model_OverfitModel_args: - render_image_height: 800 - render_image_width: 800 - log_vars: - - loss_rgb_psnr_fg - - loss_rgb_psnr - - loss_eikonal - - loss_prev_stage_rgb_psnr - - loss_mask_bce - - loss_prev_stage_mask_bce - - loss_rgb_mse - - loss_prev_stage_rgb_mse - - loss_depth_abs - - loss_depth_abs_fg - - loss_kl - - loss_mask_neg_iou - - objective - - epoch - - sec/it -optimizer_factory_ImplicitronOptimizerFactory_args: - lr: 0.0005 - multistep_lr_milestones: - - 200 - - 300 -training_loop_ImplicitronTrainingLoop_args: - max_epochs: 400 diff --git a/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml b/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml deleted file mode 100644 index c61d759f382beb27da12d8e9655599f367161fd9..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml +++ /dev/null @@ -1,56 +0,0 @@ -defaults: -- overfit_singleseq_base -- _self_ -exp_dir: "./data/overfit_nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}" -data_source_ImplicitronDataSource_args: - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - dataset_length_train: 100 - dataset_map_provider_class_type: BlenderDatasetMapProvider - dataset_map_provider_BlenderDatasetMapProvider_args: - base_dir: ${oc.env:BLENDER_DATASET_ROOT}/${oc.env:BLENDER_SINGLESEQ_CLASS} - n_known_frames_for_test: null - object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS} - path_manager_factory_class_type: PathManagerFactory - path_manager_factory_PathManagerFactory_args: - silence_logs: true - -model_factory_ImplicitronModelFactory_args: - model_class_type: "OverfitModel" - model_OverfitModel_args: - mask_images: false - raysampler_class_type: 
AdaptiveRaySampler - raysampler_AdaptiveRaySampler_args: - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - n_rays_per_image_sampled_from_mask: 4096 - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - scene_extent: 2.0 - scene_center: - - 0.0 - - 0.0 - - 0.0 - renderer_MultiPassEmissionAbsorptionRenderer_args: - density_noise_std_train: 0.0 - n_pts_per_ray_fine_training: 128 - n_pts_per_ray_fine_evaluation: 128 - raymarcher_EmissionAbsorptionRaymarcher_args: - blend_output: false - loss_weights: - loss_rgb_mse: 1.0 - loss_prev_stage_rgb_mse: 1.0 - loss_mask_bce: 0.0 - loss_prev_stage_mask_bce: 0.0 - loss_autodecoder_norm: 0.00 - -optimizer_factory_ImplicitronOptimizerFactory_args: - exponential_lr_step_size: 3001 - lr_policy: LinearExponential - linear_exponential_lr_milestone: 200 - -training_loop_ImplicitronTrainingLoop_args: - max_epochs: 6000 - metric_print_interval: 10 - store_checkpoints_purge: 3 - test_when_finished: true - validation_interval: 100 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_base.yaml deleted file mode 100644 index 9d6af2608fe23be8924a354e3cf5f20d690bdac9..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_base.yaml +++ /dev/null @@ -1,80 +0,0 @@ -defaults: -- default_config -- _self_ -exp_dir: ./data/exps/base/ -training_loop_ImplicitronTrainingLoop_args: - visdom_port: 8097 - visualize_interval: 0 - max_epochs: 1000 -data_source_ImplicitronDataSource_args: - data_loader_map_provider_class_type: SequenceDataLoaderMapProvider - dataset_map_provider_class_type: JsonIndexDatasetMapProvider - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - dataset_length_train: 1000 - dataset_length_val: 1 - num_workers: 8 - dataset_map_provider_JsonIndexDatasetMapProvider_args: - dataset_root: ${oc.env:CO3D_DATASET_ROOT} - n_frames_per_sequence: -1 - test_on_train: true - test_restrict_sequence_id: 0 - dataset_JsonIndexDataset_args: - load_point_clouds: false - mask_depths: false - mask_images: false -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - loss_weights: - loss_mask_bce: 1.0 - loss_prev_stage_mask_bce: 1.0 - loss_autodecoder_norm: 0.01 - loss_rgb_mse: 1.0 - loss_prev_stage_rgb_mse: 1.0 - output_rasterized_mc: false - chunk_size_grid: 102400 - render_image_height: 400 - render_image_width: 400 - num_passes: 2 - implicit_function_NeuralRadianceFieldImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_xyz: 256 - n_hidden_neurons_dir: 128 - n_layers_xyz: 8 - append_xyz: - - 5 - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 1024 - scene_extent: 8.0 - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - renderer_MultiPassEmissionAbsorptionRenderer_args: - n_pts_per_ray_fine_training: 64 - n_pts_per_ray_fine_evaluation: 64 - append_coarse_samples_to_fine: true - density_noise_std_train: 1.0 - view_pooler_args: - view_sampler_args: - masked_sampling: false - image_feature_extractor_ResNetFeatureExtractor_args: - stages: - - 1 - - 2 - - 3 - - 4 - proj_dim: 16 - image_rescale: 0.32 - first_max_pool: false -optimizer_factory_ImplicitronOptimizerFactory_args: - breed: Adam - weight_decay: 0.0 - lr_policy: MultiStepLR - multistep_lr_milestones: [] - lr: 0.0005 - gamma: 0.1 - momentum: 0.9 - betas: 
- - 0.9 - - 0.999 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml deleted file mode 100644 index b2154c8bfa130d90073f70b7d54ac540a9e557ef..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml +++ /dev/null @@ -1,18 +0,0 @@ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - image_feature_extractor_class_type: ResNetFeatureExtractor - image_feature_extractor_ResNetFeatureExtractor_args: - add_images: true - add_masks: true - first_max_pool: true - image_rescale: 0.375 - l2_norm: true - name: resnet34 - normalize_image: true - pretrained: true - stages: - - 1 - - 2 - - 3 - - 4 - proj_dim: 32 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml deleted file mode 100644 index 8d24495bbb15ad8d8770dadf5147ec49d2706b08..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml +++ /dev/null @@ -1,18 +0,0 @@ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - image_feature_extractor_class_type: ResNetFeatureExtractor - image_feature_extractor_ResNetFeatureExtractor_args: - add_images: true - add_masks: true - first_max_pool: false - image_rescale: 0.375 - l2_norm: true - name: resnet34 - normalize_image: true - pretrained: true - stages: - - 1 - - 2 - - 3 - - 4 - proj_dim: 16 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml deleted file mode 100644 index 2d4eb3f861089e96bf63b9b0bced5bed7943f134..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml +++ /dev/null @@ -1,19 +0,0 @@ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - image_feature_extractor_class_type: ResNetFeatureExtractor - image_feature_extractor_ResNetFeatureExtractor_args: - stages: - - 1 - - 2 - - 3 - first_max_pool: false - proj_dim: -1 - l2_norm: false - image_rescale: 0.375 - name: resnet34 - normalize_image: true - pretrained: true - view_pooler_args: - feature_aggregator_AngleWeightedReductionFeatureAggregator_args: - reduction_functions: - - AVG diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_base.yaml deleted file mode 100644 index 578fe1a2ccfef253ed268fc84eaf202a1c88c91c..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_base.yaml +++ /dev/null @@ -1,38 +0,0 @@ -defaults: -- repro_base.yaml -- _self_ -data_source_ImplicitronDataSource_args: - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - batch_size: 10 - dataset_length_train: 1000 - dataset_length_val: 1 - num_workers: 8 - train_conditioning_type: SAME - val_conditioning_type: SAME - test_conditioning_type: SAME - images_per_seq_options: - - 2 - - 3 - - 4 - - 5 - - 6 - - 7 - - 8 - - 9 - - 10 - dataset_map_provider_JsonIndexDatasetMapProvider_args: - assert_single_seq: false - task_str: multisequence - n_frames_per_sequence: -1 - test_on_train: true - test_restrict_sequence_id: 0 -optimizer_factory_ImplicitronOptimizerFactory_args: 
- multistep_lr_milestones: - - 1000 -training_loop_ImplicitronTrainingLoop_args: - max_epochs: 3000 - evaluator_ImplicitronEvaluator_args: - camera_difficulty_bin_breaks: - - 0.666667 - - 0.833334 - is_multisequence: true diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_co3dv2_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_co3dv2_base.yaml deleted file mode 100644 index 9eb9bd9030a5fbc0b48006416137762d89ac2757..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_co3dv2_base.yaml +++ /dev/null @@ -1,8 +0,0 @@ -data_source_ImplicitronDataSource_args: - dataset_map_provider_class_type: JsonIndexDatasetMapProviderV2 - dataset_map_provider_JsonIndexDatasetMapProviderV2_args: - category: teddybear - subset_name: fewview_dev -training_loop_ImplicitronTrainingLoop_args: - evaluator_ImplicitronEvaluator_args: - is_multisequence: true diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml deleted file mode 100644 index f6bb1fe40ca47fb9456b74932e380b43a97e8d43..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml +++ /dev/null @@ -1,65 +0,0 @@ -defaults: -- repro_multiseq_base.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - loss_weights: - loss_mask_bce: 100.0 - loss_kl: 0.0 - loss_rgb_mse: 1.0 - loss_eikonal: 0.1 - chunk_size_grid: 65536 - num_passes: 1 - output_rasterized_mc: true - sampling_mode_training: mask_sample - global_encoder_class_type: SequenceAutodecoder - global_encoder_SequenceAutodecoder_args: - autodecoder_args: - n_instances: 20000 - init_scale: 1.0 - encoding_dim: 256 - implicit_function_IdrFeatureField_args: - n_harmonic_functions_xyz: 6 - bias: 0.6 - d_in: 3 - d_out: 1 - dims: - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - geometric_init: true - pooled_feature_dim: 0 - skip_in: - - 6 - weight_norm: true - renderer_SignedDistanceFunctionRenderer_args: - ray_tracer_args: - line_search_step: 0.5 - line_step_iters: 3 - n_secant_steps: 8 - n_steps: 100 - sdf_threshold: 5.0e-05 - ray_normal_coloring_network_args: - d_in: 9 - d_out: 3 - dims: - - 512 - - 512 - - 512 - - 512 - mode: idr - n_harmonic_functions_dir: 4 - pooled_feature_dim: 0 - weight_norm: true - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 1024 - n_pts_per_ray_training: 0 - n_pts_per_ray_evaluation: 0 - scene_extent: 8.0 - renderer_class_type: SignedDistanceFunctionRenderer - implicit_function_class_type: IdrFeatureField diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml deleted file mode 100644 index aa4291d3503cd731255a364db19f82b6f707f729..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml +++ /dev/null @@ -1,12 +0,0 @@ -defaults: -- repro_multiseq_base.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 16000 - view_pooler_enabled: false - global_encoder_class_type: SequenceAutodecoder - global_encoder_SequenceAutodecoder_args: - autodecoder_args: - n_instances: 20000 - encoding_dim: 256 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml 
b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml deleted file mode 100644 index fa366d46ac4a2f09a437cf2632e5735aee34d5fa..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml +++ /dev/null @@ -1,12 +0,0 @@ -defaults: -- repro_multiseq_base.yaml -- repro_feat_extractor_unnormed.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 16000 - view_pooler_enabled: true - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 850 -training_loop_ImplicitronTrainingLoop_args: - clip_grad: 1.0 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml deleted file mode 100644 index 9aa9f4c5fd0839bc4e3c6fc74f3db3190d559fb5..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml +++ /dev/null @@ -1,18 +0,0 @@ -defaults: -- repro_multiseq_base.yaml -- repro_feat_extractor_transformer.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 16000 - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 800 - n_pts_per_ray_training: 32 - n_pts_per_ray_evaluation: 32 - renderer_MultiPassEmissionAbsorptionRenderer_args: - n_pts_per_ray_fine_training: 16 - n_pts_per_ray_fine_evaluation: 16 - implicit_function_class_type: NeRFormerImplicitFunction - view_pooler_enabled: true - view_pooler_args: - feature_aggregator_class_type: IdentityFeatureAggregator diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml deleted file mode 100644 index 9c9a30fe79dd25afded6cffb80c29610a45803c0..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml +++ /dev/null @@ -1,7 +0,0 @@ -defaults: -- repro_multiseq_nerformer.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - view_pooler_args: - feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml deleted file mode 100644 index 1b4a2ef2d17d5a7a2d868b1603c996e2fb3ad7b2..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml +++ /dev/null @@ -1,35 +0,0 @@ -defaults: -- repro_multiseq_base.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 16000 - view_pooler_enabled: false - n_train_target_views: -1 - num_passes: 1 - loss_weights: - loss_rgb_mse: 200.0 - loss_prev_stage_rgb_mse: 0.0 - loss_mask_bce: 1.0 - loss_prev_stage_mask_bce: 0.0 - loss_autodecoder_norm: 0.001 - depth_neg_penalty: 10000.0 - global_encoder_class_type: SequenceAutodecoder - global_encoder_SequenceAutodecoder_args: - autodecoder_args: - encoding_dim: 256 - n_instances: 20000 - raysampler_class_type: NearFarRaySampler - raysampler_NearFarRaySampler_args: - n_rays_per_image_sampled_from_mask: 2048 - min_depth: 0.05 - max_depth: 0.05 - n_pts_per_ray_training: 1 - n_pts_per_ray_evaluation: 1 - stratified_point_sampling_training: false - 
stratified_point_sampling_evaluation: false - renderer_class_type: LSTMRenderer - implicit_function_class_type: SRNHyperNetImplicitFunction -optimizer_factory_ImplicitronOptimizerFactory_args: - breed: Adam - lr: 5.0e-05 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml deleted file mode 100644 index 9f29cbbe82ede4f4610949849433a67f91aff07f..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: -- repro_multiseq_srn_ad_hypernet.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - num_passes: 1 - implicit_function_SRNHyperNetImplicitFunction_args: - pixel_generator_args: - n_harmonic_functions: 0 - hypernet_args: - n_harmonic_functions: 0 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml deleted file mode 100644 index 4a72c32621d063276a2b765d34e1edd707c87eac..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml +++ /dev/null @@ -1,31 +0,0 @@ -defaults: -- repro_multiseq_base.yaml -- repro_feat_extractor_normed.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 32000 - num_passes: 1 - n_train_target_views: -1 - loss_weights: - loss_rgb_mse: 200.0 - loss_prev_stage_rgb_mse: 0.0 - loss_mask_bce: 1.0 - loss_prev_stage_mask_bce: 0.0 - loss_autodecoder_norm: 0.0 - depth_neg_penalty: 10000.0 - raysampler_class_type: NearFarRaySampler - raysampler_NearFarRaySampler_args: - n_rays_per_image_sampled_from_mask: 2048 - min_depth: 0.05 - max_depth: 0.05 - n_pts_per_ray_training: 1 - n_pts_per_ray_evaluation: 1 - stratified_point_sampling_training: false - stratified_point_sampling_evaluation: false - renderer_class_type: LSTMRenderer - implicit_function_class_type: SRNImplicitFunction - view_pooler_enabled: true -optimizer_factory_ImplicitronOptimizerFactory_args: - breed: Adam - lr: 5.0e-05 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml deleted file mode 100644 index d2ea11e367e6b169895546286c80c939724a4754..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: -- repro_multiseq_srn_wce.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - num_passes: 1 - implicit_function_SRNImplicitFunction_args: - pixel_generator_args: - n_harmonic_functions: 0 - raymarch_function_args: - n_harmonic_functions: 0 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerf_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerf_wce.yaml deleted file mode 100644 index 0f3ac0553a9a05574626c1228873cd8ac370ec5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerf_wce.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_multiseq_nerf_wce.yaml -- repro_multiseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerformer.yaml 
b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerformer.yaml deleted file mode 100644 index ee7ef332310d444b377798faaf7b67e8575d5b0f..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_nerformer.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_multiseq_nerformer.yaml -- repro_multiseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_ad_hypernet.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_ad_hypernet.yaml deleted file mode 100644 index bdb544f3217e329a8940b117ceb2f47cdc501692..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_ad_hypernet.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_multiseq_srn_ad_hypernet.yaml -- repro_multiseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_wce.yaml deleted file mode 100644 index b8ae36746035bc35c93867fc01399c61476e14a6..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_multiseq_v2_srn_wce.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_multiseq_srn_wce.yaml -- repro_multiseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_base.yaml deleted file mode 100644 index 572fc7d5e71323f61c9b099c56b7f7aeb900b614..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_base.yaml +++ /dev/null @@ -1,41 +0,0 @@ -defaults: -- repro_base -- _self_ -data_source_ImplicitronDataSource_args: - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - batch_size: 1 - dataset_length_train: 1000 - dataset_length_val: 1 - num_workers: 8 - dataset_map_provider_JsonIndexDatasetMapProvider_args: - assert_single_seq: true - n_frames_per_sequence: -1 - test_restrict_sequence_id: 0 - test_on_train: false -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - render_image_height: 800 - render_image_width: 800 - log_vars: - - loss_rgb_psnr_fg - - loss_rgb_psnr - - loss_eikonal - - loss_prev_stage_rgb_psnr - - loss_mask_bce - - loss_prev_stage_mask_bce - - loss_rgb_mse - - loss_prev_stage_rgb_mse - - loss_depth_abs - - loss_depth_abs_fg - - loss_kl - - loss_mask_neg_iou - - objective - - epoch - - sec/it -optimizer_factory_ImplicitronOptimizerFactory_args: - lr: 0.0005 - multistep_lr_milestones: - - 200 - - 300 -training_loop_ImplicitronTrainingLoop_args: - max_epochs: 400 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_co3dv2_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_co3dv2_base.yaml deleted file mode 100644 index 54e1e2a42037013e0a55f8ad13ca11973d68d6b7..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_co3dv2_base.yaml +++ /dev/null @@ -1,8 +0,0 @@ -data_source_ImplicitronDataSource_args: - dataset_map_provider_class_type: JsonIndexDatasetMapProviderV2 - dataset_map_provider_JsonIndexDatasetMapProviderV2_args: - category: teddybear - subset_name: manyview_dev_0 -training_loop_ImplicitronTrainingLoop_args: - evaluator_ImplicitronEvaluator_args: - is_multisequence: false diff --git 
a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml deleted file mode 100644 index 7224b9d5d9cecd791262a50dde5432cac0d7ed88..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml +++ /dev/null @@ -1,57 +0,0 @@ -defaults: -- repro_singleseq_base -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - loss_weights: - loss_mask_bce: 100.0 - loss_kl: 0.0 - loss_rgb_mse: 1.0 - loss_eikonal: 0.1 - chunk_size_grid: 65536 - num_passes: 1 - view_pooler_enabled: false - implicit_function_IdrFeatureField_args: - n_harmonic_functions_xyz: 6 - bias: 0.6 - d_in: 3 - d_out: 1 - dims: - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - geometric_init: true - pooled_feature_dim: 0 - skip_in: - - 6 - weight_norm: true - renderer_SignedDistanceFunctionRenderer_args: - ray_tracer_args: - line_search_step: 0.5 - line_step_iters: 3 - n_secant_steps: 8 - n_steps: 100 - sdf_threshold: 5.0e-05 - ray_normal_coloring_network_args: - d_in: 9 - d_out: 3 - dims: - - 512 - - 512 - - 512 - - 512 - mode: idr - n_harmonic_functions_dir: 4 - pooled_feature_dim: 0 - weight_norm: true - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 1024 - n_pts_per_ray_training: 0 - n_pts_per_ray_evaluation: 0 - renderer_class_type: SignedDistanceFunctionRenderer - implicit_function_class_type: IdrFeatureField diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml deleted file mode 100644 index fd85af5e7af23f5acd2abec6dae3255e7087cd7c..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml +++ /dev/null @@ -1,3 +0,0 @@ -defaults: -- repro_singleseq_base -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml deleted file mode 100644 index 2a92a92c1f20ea48a2b655211655dafa4e894c23..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml +++ /dev/null @@ -1,55 +0,0 @@ -defaults: -- repro_singleseq_base -- _self_ -exp_dir: "./data/nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}" -data_source_ImplicitronDataSource_args: - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - dataset_length_train: 100 - dataset_map_provider_class_type: BlenderDatasetMapProvider - dataset_map_provider_BlenderDatasetMapProvider_args: - base_dir: ${oc.env:BLENDER_DATASET_ROOT}/${oc.env:BLENDER_SINGLESEQ_CLASS} - n_known_frames_for_test: null - object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS} - path_manager_factory_class_type: PathManagerFactory - path_manager_factory_PathManagerFactory_args: - silence_logs: true - -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - mask_images: false - raysampler_class_type: AdaptiveRaySampler - raysampler_AdaptiveRaySampler_args: - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - n_rays_per_image_sampled_from_mask: 4096 - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - scene_extent: 2.0 - scene_center: - - 0.0 - - 0.0 - - 0.0 - renderer_MultiPassEmissionAbsorptionRenderer_args: - density_noise_std_train: 0.0 - n_pts_per_ray_fine_training: 128 - 
n_pts_per_ray_fine_evaluation: 128 - raymarcher_EmissionAbsorptionRaymarcher_args: - blend_output: false - loss_weights: - loss_rgb_mse: 1.0 - loss_prev_stage_rgb_mse: 1.0 - loss_mask_bce: 0.0 - loss_prev_stage_mask_bce: 0.0 - loss_autodecoder_norm: 0.00 - -optimizer_factory_ImplicitronOptimizerFactory_args: - exponential_lr_step_size: 3001 - lr_policy: LinearExponential - linear_exponential_lr_milestone: 200 - -training_loop_ImplicitronTrainingLoop_args: - max_epochs: 6000 - metric_print_interval: 10 - store_checkpoints_purge: 3 - test_when_finished: true - validation_interval: 100 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml deleted file mode 100644 index 38212e35707e2c26b93d3aa593e76579c483ca91..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml +++ /dev/null @@ -1,10 +0,0 @@ -defaults: -- repro_singleseq_wce_base.yaml -- repro_feat_extractor_unnormed.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 16000 - view_pooler_enabled: true - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 850 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml deleted file mode 100644 index 8983c26f34309fe35d41d43a87f53ddd564db3a5..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml +++ /dev/null @@ -1,18 +0,0 @@ -defaults: -- repro_singleseq_wce_base.yaml -- repro_feat_extractor_transformer.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - chunk_size_grid: 16000 - view_pooler_enabled: true - implicit_function_class_type: NeRFormerImplicitFunction - raysampler_AdaptiveRaySampler_args: - n_rays_per_image_sampled_from_mask: 800 - n_pts_per_ray_training: 32 - n_pts_per_ray_evaluation: 32 - renderer_MultiPassEmissionAbsorptionRenderer_args: - n_pts_per_ray_fine_training: 16 - n_pts_per_ray_fine_evaluation: 16 - view_pooler_args: - feature_aggregator_class_type: IdentityFeatureAggregator diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml deleted file mode 100644 index 1f60f0b9480348a6660b90244600e7d59622470a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml +++ /dev/null @@ -1,29 +0,0 @@ -defaults: -- repro_singleseq_base.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - num_passes: 1 - chunk_size_grid: 32000 - view_pooler_enabled: false - loss_weights: - loss_rgb_mse: 200.0 - loss_prev_stage_rgb_mse: 0.0 - loss_mask_bce: 1.0 - loss_prev_stage_mask_bce: 0.0 - loss_autodecoder_norm: 0.0 - depth_neg_penalty: 10000.0 - raysampler_class_type: NearFarRaySampler - raysampler_NearFarRaySampler_args: - n_rays_per_image_sampled_from_mask: 2048 - min_depth: 0.05 - max_depth: 0.05 - n_pts_per_ray_training: 1 - n_pts_per_ray_evaluation: 1 - stratified_point_sampling_training: false - stratified_point_sampling_evaluation: false - renderer_class_type: LSTMRenderer - implicit_function_class_type: SRNImplicitFunction -optimizer_factory_ImplicitronOptimizerFactory_args: - breed: Adam - lr: 5.0e-05 diff --git 
a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml deleted file mode 100644 index 28b7570c8c9f49f3ecc5a45056c1467b3b3b2130..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: -- repro_singleseq_srn.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - num_passes: 1 - implicit_function_SRNImplicitFunction_args: - pixel_generator_args: - n_harmonic_functions: 0 - raymarch_function_args: - n_harmonic_functions: 0 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml deleted file mode 100644 index d190c28084f905a08d106976b45de7eb8560b3a0..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml +++ /dev/null @@ -1,30 +0,0 @@ -defaults: -- repro_singleseq_wce_base -- repro_feat_extractor_normed.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - num_passes: 1 - chunk_size_grid: 32000 - view_pooler_enabled: true - loss_weights: - loss_rgb_mse: 200.0 - loss_prev_stage_rgb_mse: 0.0 - loss_mask_bce: 1.0 - loss_prev_stage_mask_bce: 0.0 - loss_autodecoder_norm: 0.0 - depth_neg_penalty: 10000.0 - raysampler_class_type: NearFarRaySampler - raysampler_NearFarRaySampler_args: - n_rays_per_image_sampled_from_mask: 2048 - min_depth: 0.05 - max_depth: 0.05 - n_pts_per_ray_training: 1 - n_pts_per_ray_evaluation: 1 - stratified_point_sampling_training: false - stratified_point_sampling_evaluation: false - renderer_class_type: LSTMRenderer - implicit_function_class_type: SRNImplicitFunction -optimizer_factory_ImplicitronOptimizerFactory_args: - breed: Adam - lr: 5.0e-05 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml deleted file mode 100644 index 3fc1254bd14e42266a1b8894d19bf081edced575..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: -- repro_singleseq_srn_wce.yaml -- _self_ -model_factory_ImplicitronModelFactory_args: - model_GenericModel_args: - num_passes: 1 - implicit_function_SRNImplicitFunction_args: - pixel_generator_args: - n_harmonic_functions: 0 - raymarch_function_args: - n_harmonic_functions: 0 diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_idr.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_idr.yaml deleted file mode 100644 index 4b73e40797d30f70420e213588fa46f110895cde..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_idr.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_singleseq_idr.yaml -- repro_singleseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerf.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerf.yaml deleted file mode 100644 index 89999cde6b2869bb4ba773e6f09819bdc4554cd4..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerf.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- 
repro_singleseq_nerf.yaml -- repro_singleseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerformer.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerformer.yaml deleted file mode 100644 index 510589a0c048f1f915da6b0e4c57dfbc3f8f29b5..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_nerformer.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_singleseq_nerformer.yaml -- repro_singleseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_srn_noharm.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_srn_noharm.yaml deleted file mode 100644 index 8964a4a21e41286e9587cc2209a786b54482ab44..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_v2_srn_noharm.yaml +++ /dev/null @@ -1,4 +0,0 @@ -defaults: -- repro_singleseq_srn_noharm.yaml -- repro_singleseq_co3dv2_base.yaml -- _self_ diff --git a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml b/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml deleted file mode 100644 index f5b174c04a9b48646151509bdd22db24bc495702..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml +++ /dev/null @@ -1,22 +0,0 @@ -defaults: -- repro_singleseq_base -- _self_ -data_source_ImplicitronDataSource_args: - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - batch_size: 10 - dataset_length_train: 1000 - dataset_length_val: 1 - num_workers: 8 - train_conditioning_type: SAME - val_conditioning_type: SAME - test_conditioning_type: SAME - images_per_seq_options: - - 2 - - 3 - - 4 - - 5 - - 6 - - 7 - - 8 - - 9 - - 10 diff --git a/pytorch3d/projects/implicitron_trainer/experiment.py b/pytorch3d/projects/implicitron_trainer/experiment.py deleted file mode 100644 index 797660c8007bd9ac4446b3716375e7dac9028c60..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/experiment.py +++ /dev/null @@ -1,297 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -"""" -This file is the entry point for launching experiments with Implicitron. - -Launch Training ---------------- -Experiment config .yaml files are located in the -`projects/implicitron_trainer/configs` folder. To launch an experiment, -specify the name of the file. Specific config values can also be overridden -from the command line, for example: - -``` -./experiment.py --config-name base_config.yaml override.param.one=42 override.param.two=84 -``` - -Main functions ---------------- -- The Experiment class defines `run` which creates the model, optimizer, and other - objects used in training, then starts TrainingLoop's `run` function. -- TrainingLoop takes care of the actual training logic: forward and backward passes, - evaluation and testing, as well as model checkpointing, visualization, and metric - printing. - -Outputs --------- -The outputs of the experiment are saved and logged in multiple ways: - - Checkpoints: - Model, optimizer and stats are stored in the directory - named by the `exp_dir` key from the config file / CLI parameters. 
- - Stats - Stats are logged and plotted to the file "train_stats.pdf" in the - same directory. The stats are also saved as part of the checkpoint file. - - Visualizations - Predictions are plotted to a visdom server running at the - port specified by the `visdom_server` and `visdom_port` keys in the - config file. - -""" -import logging -import os -import warnings - -from dataclasses import field - -import hydra - -import torch -from accelerate import Accelerator -from omegaconf import DictConfig, OmegaConf -from packaging import version - -from pytorch3d.implicitron.dataset.data_source import ( - DataSourceBase, - ImplicitronDataSource, -) -from pytorch3d.implicitron.models.base_model import ImplicitronModelBase - -from pytorch3d.implicitron.models.renderer.multipass_ea import ( - MultiPassEmissionAbsorptionRenderer, -) -from pytorch3d.implicitron.models.renderer.ray_sampler import AdaptiveRaySampler -from pytorch3d.implicitron.tools.config import ( - Configurable, - expand_args_fields, - remove_unused_components, - run_auto_creation, -) - -from .impl.model_factory import ModelFactoryBase -from .impl.optimizer_factory import OptimizerFactoryBase -from .impl.training_loop import TrainingLoopBase -from .impl.utils import seed_all_random_engines - -logger = logging.getLogger(__name__) - -# workaround for https://github.com/facebookresearch/hydra/issues/2262 -_RUN = hydra.types.RunMode.RUN - -if version.parse(hydra.__version__) < version.Version("1.1"): - raise ValueError( - f"Hydra version {hydra.__version__} is too old." - " (Implicitron requires version 1.1 or later.)" - ) - -try: - # only makes sense in FAIR cluster - import pytorch3d.implicitron.fair_cluster.slurm # noqa: F401 -except ModuleNotFoundError: - pass - -no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None - - -class Experiment(Configurable): # pyre-ignore: 13 - """ - This class is at the top level of Implicitron's config hierarchy. Its - members are high-level components necessary for training an implicit rende- - ring network. - - Members: - data_source: An object that produces datasets and dataloaders. - model_factory: An object that produces an implicit rendering model as - well as its corresponding Stats object. - optimizer_factory: An object that produces the optimizer and lr - scheduler. - training_loop: An object that runs training given the outputs produced - by the data_source, model_factory and optimizer_factory. - seed: A random seed to ensure reproducibility. - detect_anomaly: Whether torch.autograd should detect anomalies. Useful - for debugging, but might slow down the training. - exp_dir: Root experimentation directory. Checkpoints and training stats - will be saved here. - """ - - data_source: DataSourceBase - data_source_class_type: str = "ImplicitronDataSource" - model_factory: ModelFactoryBase - model_factory_class_type: str = "ImplicitronModelFactory" - optimizer_factory: OptimizerFactoryBase - optimizer_factory_class_type: str = "ImplicitronOptimizerFactory" - training_loop: TrainingLoopBase - training_loop_class_type: str = "ImplicitronTrainingLoop" - - seed: int = 42 - detect_anomaly: bool = False - exp_dir: str = "./data/default_experiment/" - - hydra: dict = field( - default_factory=lambda: { - "run": {"dir": "."}, # Make hydra not change the working dir. 
- "output_subdir": None, # disable storing the .hydra logs - "mode": _RUN, - } - ) - - def __post_init__(self): - seed_all_random_engines( - self.seed - ) # Set all random engine seeds for reproducibility - - run_auto_creation(self) - - def run(self) -> None: - # Initialize the accelerator if desired. - if no_accelerate: - accelerator = None - device = torch.device("cuda:0") - else: - accelerator = Accelerator(device_placement=False) - logger.info(accelerator.state) - device = accelerator.device - - logger.info(f"Running experiment on device: {device}") - os.makedirs(self.exp_dir, exist_ok=True) - - # set the debug mode - if self.detect_anomaly: - logger.info("Anomaly detection!") - torch.autograd.set_detect_anomaly(self.detect_anomaly) - - # Initialize the datasets and dataloaders. - datasets, dataloaders = self.data_source.get_datasets_and_dataloaders() - - # Init the model and the corresponding Stats object. - model = self.model_factory( - accelerator=accelerator, - exp_dir=self.exp_dir, - ) - - stats = self.training_loop.load_stats( - log_vars=model.log_vars, - exp_dir=self.exp_dir, - resume=self.model_factory.resume, - resume_epoch=self.model_factory.resume_epoch, # pyre-ignore [16] - ) - start_epoch = stats.epoch + 1 - - model.to(device) - - # Init the optimizer and LR scheduler. - optimizer, scheduler = self.optimizer_factory( - accelerator=accelerator, - exp_dir=self.exp_dir, - last_epoch=start_epoch, - model=model, - resume=self.model_factory.resume, - resume_epoch=self.model_factory.resume_epoch, - ) - - # Wrap all modules in the distributed library - # Note: we don't pass the scheduler to prepare as it - # doesn't need to be stepped at each optimizer step - train_loader = dataloaders.train - val_loader = dataloaders.val - test_loader = dataloaders.test - if accelerator is not None: - ( - model, - optimizer, - train_loader, - val_loader, - ) = accelerator.prepare(model, optimizer, train_loader, val_loader) - - # Enter the main training loop. - self.training_loop.run( - train_loader=train_loader, - val_loader=val_loader, - test_loader=test_loader, - # pyre-ignore[6] - train_dataset=datasets.train, - model=model, - optimizer=optimizer, - scheduler=scheduler, - accelerator=accelerator, - device=device, - exp_dir=self.exp_dir, - stats=stats, - seed=self.seed, - ) - - -def _setup_envvars_for_cluster() -> bool: - """ - Prepares to run on cluster if relevant. - Returns whether FAIR cluster in use. - """ - # TODO: How much of this is needed in general? 
- - try: - import submitit - except ImportError: - return False - - try: - # Only needed when launching on cluster with slurm and submitit - job_env = submitit.JobEnvironment() - except RuntimeError: - return False - - os.environ["LOCAL_RANK"] = str(job_env.local_rank) - os.environ["RANK"] = str(job_env.global_rank) - os.environ["WORLD_SIZE"] = str(job_env.num_tasks) - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "42918" - logger.info( - "Num tasks %s, global_rank %s" - % (str(job_env.num_tasks), str(job_env.global_rank)) - ) - - return True - - -def dump_cfg(cfg: DictConfig) -> None: - remove_unused_components(cfg) - # dump the exp config to the exp dir - os.makedirs(cfg.exp_dir, exist_ok=True) - try: - cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml") - OmegaConf.save(config=cfg, f=cfg_filename) - except PermissionError: - warnings.warn("Can't dump config due to insufficient permissions!") - - -expand_args_fields(Experiment) -cs = hydra.core.config_store.ConfigStore.instance() -cs.store(name="default_config", node=Experiment) - - -@hydra.main(config_path="./configs/", config_name="default_config") -def experiment(cfg: DictConfig) -> None: - # CUDA_VISIBLE_DEVICES must have been set. - - if "CUDA_DEVICE_ORDER" not in os.environ: - os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" - - if not _setup_envvars_for_cluster(): - logger.info("Running locally") - - # TODO: The following may be needed for hydra/submitit it to work - expand_args_fields(ImplicitronModelBase) - expand_args_fields(AdaptiveRaySampler) - expand_args_fields(MultiPassEmissionAbsorptionRenderer) - expand_args_fields(ImplicitronDataSource) - - experiment = Experiment(**cfg) - dump_cfg(cfg) - experiment.run() - - -if __name__ == "__main__": - experiment() diff --git a/pytorch3d/projects/implicitron_trainer/impl/__init__.py b/pytorch3d/projects/implicitron_trainer/impl/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/impl/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/projects/implicitron_trainer/impl/model_factory.py b/pytorch3d/projects/implicitron_trainer/impl/model_factory.py deleted file mode 100644 index 9c8ea9da3026dd63f0cfbdfe9352a777d591db3c..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/impl/model_factory.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import logging -import os -from typing import Optional - -import torch.optim - -from accelerate import Accelerator -from pytorch3d.implicitron.models.base_model import ImplicitronModelBase -from pytorch3d.implicitron.tools import model_io -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) -from pytorch3d.implicitron.tools.stats import Stats - -logger = logging.getLogger(__name__) - - -class ModelFactoryBase(ReplaceableBase): - - resume: bool = True # resume from the last checkpoint - - def __call__(self, **kwargs) -> ImplicitronModelBase: - """ - Initialize the model (possibly from a previously saved state). - - Returns: An instance of ImplicitronModelBase. - """ - raise NotImplementedError() - - def load_stats(self, **kwargs) -> Stats: - """ - Initialize or load a Stats object. - """ - raise NotImplementedError() - - -@registry.register -class ImplicitronModelFactory(ModelFactoryBase): # pyre-ignore [13] - """ - A factory class that initializes an implicit rendering model. - - Members: - model: An ImplicitronModelBase object. - resume: If True, attempt to load the last checkpoint from `exp_dir` - passed to __call__. Failure to do so will return a model with ini- - tial weights unless `force_resume` is True. - resume_epoch: If `resume` is True: Resume a model at this epoch, or if - `resume_epoch` <= 0, then resume from the latest checkpoint. - force_resume: If True, throw a FileNotFoundError if `resume` is True but - a model checkpoint cannot be found. - - """ - - model: ImplicitronModelBase - model_class_type: str = "GenericModel" - resume: bool = True - resume_epoch: int = -1 - force_resume: bool = False - - def __post_init__(self): - run_auto_creation(self) - - def __call__( - self, - exp_dir: str, - accelerator: Optional[Accelerator] = None, - ) -> ImplicitronModelBase: - """ - Returns an instance of `ImplicitronModelBase`, possibly loaded from a - checkpoint (if self.resume, self.resume_epoch specify so). - - Args: - exp_dir: Root experiment directory. - accelerator: An Accelerator object. - - Returns: - model: The model with optionally loaded weights from checkpoint - - Raise: - FileNotFoundError if `force_resume` is True but checkpoint not found. - """ - # Determine the network outputs that should be logged - if hasattr(self.model, "log_vars"): - log_vars = list(self.model.log_vars) - else: - log_vars = ["objective"] - - if self.resume_epoch > 0: - # Resume from a certain epoch - model_path = model_io.get_checkpoint(exp_dir, self.resume_epoch) - if not os.path.isfile(model_path): - raise ValueError(f"Cannot find model from epoch {self.resume_epoch}.") - else: - # Retrieve the last checkpoint - model_path = model_io.find_last_checkpoint(exp_dir) - - if model_path is not None: - logger.info(f"Found previous model {model_path}") - if self.force_resume or self.resume: - logger.info("Resuming.") - - map_location = None - if accelerator is not None and not accelerator.is_local_main_process: - map_location = { - "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index - } - model_state_dict = torch.load( - model_io.get_model_path(model_path), map_location=map_location - ) - - try: - self.model.load_state_dict(model_state_dict, strict=True) - except RuntimeError as e: - logger.error(e) - logger.info( - "Cannot load state dict in strict mode! 
-> trying non-strict" - ) - self.model.load_state_dict(model_state_dict, strict=False) - self.model.log_vars = log_vars - else: - logger.info("Not resuming -> starting from scratch.") - elif self.force_resume: - raise FileNotFoundError(f"Cannot find a checkpoint in {exp_dir}!") - - return self.model diff --git a/pytorch3d/projects/implicitron_trainer/impl/optimizer_factory.py b/pytorch3d/projects/implicitron_trainer/impl/optimizer_factory.py deleted file mode 100644 index 1ec3165384dcd12c25429fba9f449f306181ff59..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/impl/optimizer_factory.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import inspect -import logging -import os -from collections import defaultdict -from dataclasses import field -from typing import Any, Dict, List, Optional, Tuple - -import torch.optim - -from accelerate import Accelerator - -from pytorch3d.implicitron.models.base_model import ImplicitronModelBase -from pytorch3d.implicitron.tools import model_io -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) - -logger = logging.getLogger(__name__) - - -class OptimizerFactoryBase(ReplaceableBase): - def __call__( - self, model: ImplicitronModelBase, **kwargs - ) -> Tuple[torch.optim.Optimizer, Any]: - """ - Initialize the optimizer and lr scheduler. - - Args: - model: The model with optionally loaded weights. - - Returns: - An optimizer module (optionally loaded from a checkpoint) and - a learning rate scheduler module (should be a subclass of torch.optim's - lr_scheduler._LRScheduler). - """ - raise NotImplementedError() - - -@registry.register -class ImplicitronOptimizerFactory(OptimizerFactoryBase): - """ - A factory that initializes the optimizer and lr scheduler. - - Members: - betas: Beta parameters for the Adam optimizer. - breed: The type of optimizer to use. We currently support SGD, Adagrad - and Adam. - exponential_lr_step_size: With Exponential policy only, - lr = lr * gamma ** (epoch/step_size) - gamma: Multiplicative factor of learning rate decay. - lr: The value for the initial learning rate. - lr_policy: The policy to use for learning rate. We currently support - MultiStepLR and Exponential policies. - momentum: A momentum value (for SGD only). - multistep_lr_milestones: With MultiStepLR policy only: list of - increasing epoch indices at which the learning rate is modified. - momentum: Momentum factor for SGD optimizer. - weight_decay: The optimizer weight_decay (L2 penalty on model weights). - foreach: Whether to use new "foreach" implementation of optimizer where - available (e.g. requires PyTorch 1.12.0 for Adam) - group_learning_rates: Parameters or modules can be assigned to parameter - groups. This dictionary has names of those parameter groups as keys - and learning rates as values. All parameter group names have to be - defined in this dictionary. Parameters which do not have predefined - parameter group are put into "default" parameter group which has - `lr` as its learning rate. - """ - - betas: Tuple[float, ...] 
= (0.9, 0.999) - breed: str = "Adam" - exponential_lr_step_size: int = 250 - gamma: float = 0.1 - lr: float = 0.0005 - lr_policy: str = "MultiStepLR" - momentum: float = 0.9 - multistep_lr_milestones: tuple = () - weight_decay: float = 0.0 - linear_exponential_lr_milestone: int = 200 - linear_exponential_start_gamma: float = 0.1 - foreach: Optional[bool] = True - group_learning_rates: Dict[str, float] = field(default_factory=lambda: {}) - - def __post_init__(self): - run_auto_creation(self) - - def __call__( - self, - last_epoch: int, - model: ImplicitronModelBase, - accelerator: Optional[Accelerator] = None, - exp_dir: Optional[str] = None, - resume: bool = True, - resume_epoch: int = -1, - **kwargs, - ) -> Tuple[torch.optim.Optimizer, Any]: - """ - Initialize the optimizer (optionally from a checkpoint) and the lr scheduluer. - - Args: - last_epoch: If the model was loaded from checkpoint this will be the - number of the last epoch that was saved. - model: The model with optionally loaded weights. - accelerator: An optional Accelerator instance. - exp_dir: Root experiment directory. - resume: If True, attempt to load optimizer checkpoint from exp_dir. - Failure to do so will return a newly initialized optimizer. - resume_epoch: If `resume` is True: Resume optimizer at this epoch. If - `resume_epoch` <= 0, then resume from the latest checkpoint. - Returns: - An optimizer module (optionally loaded from a checkpoint) and - a learning rate scheduler module (should be a subclass of torch.optim's - lr_scheduler._LRScheduler). - """ - # Get the parameters to optimize - if hasattr(model, "_get_param_groups"): # use the model function - p_groups = model._get_param_groups(self.lr, wd=self.weight_decay) - else: - p_groups = [ - {"params": params, "lr": self._get_group_learning_rate(group)} - for group, params in self._get_param_groups(model).items() - ] - - # Intialize the optimizer - optimizer_kwargs: Dict[str, Any] = { - "lr": self.lr, - "weight_decay": self.weight_decay, - } - if self.breed == "SGD": - optimizer_class = torch.optim.SGD - optimizer_kwargs["momentum"] = self.momentum - elif self.breed == "Adagrad": - optimizer_class = torch.optim.Adagrad - elif self.breed == "Adam": - optimizer_class = torch.optim.Adam - optimizer_kwargs["betas"] = self.betas - else: - raise ValueError(f"No such solver type {self.breed}") - - if "foreach" in inspect.signature(optimizer_class.__init__).parameters: - optimizer_kwargs["foreach"] = self.foreach - optimizer = optimizer_class(p_groups, **optimizer_kwargs) - logger.info(f"Solver type = {self.breed}") - - # Load state from checkpoint - optimizer_state = self._get_optimizer_state( - exp_dir, - accelerator, - resume_epoch=resume_epoch, - resume=resume, - ) - if optimizer_state is not None: - logger.info("Setting loaded optimizer state.") - optimizer.load_state_dict(optimizer_state) - - # Initialize the learning rate scheduler - if self.lr_policy.casefold() == "MultiStepLR".casefold(): - scheduler = torch.optim.lr_scheduler.MultiStepLR( - optimizer, - milestones=self.multistep_lr_milestones, - gamma=self.gamma, - ) - elif self.lr_policy.casefold() == "Exponential".casefold(): - scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, - lambda epoch: self.gamma ** (epoch / self.exponential_lr_step_size), - verbose=False, - ) - elif self.lr_policy.casefold() == "LinearExponential".casefold(): - # linear learning rate progression between epochs 0 to - # self.linear_exponential_lr_milestone, followed by exponential - # lr decay for the rest of the epochs - 
def _get_lr(epoch: int): - m = self.linear_exponential_lr_milestone - if epoch < m: - w = (m - epoch) / m - gamma = w * self.linear_exponential_start_gamma + (1 - w) - else: - epoch_rest = epoch - m - gamma = self.gamma ** (epoch_rest / self.exponential_lr_step_size) - return gamma - - scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, _get_lr, verbose=False - ) - else: - raise ValueError("no such lr policy %s" % self.lr_policy) - - # When loading from checkpoint, this will make sure that the - # lr is correctly set even after returning. - for _ in range(last_epoch): - scheduler.step() - - optimizer.zero_grad() - - return optimizer, scheduler - - def _get_optimizer_state( - self, - exp_dir: Optional[str], - accelerator: Optional[Accelerator] = None, - resume: bool = True, - resume_epoch: int = -1, - ) -> Optional[Dict[str, Any]]: - """ - Load an optimizer state from a checkpoint. - - resume: If True, attempt to load the last checkpoint from `exp_dir` - passed to __call__. Failure to do so will return a newly initialized - optimizer. - resume_epoch: If `resume` is True: Resume optimizer at this epoch. If - `resume_epoch` <= 0, then resume from the latest checkpoint. - """ - if exp_dir is None or not resume: - return None - if resume_epoch > 0: - save_path = model_io.get_checkpoint(exp_dir, resume_epoch) - if not os.path.isfile(save_path): - raise FileNotFoundError( - f"Cannot find optimizer from epoch {resume_epoch}." - ) - else: - save_path = model_io.find_last_checkpoint(exp_dir) - optimizer_state = None - if save_path is not None: - logger.info(f"Found previous optimizer state {save_path} -> resuming.") - opt_path = model_io.get_optimizer_path(save_path) - - if os.path.isfile(opt_path): - map_location = None - if accelerator is not None and not accelerator.is_local_main_process: - map_location = { - "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index - } - optimizer_state = torch.load(opt_path, map_location) - else: - raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.") - return optimizer_state - - def _get_param_groups( - self, module: torch.nn.Module - ) -> Dict[str, List[torch.nn.Parameter]]: - """ - Recursively visits all the modules inside the `module` and sorts all the - parameters in parameter groups. - - Uses `param_groups` dictionary member, where keys are names of individual - parameters or module members and values are the names of the parameter groups - for those parameters or members. "self" key is used to denote the parameter groups - at the module level. Possible keys, including the "self" key do not have to - be defined. By default all parameters have the learning rate defined in the - optimizer. This can be overridden by setting the parameter group in `param_groups` - member of a specific module. Values are a parameter group name. The keys - specify what parameters will be affected as follows: - - β€œself”: All the parameters of the module and its child modules - - name of a parameter: A parameter with that name. - - name of a module member: All the parameters of the module and its - child modules. - This is useful if members do not have `param_groups`, for - example torch.nn.Linear. - - .: recursive. Same as if - was used in param_groups of that submodule/member. 
- - Args: - module: module from which to extract the parameters and their parameter - groups - Returns: - dictionary with parameter groups as keys and lists of parameters as values - """ - - param_groups = defaultdict(list) - - def traverse(module, default_group: str, mapping: Dict[str, str]) -> None: - """ - Visitor for module to assign its parameters to the relevant member of - param_groups. - - Args: - module: the module being visited in a depth-first search - default_group: the param group to assign parameters to unless - otherwise overriden. - mapping: known mappings of parameters to groups for this module, - destructively modified by this function. - """ - # If key self is defined in param_groups then chenge the default param - # group for all parameters and children in the module. - if hasattr(module, "param_groups") and "self" in module.param_groups: - default_group = module.param_groups["self"] - - # Collect all the parameters that are directly inside the `module`, - # they will be in the default param group if they don't have - # defined group. - if hasattr(module, "param_groups"): - mapping.update(module.param_groups) - - for name, param in module.named_parameters(recurse=False): - if param.requires_grad: - group_name = mapping.get(name, default_group) - logger.debug(f"Assigning {name} to param_group {group_name}") - param_groups[group_name].append(param) - - # If children have defined default param group then use it else pass - # own default. - for child_name, child in module.named_children(): - mapping_to_add = { - name[len(child_name) + 1 :]: group - for name, group in mapping.items() - if name.startswith(child_name + ".") - } - traverse(child, mapping.get(child_name, default_group), mapping_to_add) - - traverse(module, "default", {}) - return param_groups - - def _get_group_learning_rate(self, group_name: str) -> float: - """ - Wraps the `group_learning_rates` dictionary providing errors and returns - `self.lr` for "default" group_name. - - Args: - group_name: a string representing the name of the group - Returns: - learning rate for a specific group - """ - if group_name == "default": - return self.lr - lr = self.group_learning_rates.get(group_name, None) - if lr is None: - raise ValueError(f"no learning rate given for group {group_name}") - return lr diff --git a/pytorch3d/projects/implicitron_trainer/impl/training_loop.py b/pytorch3d/projects/implicitron_trainer/impl/training_loop.py deleted file mode 100644 index 57917cc8183e95b91080663dec06e6e7f4dbad37..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/impl/training_loop.py +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import logging -import os -import time -from typing import Any, List, Optional - -import torch -from accelerate import Accelerator -from pytorch3d.implicitron.evaluation.evaluator import EvaluatorBase -from pytorch3d.implicitron.models.base_model import ImplicitronModelBase -from pytorch3d.implicitron.models.generic_model import EvaluationMode -from pytorch3d.implicitron.tools import model_io, vis_utils -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) -from pytorch3d.implicitron.tools.stats import Stats -from torch.utils.data import DataLoader, Dataset - -from .utils import seed_all_random_engines - -logger = logging.getLogger(__name__) - - -# pyre-fixme[13]: Attribute `evaluator` is never initialized. -class TrainingLoopBase(ReplaceableBase): - """ - Members: - evaluator: An EvaluatorBase instance, used to evaluate training results. - """ - - evaluator: Optional[EvaluatorBase] - evaluator_class_type: Optional[str] = "ImplicitronEvaluator" - - def run( - self, - train_loader: DataLoader, - val_loader: Optional[DataLoader], - test_loader: Optional[DataLoader], - train_dataset: Dataset, - model: ImplicitronModelBase, - optimizer: torch.optim.Optimizer, - scheduler: Any, - **kwargs, - ) -> None: - raise NotImplementedError() - - def load_stats( - self, - log_vars: List[str], - exp_dir: str, - resume: bool = True, - resume_epoch: int = -1, - **kwargs, - ) -> Stats: - raise NotImplementedError() - - -@registry.register -class ImplicitronTrainingLoop(TrainingLoopBase): - """ - Members: - eval_only: If True, only run evaluation using the test dataloader. - max_epochs: Train for this many epochs. Note that if the model was - loaded from a checkpoint, we will restart training at the appropriate - epoch and run for (max_epochs - checkpoint_epoch) epochs. - store_checkpoints: If True, store model and optimizer state checkpoints. - store_checkpoints_purge: If >= 0, remove any checkpoints older or equal - to this many epochs. - test_interval: Evaluate on a test dataloader each `test_interval` epochs. - test_when_finished: If True, evaluate on a test dataloader when training - completes. - validation_interval: Validate each `validation_interval` epochs. - clip_grad: Optionally clip the gradient norms. - If set to a value <=0.0, no clipping - metric_print_interval: The batch interval at which the stats should be - logged. - visualize_interval: The batch interval at which the visualizations - should be plotted - visdom_env: The name of the Visdom environment to use for plotting. - visdom_port: The Visdom port. - visdom_server: Address of the Visdom server. - """ - - # Parameters of the outer training loop. - eval_only: bool = False - max_epochs: int = 1000 - store_checkpoints: bool = True - store_checkpoints_purge: int = 1 - test_interval: int = -1 - test_when_finished: bool = False - validation_interval: int = 1 - - # Gradient clipping. - clip_grad: float = 0.0 - - # Visualization/logging parameters. - metric_print_interval: int = 5 - visualize_interval: int = 1000 - visdom_env: str = "" - visdom_port: int = int(os.environ.get("VISDOM_PORT", 8097)) - visdom_server: str = "http://127.0.0.1" - - def __post_init__(self): - run_auto_creation(self) - - # pyre-fixme[14]: `run` overrides method defined in `TrainingLoopBase` - # inconsistently. 
- def run( - self, - *, - train_loader: DataLoader, - val_loader: Optional[DataLoader], - test_loader: Optional[DataLoader], - train_dataset: Dataset, - model: ImplicitronModelBase, - optimizer: torch.optim.Optimizer, - scheduler: Any, - accelerator: Optional[Accelerator], - device: torch.device, - exp_dir: str, - stats: Stats, - seed: int, - **kwargs, - ): - """ - Entry point to run the training and validation loops - based on the specified config file. - """ - start_epoch = stats.epoch + 1 - assert scheduler.last_epoch == stats.epoch + 1 - assert scheduler.last_epoch == start_epoch - - # only run evaluation on the test dataloader - if self.eval_only: - if test_loader is not None: - # pyre-fixme[16]: `Optional` has no attribute `run`. - self.evaluator.run( - dataloader=test_loader, - device=device, - dump_to_json=True, - epoch=stats.epoch, - exp_dir=exp_dir, - model=model, - ) - return - else: - raise ValueError( - "Cannot evaluate and dump results to json, no test data provided." - ) - - # loop through epochs - for epoch in range(start_epoch, self.max_epochs): - # automatic new_epoch and plotting of stats at every epoch start - with stats: - - # Make sure to re-seed random generators to ensure reproducibility - # even after restart. - seed_all_random_engines(seed + epoch) - - cur_lr = float(scheduler.get_last_lr()[-1]) - logger.debug(f"scheduler lr = {cur_lr:1.2e}") - - # train loop - self._training_or_validation_epoch( - accelerator=accelerator, - device=device, - epoch=epoch, - loader=train_loader, - model=model, - optimizer=optimizer, - stats=stats, - validation=False, - ) - - # val loop (optional) - if val_loader is not None and epoch % self.validation_interval == 0: - self._training_or_validation_epoch( - accelerator=accelerator, - device=device, - epoch=epoch, - loader=val_loader, - model=model, - optimizer=optimizer, - stats=stats, - validation=True, - ) - - # eval loop (optional) - if ( - test_loader is not None - and self.test_interval > 0 - and epoch % self.test_interval == 0 - ): - self.evaluator.run( - device=device, - dataloader=test_loader, - model=model, - ) - - assert stats.epoch == epoch, "inconsistent stats!" - self._checkpoint(accelerator, epoch, exp_dir, model, optimizer, stats) - - scheduler.step() - new_lr = float(scheduler.get_last_lr()[-1]) - if new_lr != cur_lr: - logger.info(f"LR change! {cur_lr} -> {new_lr}") - - if self.test_when_finished: - if test_loader is not None: - self.evaluator.run( - device=device, - dump_to_json=True, - epoch=stats.epoch, - exp_dir=exp_dir, - dataloader=test_loader, - model=model, - ) - else: - raise ValueError( - "Cannot evaluate and dump results to json, no test data provided." - ) - - def load_stats( - self, - log_vars: List[str], - exp_dir: str, - resume: bool = True, - resume_epoch: int = -1, - **kwargs, - ) -> Stats: - """ - Load Stats that correspond to the model's log_vars and resume_epoch. - - Args: - log_vars: A list of variable names to log. Should be a subset of the - `preds` returned by the forward function of the corresponding - ImplicitronModelBase instance. - exp_dir: Root experiment directory. - resume: If False, do not load stats from the checkpoint speci- - fied by resume and resume_epoch; instead, create a fresh stats object. 
- - stats: The stats structure (optionally loaded from checkpoint) - """ - # Init the stats struct - visdom_env_charts = ( - vis_utils.get_visdom_env(self.visdom_env, exp_dir) + "_charts" - ) - stats = Stats( - # log_vars should be a list, but OmegaConf might load them as ListConfig - list(log_vars), - plot_file=os.path.join(exp_dir, "train_stats.pdf"), - visdom_env=visdom_env_charts, - visdom_server=self.visdom_server, - visdom_port=self.visdom_port, - ) - - model_path = None - if resume: - if resume_epoch > 0: - model_path = model_io.get_checkpoint(exp_dir, resume_epoch) - if not os.path.isfile(model_path): - raise FileNotFoundError( - f"Cannot find stats from epoch {resume_epoch}." - ) - else: - model_path = model_io.find_last_checkpoint(exp_dir) - - if model_path is not None: - stats_path = model_io.get_stats_path(model_path) - stats_load = model_io.load_stats(stats_path) - - # Determine if stats should be reset - if resume: - if stats_load is None: - logger.warning("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n") - last_epoch = model_io.parse_epoch_from_model_path(model_path) - logger.info(f"Estimated resume epoch = {last_epoch}") - - # Reset the stats struct - for _ in range(last_epoch + 1): - stats.new_epoch() - assert last_epoch == stats.epoch - else: - logger.info(f"Found previous stats in {stats_path} -> resuming.") - stats = stats_load - - # Update stats properties incase it was reset on load - stats.visdom_env = visdom_env_charts - stats.visdom_server = self.visdom_server - stats.visdom_port = self.visdom_port - stats.plot_file = os.path.join(exp_dir, "train_stats.pdf") - stats.synchronize_logged_vars(log_vars) - else: - logger.info("Clearing stats") - - return stats - - def _training_or_validation_epoch( - self, - epoch: int, - loader: DataLoader, - model: ImplicitronModelBase, - optimizer: torch.optim.Optimizer, - stats: Stats, - validation: bool, - *, - accelerator: Optional[Accelerator], - bp_var: str = "objective", - device: torch.device, - **kwargs, - ) -> None: - """ - This is the main loop for training and evaluation including: - model forward pass, loss computation, backward pass and visualization. - - Args: - epoch: The index of the current epoch - loader: The dataloader to use for the loop - model: The model module optionally loaded from checkpoint - optimizer: The optimizer module optionally loaded from checkpoint - stats: The stats struct, also optionally loaded from checkpoint - validation: If true, run the loop with the model in eval mode - and skip the backward pass - accelerator: An optional Accelerator instance. - bp_var: The name of the key in the model output `preds` dict which - should be used as the loss for the backward pass. - device: The device on which to run the model. 
- """ - - if validation: - model.eval() - trainmode = "val" - else: - model.train() - trainmode = "train" - - t_start = time.time() - - # get the visdom env name - visdom_env_imgs = stats.visdom_env + "_images_" + trainmode - viz = vis_utils.get_visdom_connection( - server=stats.visdom_server, - port=stats.visdom_port, - ) - - # Iterate through the batches - n_batches = len(loader) - for it, net_input in enumerate(loader): - last_iter = it == n_batches - 1 - - # move to gpu where possible (in place) - net_input = net_input.to(device) - - # run the forward pass - if not validation: - optimizer.zero_grad() - preds = model( - **{**net_input, "evaluation_mode": EvaluationMode.TRAINING} - ) - else: - with torch.no_grad(): - preds = model( - **{**net_input, "evaluation_mode": EvaluationMode.EVALUATION} - ) - - # make sure we dont overwrite something - assert all(k not in preds for k in net_input.keys()) - # merge everything into one big dict - preds.update(net_input) - - # update the stats logger - stats.update(preds, time_start=t_start, stat_set=trainmode) - # pyre-ignore [16] - assert stats.it[trainmode] == it, "inconsistent stat iteration number!" - - # print textual status update - if it % self.metric_print_interval == 0 or last_iter: - std_out = stats.get_status_string(stat_set=trainmode, max_it=n_batches) - logger.info(std_out) - - # visualize results - if ( - (accelerator is None or accelerator.is_local_main_process) - and self.visualize_interval > 0 - and it % self.visualize_interval == 0 - ): - prefix = f"e{stats.epoch}_it{stats.it[trainmode]}" - if hasattr(model, "visualize"): - model.visualize( - viz, - visdom_env_imgs, - preds, - prefix, - ) - - # optimizer step - if not validation: - loss = preds[bp_var] - assert torch.isfinite(loss).all(), "Non-finite loss!" - # backprop - if accelerator is None: - loss.backward() - else: - accelerator.backward(loss) - if self.clip_grad > 0.0: - # Optionally clip the gradient norms. - total_norm = torch.nn.utils.clip_grad_norm( - model.parameters(), self.clip_grad - ) - if total_norm > self.clip_grad: - logger.debug( - f"Clipping gradient: {total_norm}" - + f" with coef {self.clip_grad / float(total_norm)}." - ) - - optimizer.step() - - def _checkpoint( - self, - accelerator: Optional[Accelerator], - epoch: int, - exp_dir: str, - model: ImplicitronModelBase, - optimizer: torch.optim.Optimizer, - stats: Stats, - ): - """ - Save a model and its corresponding Stats object to a file, if - `self.store_checkpoints` is True. In addition, if - `self.store_checkpoints_purge` is True, remove any checkpoints older - than `self.store_checkpoints_purge` epochs old. - """ - if self.store_checkpoints and ( - accelerator is None or accelerator.is_local_main_process - ): - if self.store_checkpoints_purge > 0: - for prev_epoch in range(epoch - self.store_checkpoints_purge): - model_io.purge_epoch(exp_dir, prev_epoch) - outfile = model_io.get_checkpoint(exp_dir, epoch) - unwrapped_model = ( - model if accelerator is None else accelerator.unwrap_model(model) - ) - model_io.safe_save_model( - unwrapped_model, stats, outfile, optimizer=optimizer - ) diff --git a/pytorch3d/projects/implicitron_trainer/impl/utils.py b/pytorch3d/projects/implicitron_trainer/impl/utils.py deleted file mode 100644 index 4fac4463857f319127c9b38b496173c2ac05fd13..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/impl/utils.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import random - -import numpy as np -import torch - - -def seed_all_random_engines(seed: int) -> None: - np.random.seed(seed) - torch.manual_seed(seed) - random.seed(seed) diff --git a/pytorch3d/projects/implicitron_trainer/tests/__init__.py b/pytorch3d/projects/implicitron_trainer/tests/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/tests/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/projects/implicitron_trainer/tests/experiment.yaml b/pytorch3d/projects/implicitron_trainer/tests/experiment.yaml deleted file mode 100644 index e0394f2207ee4c01dc0a8179d0a0ceeb4ddcbbda..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/tests/experiment.yaml +++ /dev/null @@ -1,1243 +0,0 @@ -data_source_class_type: ImplicitronDataSource -model_factory_class_type: ImplicitronModelFactory -optimizer_factory_class_type: ImplicitronOptimizerFactory -training_loop_class_type: ImplicitronTrainingLoop -seed: 42 -detect_anomaly: false -exp_dir: ./data/default_experiment/ -hydra: - run: - dir: . - output_subdir: null - mode: RUN -data_source_ImplicitronDataSource_args: - dataset_map_provider_class_type: ??? - data_loader_map_provider_class_type: SequenceDataLoaderMapProvider - dataset_map_provider_BlenderDatasetMapProvider_args: - base_dir: ??? - object_name: ??? - path_manager_factory_class_type: PathManagerFactory - n_known_frames_for_test: null - path_manager_factory_PathManagerFactory_args: - silence_logs: true - dataset_map_provider_JsonIndexDatasetMapProvider_args: - category: ??? - task_str: singlesequence - dataset_root: '' - n_frames_per_sequence: -1 - test_on_train: false - restrict_sequence_name: [] - test_restrict_sequence_id: -1 - assert_single_seq: false - only_test_set: false - dataset_class_type: JsonIndexDataset - path_manager_factory_class_type: PathManagerFactory - dataset_JsonIndexDataset_args: - limit_to: 0 - limit_sequences_to: 0 - exclude_sequence: [] - limit_category_to: [] - load_images: true - load_depths: true - load_depth_masks: true - load_masks: true - load_point_clouds: false - max_points: 0 - mask_images: false - mask_depths: false - image_height: 800 - image_width: 800 - box_crop: true - box_crop_mask_thr: 0.4 - box_crop_context: 0.3 - remove_empty_masks: true - seed: 0 - sort_frames: false - path_manager_factory_PathManagerFactory_args: - silence_logs: true - dataset_map_provider_JsonIndexDatasetMapProviderV2_args: - category: ??? - subset_name: ??? 
- dataset_root: '' - test_on_train: false - only_test_set: false - load_eval_batches: true - num_load_workers: 4 - n_known_frames_for_test: 0 - dataset_class_type: JsonIndexDataset - path_manager_factory_class_type: PathManagerFactory - dataset_JsonIndexDataset_args: - limit_to: 0 - limit_sequences_to: 0 - pick_sequence: [] - exclude_sequence: [] - limit_category_to: [] - load_images: true - load_depths: true - load_depth_masks: true - load_masks: true - load_point_clouds: false - max_points: 0 - mask_images: false - mask_depths: false - image_height: 800 - image_width: 800 - box_crop: true - box_crop_mask_thr: 0.4 - box_crop_context: 0.3 - remove_empty_masks: true - n_frames_per_sequence: -1 - seed: 0 - sort_frames: false - path_manager_factory_PathManagerFactory_args: - silence_logs: true - dataset_map_provider_LlffDatasetMapProvider_args: - base_dir: ??? - object_name: ??? - path_manager_factory_class_type: PathManagerFactory - n_known_frames_for_test: null - path_manager_factory_PathManagerFactory_args: - silence_logs: true - downscale_factor: 4 - dataset_map_provider_RenderedMeshDatasetMapProvider_args: - num_views: 40 - data_file: null - azimuth_range: 180.0 - distance: 2.7 - resolution: 128 - use_point_light: true - gpu_idx: 0 - path_manager_factory_class_type: PathManagerFactory - path_manager_factory_PathManagerFactory_args: - silence_logs: true - data_loader_map_provider_SequenceDataLoaderMapProvider_args: - batch_size: 1 - num_workers: 0 - dataset_length_train: 0 - dataset_length_val: 0 - dataset_length_test: 0 - train_conditioning_type: SAME - val_conditioning_type: SAME - test_conditioning_type: KNOWN - images_per_seq_options: [] - sample_consecutive_frames: false - consecutive_frames_max_gap: 0 - consecutive_frames_max_gap_seconds: 0.1 - data_loader_map_provider_SimpleDataLoaderMapProvider_args: - batch_size: 1 - num_workers: 0 - dataset_length_train: 0 - dataset_length_val: 0 - dataset_length_test: 0 - data_loader_map_provider_TrainEvalDataLoaderMapProvider_args: - batch_size: 1 - num_workers: 0 - dataset_length_train: 0 - dataset_length_val: 0 - dataset_length_test: 0 - train_conditioning_type: SAME - val_conditioning_type: SAME - test_conditioning_type: KNOWN - images_per_seq_options: [] - sample_consecutive_frames: false - consecutive_frames_max_gap: 0 - consecutive_frames_max_gap_seconds: 0.1 -model_factory_ImplicitronModelFactory_args: - resume: true - model_class_type: GenericModel - resume_epoch: -1 - force_resume: false - model_GenericModel_args: - log_vars: - - loss_rgb_psnr_fg - - loss_rgb_psnr - - loss_rgb_mse - - loss_rgb_huber - - loss_depth_abs - - loss_depth_abs_fg - - loss_mask_neg_iou - - loss_mask_bce - - loss_mask_beta_prior - - loss_eikonal - - loss_density_tv - - loss_depth_neg_penalty - - loss_autodecoder_norm - - loss_prev_stage_rgb_mse - - loss_prev_stage_rgb_psnr_fg - - loss_prev_stage_rgb_psnr - - loss_prev_stage_mask_bce - - objective - - epoch - - sec/it - mask_images: true - mask_depths: true - render_image_width: 400 - render_image_height: 400 - mask_threshold: 0.5 - output_rasterized_mc: false - bg_color: - - 0.0 - - 0.0 - - 0.0 - num_passes: 1 - chunk_size_grid: 4096 - render_features_dimensions: 3 - tqdm_trigger_threshold: 16 - n_train_target_views: 1 - sampling_mode_training: mask_sample - sampling_mode_evaluation: full_grid - global_encoder_class_type: null - raysampler_class_type: AdaptiveRaySampler - renderer_class_type: MultiPassEmissionAbsorptionRenderer - image_feature_extractor_class_type: null - view_pooler_enabled: false - 
implicit_function_class_type: NeuralRadianceFieldImplicitFunction - view_metrics_class_type: ViewMetrics - regularization_metrics_class_type: RegularizationMetrics - loss_weights: - loss_rgb_mse: 1.0 - loss_prev_stage_rgb_mse: 1.0 - loss_mask_bce: 0.0 - loss_prev_stage_mask_bce: 0.0 - global_encoder_HarmonicTimeEncoder_args: - n_harmonic_functions: 10 - append_input: true - time_divisor: 1.0 - global_encoder_SequenceAutodecoder_args: - autodecoder_args: - encoding_dim: 0 - n_instances: 1 - init_scale: 1.0 - ignore_input: false - raysampler_AdaptiveRaySampler_args: - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - n_rays_per_image_sampled_from_mask: 1024 - n_rays_total_training: null - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - cast_ray_bundle_as_cone: false - scene_extent: 8.0 - scene_center: - - 0.0 - - 0.0 - - 0.0 - raysampler_NearFarRaySampler_args: - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - n_rays_per_image_sampled_from_mask: 1024 - n_rays_total_training: null - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - cast_ray_bundle_as_cone: false - min_depth: 0.1 - max_depth: 8.0 - renderer_LSTMRenderer_args: - num_raymarch_steps: 10 - init_depth: 17.0 - init_depth_noise_std: 0.0005 - hidden_size: 16 - n_feature_channels: 256 - bg_color: null - verbose: false - renderer_MultiPassEmissionAbsorptionRenderer_args: - raymarcher_class_type: EmissionAbsorptionRaymarcher - n_pts_per_ray_fine_training: 64 - n_pts_per_ray_fine_evaluation: 64 - stratified_sampling_coarse_training: true - stratified_sampling_coarse_evaluation: false - append_coarse_samples_to_fine: true - density_noise_std_train: 0.0 - return_weights: false - blurpool_weights: false - sample_pdf_eps: 1.0e-05 - raymarcher_CumsumRaymarcher_args: - surface_thickness: 1 - bg_color: - - 0.0 - replicate_last_interval: false - background_opacity: 0.0 - density_relu: true - blend_output: false - raymarcher_EmissionAbsorptionRaymarcher_args: - surface_thickness: 1 - bg_color: - - 0.0 - replicate_last_interval: false - background_opacity: 10000000000.0 - density_relu: true - blend_output: false - renderer_SignedDistanceFunctionRenderer_args: - ray_normal_coloring_network_args: - feature_vector_size: 3 - mode: idr - d_in: 9 - d_out: 3 - dims: - - 512 - - 512 - - 512 - - 512 - weight_norm: true - n_harmonic_functions_dir: 0 - pooled_feature_dim: 0 - bg_color: - - 0.0 - soft_mask_alpha: 50.0 - ray_tracer_args: - sdf_threshold: 5.0e-05 - line_search_step: 0.5 - line_step_iters: 1 - sphere_tracing_iters: 10 - n_steps: 100 - n_secant_steps: 8 - image_feature_extractor_ResNetFeatureExtractor_args: - name: resnet34 - pretrained: true - stages: - - 1 - - 2 - - 3 - - 4 - normalize_image: true - image_rescale: 0.16 - first_max_pool: true - proj_dim: 32 - l2_norm: true - add_masks: true - add_images: true - global_average_pool: false - feature_rescale: 1.0 - view_pooler_args: - feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator - view_sampler_args: - masked_sampling: false - sampling_mode: bilinear - feature_aggregator_AngleWeightedIdentityFeatureAggregator_args: - exclude_target_view: true - exclude_target_view_mask_features: true - concatenate_output: true - weight_by_ray_angle_gamma: 1.0 - min_ray_angle_weight: 0.1 - feature_aggregator_AngleWeightedReductionFeatureAggregator_args: - exclude_target_view: true - exclude_target_view_mask_features: true - concatenate_output: true - reduction_functions: - - AVG - - STD - 
weight_by_ray_angle_gamma: 1.0 - min_ray_angle_weight: 0.1 - feature_aggregator_IdentityFeatureAggregator_args: - exclude_target_view: true - exclude_target_view_mask_features: true - concatenate_output: true - feature_aggregator_ReductionFeatureAggregator_args: - exclude_target_view: true - exclude_target_view_mask_features: true - concatenate_output: true - reduction_functions: - - AVG - - STD - implicit_function_IdrFeatureField_args: - d_in: 3 - d_out: 1 - dims: - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - geometric_init: true - bias: 1.0 - skip_in: [] - weight_norm: true - n_harmonic_functions_xyz: 0 - pooled_feature_dim: 0 - implicit_function_NeRFormerImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_dir: 128 - input_xyz: true - xyz_ray_dir_in_camera_coords: false - use_integrated_positional_encoding: false - transformer_dim_down_factor: 2.0 - n_hidden_neurons_xyz: 80 - n_layers_xyz: 2 - append_xyz: - - 1 - implicit_function_NeuralRadianceFieldImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_dir: 128 - input_xyz: true - xyz_ray_dir_in_camera_coords: false - use_integrated_positional_encoding: false - transformer_dim_down_factor: 1.0 - n_hidden_neurons_xyz: 256 - n_layers_xyz: 8 - append_xyz: - - 5 - implicit_function_SRNHyperNetImplicitFunction_args: - hypernet_args: - n_harmonic_functions: 3 - n_hidden_units: 256 - n_layers: 2 - n_hidden_units_hypernet: 256 - n_layers_hypernet: 1 - in_features: 3 - out_features: 256 - xyz_in_camera_coords: false - pixel_generator_args: - n_harmonic_functions: 4 - n_hidden_units: 256 - n_hidden_units_color: 128 - n_layers: 2 - in_features: 256 - out_features: 3 - ray_dir_in_camera_coords: false - implicit_function_SRNImplicitFunction_args: - raymarch_function_args: - n_harmonic_functions: 3 - n_hidden_units: 256 - n_layers: 2 - in_features: 3 - out_features: 256 - xyz_in_camera_coords: false - raymarch_function: null - pixel_generator_args: - n_harmonic_functions: 4 - n_hidden_units: 256 - n_hidden_units_color: 128 - n_layers: 2 - in_features: 256 - out_features: 3 - ray_dir_in_camera_coords: false - implicit_function_VoxelGridImplicitFunction_args: - harmonic_embedder_xyz_density_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - harmonic_embedder_xyz_color_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - harmonic_embedder_dir_color_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - decoder_density_class_type: MLPDecoder - decoder_color_class_type: MLPDecoder - use_multiple_streams: true - xyz_ray_dir_in_camera_coords: false - scaffold_calculating_epochs: [] - scaffold_resolution: - - 128 - - 128 - - 128 - scaffold_empty_space_threshold: 0.001 - scaffold_occupancy_chunk_size: -1 - scaffold_max_pool_kernel_size: 3 - scaffold_filter_points: true - volume_cropping_epochs: [] - voxel_grid_density_args: - voxel_grid_class_type: FullResolutionVoxelGrid - extents: - - 2.0 - - 2.0 - - 2.0 - translation: - - 0.0 - - 0.0 - - 0.0 - init_std: 0.1 - init_mean: 0.0 - hold_voxel_grid_as_parameters: true - param_groups: {} - voxel_grid_CPFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: 24 - basis_matrix: true - voxel_grid_FullResolutionVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - 
n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - voxel_grid_VMFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: null - distribution_of_components: null - basis_matrix: true - voxel_grid_color_args: - voxel_grid_class_type: FullResolutionVoxelGrid - extents: - - 2.0 - - 2.0 - - 2.0 - translation: - - 0.0 - - 0.0 - - 0.0 - init_std: 0.1 - init_mean: 0.0 - hold_voxel_grid_as_parameters: true - param_groups: {} - voxel_grid_CPFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: 24 - basis_matrix: true - voxel_grid_FullResolutionVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - voxel_grid_VMFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: null - distribution_of_components: null - basis_matrix: true - decoder_density_ElementwiseDecoder_args: - scale: 1.0 - shift: 0.0 - operation: IDENTITY - decoder_density_MLPDecoder_args: - param_groups: {} - network_args: - n_layers: 8 - output_dim: 256 - skip_dim: 39 - hidden_dim: 256 - input_skips: - - 5 - skip_affine_trans: false - last_layer_bias_init: null - last_activation: RELU - use_xavier_init: true - decoder_color_ElementwiseDecoder_args: - scale: 1.0 - shift: 0.0 - operation: IDENTITY - decoder_color_MLPDecoder_args: - param_groups: {} - network_args: - n_layers: 8 - output_dim: 256 - skip_dim: 39 - hidden_dim: 256 - input_skips: - - 5 - skip_affine_trans: false - last_layer_bias_init: null - last_activation: RELU - use_xavier_init: true - view_metrics_ViewMetrics_args: {} - regularization_metrics_RegularizationMetrics_args: {} - model_OverfitModel_args: - log_vars: - - loss_rgb_psnr_fg - - loss_rgb_psnr - - loss_rgb_mse - - loss_rgb_huber - - loss_depth_abs - - loss_depth_abs_fg - - loss_mask_neg_iou - - loss_mask_bce - - loss_mask_beta_prior - - loss_eikonal - - loss_density_tv - - loss_depth_neg_penalty - - loss_autodecoder_norm - - loss_prev_stage_rgb_mse - - loss_prev_stage_rgb_psnr_fg - - loss_prev_stage_rgb_psnr - - loss_prev_stage_mask_bce - - objective - - epoch - - sec/it - mask_images: true - mask_depths: true - render_image_width: 400 - render_image_height: 400 - mask_threshold: 0.5 - output_rasterized_mc: false - bg_color: - - 0.0 - - 0.0 - - 0.0 - chunk_size_grid: 4096 - render_features_dimensions: 3 - tqdm_trigger_threshold: 16 - n_train_target_views: 1 - sampling_mode_training: mask_sample - sampling_mode_evaluation: full_grid - global_encoder_class_type: null - raysampler_class_type: AdaptiveRaySampler - renderer_class_type: MultiPassEmissionAbsorptionRenderer - share_implicit_function_across_passes: false - implicit_function_class_type: NeuralRadianceFieldImplicitFunction - coarse_implicit_function_class_type: null - view_metrics_class_type: ViewMetrics - regularization_metrics_class_type: RegularizationMetrics - loss_weights: - loss_rgb_mse: 1.0 - loss_prev_stage_rgb_mse: 1.0 - loss_mask_bce: 0.0 - loss_prev_stage_mask_bce: 0.0 - global_encoder_HarmonicTimeEncoder_args: - n_harmonic_functions: 10 - append_input: true - time_divisor: 1.0 - global_encoder_SequenceAutodecoder_args: - autodecoder_args: - encoding_dim: 0 - n_instances: 1 - init_scale: 1.0 - ignore_input: false - 
raysampler_AdaptiveRaySampler_args: - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - n_rays_per_image_sampled_from_mask: 1024 - n_rays_total_training: null - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - cast_ray_bundle_as_cone: false - scene_extent: 8.0 - scene_center: - - 0.0 - - 0.0 - - 0.0 - raysampler_NearFarRaySampler_args: - n_pts_per_ray_training: 64 - n_pts_per_ray_evaluation: 64 - n_rays_per_image_sampled_from_mask: 1024 - n_rays_total_training: null - stratified_point_sampling_training: true - stratified_point_sampling_evaluation: false - cast_ray_bundle_as_cone: false - min_depth: 0.1 - max_depth: 8.0 - renderer_LSTMRenderer_args: - num_raymarch_steps: 10 - init_depth: 17.0 - init_depth_noise_std: 0.0005 - hidden_size: 16 - n_feature_channels: 256 - bg_color: null - verbose: false - renderer_MultiPassEmissionAbsorptionRenderer_args: - raymarcher_class_type: EmissionAbsorptionRaymarcher - n_pts_per_ray_fine_training: 64 - n_pts_per_ray_fine_evaluation: 64 - stratified_sampling_coarse_training: true - stratified_sampling_coarse_evaluation: false - append_coarse_samples_to_fine: true - density_noise_std_train: 0.0 - return_weights: false - blurpool_weights: false - sample_pdf_eps: 1.0e-05 - raymarcher_CumsumRaymarcher_args: - surface_thickness: 1 - bg_color: - - 0.0 - replicate_last_interval: false - background_opacity: 0.0 - density_relu: true - blend_output: false - raymarcher_EmissionAbsorptionRaymarcher_args: - surface_thickness: 1 - bg_color: - - 0.0 - replicate_last_interval: false - background_opacity: 10000000000.0 - density_relu: true - blend_output: false - renderer_SignedDistanceFunctionRenderer_args: - ray_normal_coloring_network_args: - feature_vector_size: 3 - mode: idr - d_in: 9 - d_out: 3 - dims: - - 512 - - 512 - - 512 - - 512 - weight_norm: true - n_harmonic_functions_dir: 0 - pooled_feature_dim: 0 - bg_color: - - 0.0 - soft_mask_alpha: 50.0 - ray_tracer_args: - sdf_threshold: 5.0e-05 - line_search_step: 0.5 - line_step_iters: 1 - sphere_tracing_iters: 10 - n_steps: 100 - n_secant_steps: 8 - implicit_function_IdrFeatureField_args: - d_in: 3 - d_out: 1 - dims: - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - geometric_init: true - bias: 1.0 - skip_in: [] - weight_norm: true - n_harmonic_functions_xyz: 0 - pooled_feature_dim: 0 - implicit_function_NeRFormerImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_dir: 128 - input_xyz: true - xyz_ray_dir_in_camera_coords: false - use_integrated_positional_encoding: false - transformer_dim_down_factor: 2.0 - n_hidden_neurons_xyz: 80 - n_layers_xyz: 2 - append_xyz: - - 1 - implicit_function_NeuralRadianceFieldImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_dir: 128 - input_xyz: true - xyz_ray_dir_in_camera_coords: false - use_integrated_positional_encoding: false - transformer_dim_down_factor: 1.0 - n_hidden_neurons_xyz: 256 - n_layers_xyz: 8 - append_xyz: - - 5 - implicit_function_SRNHyperNetImplicitFunction_args: - latent_dim_hypernet: 0 - hypernet_args: - n_harmonic_functions: 3 - n_hidden_units: 256 - n_layers: 2 - n_hidden_units_hypernet: 256 - n_layers_hypernet: 1 - in_features: 3 - out_features: 256 - xyz_in_camera_coords: false - pixel_generator_args: - n_harmonic_functions: 4 - n_hidden_units: 256 - n_hidden_units_color: 128 - n_layers: 2 - in_features: 256 - out_features: 3 - ray_dir_in_camera_coords: false - 
implicit_function_SRNImplicitFunction_args: - raymarch_function_args: - n_harmonic_functions: 3 - n_hidden_units: 256 - n_layers: 2 - in_features: 3 - out_features: 256 - xyz_in_camera_coords: false - raymarch_function: null - pixel_generator_args: - n_harmonic_functions: 4 - n_hidden_units: 256 - n_hidden_units_color: 128 - n_layers: 2 - in_features: 256 - out_features: 3 - ray_dir_in_camera_coords: false - implicit_function_VoxelGridImplicitFunction_args: - harmonic_embedder_xyz_density_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - harmonic_embedder_xyz_color_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - harmonic_embedder_dir_color_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - decoder_density_class_type: MLPDecoder - decoder_color_class_type: MLPDecoder - use_multiple_streams: true - xyz_ray_dir_in_camera_coords: false - scaffold_calculating_epochs: [] - scaffold_resolution: - - 128 - - 128 - - 128 - scaffold_empty_space_threshold: 0.001 - scaffold_occupancy_chunk_size: -1 - scaffold_max_pool_kernel_size: 3 - scaffold_filter_points: true - volume_cropping_epochs: [] - voxel_grid_density_args: - voxel_grid_class_type: FullResolutionVoxelGrid - extents: - - 2.0 - - 2.0 - - 2.0 - translation: - - 0.0 - - 0.0 - - 0.0 - init_std: 0.1 - init_mean: 0.0 - hold_voxel_grid_as_parameters: true - param_groups: {} - voxel_grid_CPFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: 24 - basis_matrix: true - voxel_grid_FullResolutionVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - voxel_grid_VMFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: null - distribution_of_components: null - basis_matrix: true - voxel_grid_color_args: - voxel_grid_class_type: FullResolutionVoxelGrid - extents: - - 2.0 - - 2.0 - - 2.0 - translation: - - 0.0 - - 0.0 - - 0.0 - init_std: 0.1 - init_mean: 0.0 - hold_voxel_grid_as_parameters: true - param_groups: {} - voxel_grid_CPFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: 24 - basis_matrix: true - voxel_grid_FullResolutionVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - voxel_grid_VMFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: null - distribution_of_components: null - basis_matrix: true - decoder_density_ElementwiseDecoder_args: - scale: 1.0 - shift: 0.0 - operation: IDENTITY - decoder_density_MLPDecoder_args: - param_groups: {} - network_args: - n_layers: 8 - output_dim: 256 - skip_dim: 39 - hidden_dim: 256 - input_skips: - - 5 - skip_affine_trans: false - last_layer_bias_init: null - last_activation: RELU - use_xavier_init: true - decoder_color_ElementwiseDecoder_args: - scale: 1.0 - shift: 0.0 - operation: IDENTITY - decoder_color_MLPDecoder_args: - param_groups: {} - network_args: - n_layers: 8 - output_dim: 256 - skip_dim: 39 - hidden_dim: 256 - input_skips: - - 5 - skip_affine_trans: false - last_layer_bias_init: 
null - last_activation: RELU - use_xavier_init: true - coarse_implicit_function_IdrFeatureField_args: - d_in: 3 - d_out: 1 - dims: - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - - 512 - geometric_init: true - bias: 1.0 - skip_in: [] - weight_norm: true - n_harmonic_functions_xyz: 0 - pooled_feature_dim: 0 - coarse_implicit_function_NeRFormerImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_dir: 128 - input_xyz: true - xyz_ray_dir_in_camera_coords: false - use_integrated_positional_encoding: false - transformer_dim_down_factor: 2.0 - n_hidden_neurons_xyz: 80 - n_layers_xyz: 2 - append_xyz: - - 1 - coarse_implicit_function_NeuralRadianceFieldImplicitFunction_args: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_dir: 128 - input_xyz: true - xyz_ray_dir_in_camera_coords: false - use_integrated_positional_encoding: false - transformer_dim_down_factor: 1.0 - n_hidden_neurons_xyz: 256 - n_layers_xyz: 8 - append_xyz: - - 5 - coarse_implicit_function_SRNHyperNetImplicitFunction_args: - latent_dim_hypernet: 0 - hypernet_args: - n_harmonic_functions: 3 - n_hidden_units: 256 - n_layers: 2 - n_hidden_units_hypernet: 256 - n_layers_hypernet: 1 - in_features: 3 - out_features: 256 - xyz_in_camera_coords: false - pixel_generator_args: - n_harmonic_functions: 4 - n_hidden_units: 256 - n_hidden_units_color: 128 - n_layers: 2 - in_features: 256 - out_features: 3 - ray_dir_in_camera_coords: false - coarse_implicit_function_SRNImplicitFunction_args: - raymarch_function_args: - n_harmonic_functions: 3 - n_hidden_units: 256 - n_layers: 2 - in_features: 3 - out_features: 256 - xyz_in_camera_coords: false - raymarch_function: null - pixel_generator_args: - n_harmonic_functions: 4 - n_hidden_units: 256 - n_hidden_units_color: 128 - n_layers: 2 - in_features: 256 - out_features: 3 - ray_dir_in_camera_coords: false - coarse_implicit_function_VoxelGridImplicitFunction_args: - harmonic_embedder_xyz_density_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - harmonic_embedder_xyz_color_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - harmonic_embedder_dir_color_args: - n_harmonic_functions: 6 - omega_0: 1.0 - logspace: true - append_input: true - decoder_density_class_type: MLPDecoder - decoder_color_class_type: MLPDecoder - use_multiple_streams: true - xyz_ray_dir_in_camera_coords: false - scaffold_calculating_epochs: [] - scaffold_resolution: - - 128 - - 128 - - 128 - scaffold_empty_space_threshold: 0.001 - scaffold_occupancy_chunk_size: -1 - scaffold_max_pool_kernel_size: 3 - scaffold_filter_points: true - volume_cropping_epochs: [] - voxel_grid_density_args: - voxel_grid_class_type: FullResolutionVoxelGrid - extents: - - 2.0 - - 2.0 - - 2.0 - translation: - - 0.0 - - 0.0 - - 0.0 - init_std: 0.1 - init_mean: 0.0 - hold_voxel_grid_as_parameters: true - param_groups: {} - voxel_grid_CPFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: 24 - basis_matrix: true - voxel_grid_FullResolutionVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - voxel_grid_VMFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: null - distribution_of_components: null - 
basis_matrix: true - voxel_grid_color_args: - voxel_grid_class_type: FullResolutionVoxelGrid - extents: - - 2.0 - - 2.0 - - 2.0 - translation: - - 0.0 - - 0.0 - - 0.0 - init_std: 0.1 - init_mean: 0.0 - hold_voxel_grid_as_parameters: true - param_groups: {} - voxel_grid_CPFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: 24 - basis_matrix: true - voxel_grid_FullResolutionVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - voxel_grid_VMFactorizedVoxelGrid_args: - align_corners: true - padding: zeros - mode: bilinear - n_features: 1 - resolution_changes: - 0: - - 128 - - 128 - - 128 - n_components: null - distribution_of_components: null - basis_matrix: true - decoder_density_ElementwiseDecoder_args: - scale: 1.0 - shift: 0.0 - operation: IDENTITY - decoder_density_MLPDecoder_args: - param_groups: {} - network_args: - n_layers: 8 - output_dim: 256 - skip_dim: 39 - hidden_dim: 256 - input_skips: - - 5 - skip_affine_trans: false - last_layer_bias_init: null - last_activation: RELU - use_xavier_init: true - decoder_color_ElementwiseDecoder_args: - scale: 1.0 - shift: 0.0 - operation: IDENTITY - decoder_color_MLPDecoder_args: - param_groups: {} - network_args: - n_layers: 8 - output_dim: 256 - skip_dim: 39 - hidden_dim: 256 - input_skips: - - 5 - skip_affine_trans: false - last_layer_bias_init: null - last_activation: RELU - use_xavier_init: true - view_metrics_ViewMetrics_args: {} - regularization_metrics_RegularizationMetrics_args: {} -optimizer_factory_ImplicitronOptimizerFactory_args: - betas: - - 0.9 - - 0.999 - breed: Adam - exponential_lr_step_size: 250 - gamma: 0.1 - lr: 0.0005 - lr_policy: MultiStepLR - momentum: 0.9 - multistep_lr_milestones: [] - weight_decay: 0.0 - linear_exponential_lr_milestone: 200 - linear_exponential_start_gamma: 0.1 - foreach: true - group_learning_rates: {} -training_loop_ImplicitronTrainingLoop_args: - evaluator_class_type: ImplicitronEvaluator - evaluator_ImplicitronEvaluator_args: - is_multisequence: false - camera_difficulty_bin_breaks: - - 0.97 - - 0.98 - eval_only: false - max_epochs: 1000 - store_checkpoints: true - store_checkpoints_purge: 1 - test_interval: -1 - test_when_finished: false - validation_interval: 1 - clip_grad: 0.0 - metric_print_interval: 5 - visualize_interval: 1000 - visdom_env: '' - visdom_port: 8097 - visdom_server: http://127.0.0.1 diff --git a/pytorch3d/projects/implicitron_trainer/tests/test_experiment.py b/pytorch3d/projects/implicitron_trainer/tests/test_experiment.py deleted file mode 100644 index 486d2134aa3d8b664bf3ed00efa1df0a812aafa1..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/tests/test_experiment.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -import tempfile -import unittest -from pathlib import Path - -import torch - -from hydra import compose, initialize_config_dir -from omegaconf import OmegaConf -from projects.implicitron_trainer.impl.optimizer_factory import ( - ImplicitronOptimizerFactory, -) - -from .. 
import experiment -from .utils import interactive_testing_requested, intercept_logs - -internal = os.environ.get("FB_TEST", False) - - -DATA_DIR = Path(__file__).resolve().parent -IMPLICITRON_CONFIGS_DIR = Path(__file__).resolve().parent.parent / "configs" -DEBUG: bool = False - -# TODO: -# - add enough files to skateboard_first_5 that this works on RE. -# - share common code with PyTorch3D tests? - - -def _parse_float_from_log(line): - return float(line.split()[-1]) - - -class TestExperiment(unittest.TestCase): - def setUp(self): - self.maxDiff = None - - def test_from_defaults(self): - # Test making minimal changes to the dataclass defaults. - if not interactive_testing_requested() or not internal: - return - - # Manually override config values. Note that this is not necessary out- - # side of the tests! - cfg = OmegaConf.structured(experiment.Experiment) - cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = ( - "JsonIndexDatasetMapProvider" - ) - dataset_args = ( - cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args - ) - dataloader_args = ( - cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args - ) - dataset_args.category = "skateboard" - dataset_args.test_restrict_sequence_id = 0 - dataset_args.dataset_root = "manifold://co3d/tree/extracted" - dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5 - dataset_args.dataset_JsonIndexDataset_args.image_height = 80 - dataset_args.dataset_JsonIndexDataset_args.image_width = 80 - dataloader_args.dataset_length_train = 1 - dataloader_args.dataset_length_val = 1 - cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2 - cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False - cfg.optimizer_factory_ImplicitronOptimizerFactory_args.multistep_lr_milestones = [ - 0, - 1, - ] - - if DEBUG: - experiment.dump_cfg(cfg) - with intercept_logs( - logger_name="projects.implicitron_trainer.impl.training_loop", - regexp="LR change!", - ) as intercepted_logs: - experiment_runner = experiment.Experiment(**cfg) - experiment_runner.run() - - # Make sure LR decreased on 0th and 1st epoch 10fold. - self.assertEqual(intercepted_logs[0].split()[-1], "5e-06") - - def test_exponential_lr(self): - # Test making minimal changes to the dataclass defaults. 
- if not interactive_testing_requested(): - return - cfg = OmegaConf.structured(experiment.Experiment) - cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = ( - "JsonIndexDatasetMapProvider" - ) - dataset_args = ( - cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args - ) - dataloader_args = ( - cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args - ) - dataset_args.category = "skateboard" - dataset_args.test_restrict_sequence_id = 0 - dataset_args.dataset_root = "manifold://co3d/tree/extracted" - dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5 - dataset_args.dataset_JsonIndexDataset_args.image_height = 80 - dataset_args.dataset_JsonIndexDataset_args.image_width = 80 - dataloader_args.dataset_length_train = 1 - dataloader_args.dataset_length_val = 1 - cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2 - cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False - cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential" - cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = ( - 2 - ) - - if DEBUG: - experiment.dump_cfg(cfg) - with intercept_logs( - logger_name="projects.implicitron_trainer.impl.training_loop", - regexp="LR change!", - ) as intercepted_logs: - experiment_runner = experiment.Experiment(**cfg) - experiment_runner.run() - - # Make sure we followed the exponential lr schedule with gamma=0.1, - # exponential_lr_step_size=2 -- so after two epochs, should - # decrease lr 10x to 5e-5. - self.assertEqual(intercepted_logs[0].split()[-1], "0.00015811388300841897") - self.assertEqual(intercepted_logs[1].split()[-1], "5e-05") - - def test_yaml_contents(self): - # Check that the default config values, defined by Experiment and its - # members, is what we expect it to be. - cfg = OmegaConf.structured(experiment.Experiment) - # the following removes the possible effect of env variables - ds_arg = cfg.data_source_ImplicitronDataSource_args - ds_arg.dataset_map_provider_JsonIndexDatasetMapProvider_args.dataset_root = "" - ds_arg.dataset_map_provider_JsonIndexDatasetMapProviderV2_args.dataset_root = "" - if "dataset_map_provider_SqlIndexDatasetMapProvider_args" in ds_arg: - del ds_arg.dataset_map_provider_SqlIndexDatasetMapProvider_args - cfg.training_loop_ImplicitronTrainingLoop_args.visdom_port = 8097 - yaml = OmegaConf.to_yaml(cfg, sort_keys=False) - if DEBUG: - (DATA_DIR / "experiment.yaml").write_text(yaml) - self.assertEqual(yaml, (DATA_DIR / "experiment.yaml").read_text()) - - def test_load_configs(self): - # Check that all the pre-prepared configs are valid. 
- config_files = [] - - for pattern in ( - "repro_singleseq*.yaml", - "repro_multiseq*.yaml", - "overfit_singleseq*.yaml", - ): - config_files.extend( - [ - f - for f in IMPLICITRON_CONFIGS_DIR.glob(pattern) - if not f.name.endswith("_base.yaml") - ] - ) - - for file in config_files: - with self.subTest(file.name): - with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)): - compose(file.name) - - def test_optimizer_factory(self): - model = torch.nn.Linear(2, 2) - - adam, sched = ImplicitronOptimizerFactory(breed="Adam")(0, model) - self.assertIsInstance(adam, torch.optim.Adam) - sgd, sched = ImplicitronOptimizerFactory(breed="SGD")(0, model) - self.assertIsInstance(sgd, torch.optim.SGD) - adagrad, sched = ImplicitronOptimizerFactory(breed="Adagrad")(0, model) - self.assertIsInstance(adagrad, torch.optim.Adagrad) - - -class TestNerfRepro(unittest.TestCase): - @unittest.skip("This test runs full blender training.") - def test_nerf_blender(self): - # Train vanilla NERF. - # Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first! - if not interactive_testing_requested(): - return - with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)): - cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[]) - experiment_runner = experiment.Experiment(**cfg) - experiment.dump_cfg(cfg) - experiment_runner.run() - - @unittest.skip("This test runs full llff training.") - def test_nerf_llff(self): - # Train vanilla NERF. - # Set env vars LLFF_DATASET_ROOT and LLFF_SINGLESEQ_CLASS first! - LLFF_SINGLESEQ_CLASS = os.environ["LLFF_SINGLESEQ_CLASS"] - if not interactive_testing_requested(): - return - with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)): - cfg = compose( - config_name=f"repro_singleseq_nerf_llff_{LLFF_SINGLESEQ_CLASS}", - overrides=[], - ) - experiment_runner = experiment.Experiment(**cfg) - experiment.dump_cfg(cfg) - experiment_runner.run() - - @unittest.skip("This test runs nerf training on co3d v2 - manyview.") - def test_nerf_co3dv2_manyview(self): - # Train NERF - if not interactive_testing_requested(): - return - with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)): - cfg = compose( - config_name="repro_singleseq_v2_nerf", - overrides=[], - ) - experiment_runner = experiment.Experiment(**cfg) - experiment.dump_cfg(cfg) - experiment_runner.run() - - @unittest.skip("This test runs nerformer training on co3d v2 - fewview.") - def test_nerformer_co3dv2_fewview(self): - # Train NeRFormer - if not interactive_testing_requested(): - return - with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)): - cfg = compose( - config_name="repro_multiseq_v2_nerformer", - overrides=[], - ) - experiment_runner = experiment.Experiment(**cfg) - experiment.dump_cfg(cfg) - experiment_runner.run() - - @unittest.skip("This test checks resuming of the NeRF training.") - def test_nerf_blender_resume(self): - # Train one train batch of NeRF, then resume for one more batch. - # Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first! 
- if not interactive_testing_requested(): - return - with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)): - with tempfile.TemporaryDirectory() as exp_dir: - cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[]) - cfg.exp_dir = exp_dir - - # set dataset len to 1 - - # fmt: off - ( - cfg - .data_source_ImplicitronDataSource_args - .data_loader_map_provider_SequenceDataLoaderMapProvider_args - .dataset_length_train - ) = 1 - # fmt: on - - # run for one epoch - cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 1 - experiment_runner = experiment.Experiment(**cfg) - experiment.dump_cfg(cfg) - experiment_runner.run() - - # update num epochs + 2, let the optimizer resume - cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 3 - experiment_runner = experiment.Experiment(**cfg) - experiment_runner.run() - - # start from scratch - cfg.model_factory_ImplicitronModelFactory_args.resume = False - experiment_runner = experiment.Experiment(**cfg) - experiment_runner.run() - - # force resume from epoch 1 - cfg.model_factory_ImplicitronModelFactory_args.resume = True - cfg.model_factory_ImplicitronModelFactory_args.force_resume = True - cfg.model_factory_ImplicitronModelFactory_args.resume_epoch = 1 - experiment_runner = experiment.Experiment(**cfg) - experiment_runner.run() diff --git a/pytorch3d/projects/implicitron_trainer/tests/test_optimizer_factory.py b/pytorch3d/projects/implicitron_trainer/tests/test_optimizer_factory.py deleted file mode 100644 index ef7517fe7269cbbc31701b12dcf838114148366f..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/tests/test_optimizer_factory.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -import os -import unittest - -import torch -from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args - -from ..impl.optimizer_factory import ( - ImplicitronOptimizerFactory, - logger as factory_logger, -) - -internal = os.environ.get("FB_TEST", False) - - -class TestOptimizerFactory(unittest.TestCase): - def setUp(self) -> None: - torch.manual_seed(42) - expand_args_fields(ImplicitronOptimizerFactory) - - def _get_param_groups(self, model): - default_cfg = get_default_args(ImplicitronOptimizerFactory) - factory = ImplicitronOptimizerFactory(default_cfg) - oldlevel = factory_logger.level - factory_logger.setLevel(logging.ERROR) - out = factory._get_param_groups(model) - factory_logger.setLevel(oldlevel) - return out - - def _assert_allin(self, a, param_groups, key): - """ - Asserts that all the parameters in a are in the group - named by key. - """ - with self.subTest(f"Testing key {key}"): - b = param_groups[key] - for el in a: - if el not in b: - raise ValueError( - f"Element {el}\n\n from:\n\n {a}\n\n not in:\n\n {b}\n\n." - + f" Full param groups = \n\n{param_groups}" - ) - for el in b: - if el not in a: - raise ValueError( - f"Element {el}\n\n from:\n\n {b}\n\n not in:\n\n {a}\n\n." 
- + f" Full param groups = \n\n{param_groups}" - ) - - def test_default_param_group_assignment(self): - pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)] - na, nb = Node(params=[pa]), Node(params=[pb]) - root = Node(children=[na, nb], params=[pc]) - param_groups = self._get_param_groups(root) - self._assert_allin([pa, pb, pc], param_groups, "default") - - def test_member_overrides_default_param_group_assignment(self): - pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)] - na, nb = Node(params=[pa]), Node(params=[pb]) - root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb"}) - param_groups = self._get_param_groups(root) - self._assert_allin([pa, pc], param_groups, "default") - self._assert_allin([pb], param_groups, "pb") - - def test_self_overrides_member_param_group_assignment(self): - pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)] - na, nb = Node(params=[pa]), Node(params=[pb], param_groups={"self": "pb_self"}) - root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"}) - param_groups = self._get_param_groups(root) - self._assert_allin([pa, pc], param_groups, "default") - self._assert_allin([pb], param_groups, "pb_self") - assert len(param_groups["pb_member"]) == 0, param_groups - - def test_param_overrides_self_param_group_assignment(self): - pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)] - na, nb = Node(params=[pa]), Node( - params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"} - ) - root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"}) - param_groups = self._get_param_groups(root) - self._assert_allin([pa, pc], param_groups, "default") - self._assert_allin([pb], param_groups, "pb_self") - assert len(param_groups["pb_member"]) == 0, param_groups - - def test_no_param_groups_defined(self): - pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)] - na, nb = Node(params=[pa]), Node(params=[pb]) - root = Node(children=[na, nb], params=[pc]) - param_groups = self._get_param_groups(root) - self._assert_allin([pa, pb, pc], param_groups, "default") - - def test_double_dotted(self): - pa, pb = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(2)] - na = Node(params=[pa, pb]) - nb = Node(children=[na]) - root = Node(children=[nb], param_groups={"m0.m0.p0": "X", "m0.m0": "Y"}) - param_groups = self._get_param_groups(root) - self._assert_allin([pa], param_groups, "X") - self._assert_allin([pb], param_groups, "Y") - - def test_tree_param_groups_defined(self): - """ - Test generic tree assignment. - - A0 - |--------------------------- - | | | - Bb M J- - |----- |------- - | | | | - C Ddg K Ll - |-------------- - | | | | - E4 Ff G H- - - All nodes have one parameter. 
Character next to the capital - letter means they have added something to their `parameter_groups`: - - small letter same as capital means self is set to that letter - - small letter different then capital means that member is set - (the one that is named like that) - - number means parameter's parameter_group is set like that - - "-" means it does not have `parameter_groups` member - """ - p = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(12)] - L = Node(params=[p[11]], param_groups={"self": "l"}) - K = Node(params=[p[10]], param_groups={}) - J = Node(params=[p[9]], param_groups=None, children=[K, L]) - M = Node(params=[p[8]], param_groups={}) - - E = Node(params=[p[4]], param_groups={"p0": "4"}) - F = Node(params=[p[5]], param_groups={"self": "f"}) - G = Node(params=[p[6]], param_groups={}) - H = Node(params=[p[7]], param_groups=None) - - D = Node( - params=[p[3]], param_groups={"self": "d", "m2": "g"}, children=[E, F, G, H] - ) - C = Node(params=[p[2]], param_groups={}) - - B = Node(params=[p[1]], param_groups={"self": "b"}, children=[C, D]) - - A = Node(params=[p[0]], param_groups={"p0": "0"}, children=[B, M, J]) - - param_groups = self._get_param_groups(A) - - # if parts of the group belong to two different categories assert is repeated - # parameter level - self._assert_allin([p[0]], param_groups, "0") - self._assert_allin([p[4]], param_groups, "4") - # self level - self._assert_allin([p[5]], param_groups, "f") - self._assert_allin([p[11]], param_groups, "l") - self._assert_allin([p[2], p[1]], param_groups, "b") - self._assert_allin([p[7], p[3]], param_groups, "d") - # member level - self._assert_allin([p[6]], param_groups, "g") - # inherit level - self._assert_allin([p[7], p[3]], param_groups, "d") - self._assert_allin([p[2], p[1]], param_groups, "b") - # default level - self._assert_allin([p[8], p[9], p[10]], param_groups, "default") - - -class Node(torch.nn.Module): - def __init__(self, children=(), params=(), param_groups=None): - super().__init__() - for i, child in enumerate(children): - self.add_module("m" + str(i), child) - for i, param in enumerate(params): - setattr(self, "p" + str(i), param) - if param_groups is not None: - self.param_groups = param_groups - - def __str__(self): - return ( - "modules:\n" + str(self._modules) + "\nparameters\n" + str(self._parameters) - ) diff --git a/pytorch3d/projects/implicitron_trainer/tests/test_visualize.py b/pytorch3d/projects/implicitron_trainer/tests/test_visualize.py deleted file mode 100644 index d414a05d95af2870de7de103969ebbd266939fa1..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/tests/test_visualize.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -import unittest - -from .. 
import visualize_reconstruction -from .utils import interactive_testing_requested - -internal = os.environ.get("FB_TEST", False) - - -class TestVisualize(unittest.TestCase): - def test_from_defaults(self): - if not interactive_testing_requested(): - return - checkpoint_dir = os.environ["exp_dir"] - argv = [ - f"exp_dir={checkpoint_dir}", - "n_eval_cameras=40", - "render_size=[64,64]", - "video_size=[256,256]", - ] - visualize_reconstruction.main(argv) diff --git a/pytorch3d/projects/implicitron_trainer/tests/utils.py b/pytorch3d/projects/implicitron_trainer/tests/utils.py deleted file mode 100644 index 4b6d84ae575aeb6cf8b0411600b7e55e51fce3a9..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/tests/utils.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import contextlib -import logging -import os -import re - - -@contextlib.contextmanager -def intercept_logs(logger_name: str, regexp: str): - # Intercept logs that match a regexp, from a given logger. - intercepted_messages = [] - logger = logging.getLogger(logger_name) - - class LoggerInterceptor(logging.Filter): - def filter(self, record): - message = record.getMessage() - if re.search(regexp, message): - intercepted_messages.append(message) - return True - - interceptor = LoggerInterceptor() - logger.addFilter(interceptor) - try: - yield intercepted_messages - finally: - logger.removeFilter(interceptor) - - -def interactive_testing_requested() -> bool: - """ - Certain tests are only useful when run interactively, and so are not regularly run. - These are activated by this funciton returning True, which the user requests by - setting the environment variable `PYTORCH3D_INTERACTIVE_TESTING` to 1. - """ - return os.environ.get("PYTORCH3D_INTERACTIVE_TESTING", "") == "1" diff --git a/pytorch3d/projects/implicitron_trainer/visualize_reconstruction.py b/pytorch3d/projects/implicitron_trainer/visualize_reconstruction.py deleted file mode 100644 index 618d1aa61e312594f7ebf89a4bb4a82e8a8e21df..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/implicitron_trainer/visualize_reconstruction.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -""" -Script to visualize a previously trained model. 
Example call: - - pytorch3d_implicitron_visualizer \ - exp_dir='./exps/checkpoint_dir' visdom_show_preds=True visdom_port=8097 \ - n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]" -""" - -import os -import sys -from typing import Optional, Tuple - -import numpy as np -import torch -from omegaconf import DictConfig, OmegaConf -from pytorch3d.implicitron.models.visualization.render_flyaround import render_flyaround -from pytorch3d.implicitron.tools.config import enable_get_default_args, get_default_args - -from .experiment import Experiment - - -def visualize_reconstruction( - exp_dir: str = "", - restrict_sequence_name: Optional[str] = None, - output_directory: Optional[str] = None, - render_size: Tuple[int, int] = (512, 512), - video_size: Optional[Tuple[int, int]] = None, - split: str = "train", - n_source_views: int = 9, - n_eval_cameras: int = 40, - visdom_show_preds: bool = False, - visdom_server: str = "http://127.0.0.1", - visdom_port: int = 8097, - visdom_env: Optional[str] = None, - **render_flyaround_kwargs, -) -> None: - """ - Given an `exp_dir` containing a trained Implicitron model, generates videos consisting - of renderes of sequences from the dataset used to train and evaluate the trained - Implicitron model. - - Args: - exp_dir: Implicitron experiment directory. - restrict_sequence_name: If set, defines the list of sequences to visualize. - output_directory: If set, defines a custom directory to output visualizations to. - render_size: The size (HxW) of the generated renders. - video_size: The size (HxW) of the output video. - split: The dataset split to use for visualization. - Can be "train" / "val" / "test". - n_source_views: The number of source views added to each rendered batch. These - views are required inputs for models such as NeRFormer / NeRF-WCE. - n_eval_cameras: The number of cameras each fly-around trajectory. - visdom_show_preds: If `True`, outputs visualizations to visdom. - visdom_server: The address of the visdom server. - visdom_port: The port of the visdom server. - visdom_env: If set, defines a custom name for the visdom environment. - render_flyaround_kwargs: Keyword arguments passed to the invoked `render_flyaround` - function (see `pytorch3d.implicitron.models.visualization.render_flyaround`). - """ - - # In case an output directory is specified use it. 
If no output_directory - # is specified create a vis folder inside the experiment directory - if output_directory is None: - output_directory = os.path.join(exp_dir, "vis") - os.makedirs(output_directory, exist_ok=True) - - # Set the random seeds - torch.manual_seed(0) - np.random.seed(0) - - # Get the config from the experiment_directory, - # and overwrite relevant fields - config = _get_config_from_experiment_directory(exp_dir) - config.exp_dir = exp_dir - # important so that the CO3D dataset gets loaded in full - data_source_args = config.data_source_ImplicitronDataSource_args - if "dataset_map_provider_JsonIndexDatasetMapProvider_args" in data_source_args: - dataset_args = ( - data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args - ) - dataset_args.test_on_train = False - if restrict_sequence_name is not None: - dataset_args.restrict_sequence_name = restrict_sequence_name - - # Set the rendering image size - model_factory_args = config.model_factory_ImplicitronModelFactory_args - model_factory_args.force_resume = True - model_args = model_factory_args.model_GenericModel_args - model_args.render_image_width = render_size[0] - model_args.render_image_height = render_size[1] - - # Load the previously trained model - experiment = Experiment(**config) - model = experiment.model_factory(exp_dir=exp_dir) - device = torch.device("cuda") - model.to(device) - model.eval() - - # Setup the dataset - data_source = experiment.data_source - dataset_map, _ = data_source.get_datasets_and_dataloaders() - dataset = dataset_map[split] - if dataset is None: - raise ValueError(f"{split} dataset not provided") - - if visdom_env is None: - visdom_env = ( - "visualizer_" + config.training_loop_ImplicitronTrainingLoop_args.visdom_env - ) - - # iterate over the sequences in the dataset - for sequence_name in dataset.sequence_names(): - with torch.no_grad(): - render_kwargs = { - "dataset": dataset, - "sequence_name": sequence_name, - "model": model, - "output_video_path": os.path.join(output_directory, "video"), - "n_source_views": n_source_views, - "visdom_show_preds": visdom_show_preds, - "n_flyaround_poses": n_eval_cameras, - "visdom_server": visdom_server, - "visdom_port": visdom_port, - "visdom_environment": visdom_env, - "video_resize": video_size, - "device": device, - **render_flyaround_kwargs, - } - render_flyaround(**render_kwargs) - - -enable_get_default_args(visualize_reconstruction) - - -def _get_config_from_experiment_directory(experiment_directory) -> DictConfig: - cfg_file = os.path.join(experiment_directory, "expconfig.yaml") - config = OmegaConf.load(cfg_file) - # pyre-ignore[7] - return OmegaConf.merge(get_default_args(Experiment), config) - - -def main(argv=sys.argv) -> None: - # automatically parses arguments of visualize_reconstruction - cfg = OmegaConf.create(get_default_args(visualize_reconstruction)) - cfg.update(OmegaConf.from_cli(argv)) - with torch.no_grad(): - visualize_reconstruction(**cfg) - - -if __name__ == "__main__": - main() diff --git a/pytorch3d/projects/nerf/.gitignore b/pytorch3d/projects/nerf/.gitignore deleted file mode 100644 index 07f207f0f7bb99be4841fd6b3a7818d251ccc6d3..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -checkpoints -outputs -data/*.png -data/*.pth -data/*_license.txt diff --git a/pytorch3d/projects/nerf/README.md b/pytorch3d/projects/nerf/README.md deleted file mode 100644 index a103aab47d21ebfa7bfd65387a68f83f0a1c8fbc..0000000000000000000000000000000000000000 --- 
a/pytorch3d/projects/nerf/README.md +++ /dev/null @@ -1,91 +0,0 @@ -Neural Radiance Fields in PyTorch3D -=================================== - -This project implements the Neural Radiance Fields (NeRF) from [1]. - - - - -Installation ------------- -1) [Install PyTorch3D](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md) - -2) Install other dependencies: - - [`visdom`](https://github.com/facebookresearch/visdom) - - [`hydra`](https://github.com/facebookresearch/hydra) - - [`Pillow`](https://python-pillow.org/) - - [`requests`](https://pypi.org/project/requests/) - - E.g. using `pip`: - ``` - pip install visdom - pip install hydra-core --upgrade - pip install Pillow - pip install requests - ``` - - Exporting videos further requires a working `ffmpeg`. - -Training NeRF -------------- -``` -python ./train_nerf.py --config-name lego -``` -will train the model from [1] on the Lego dataset. - -Note that the script outputs visualizations to `Visdom`. In order to enable this, make sure to start the visdom server (before launching the training) with the following command: -``` -python -m visdom.server -``` -Note that training on the "lego" scene takes roughly 24 hours on a single Tesla V100. - -#### Training data -Note that the `train_nerf.py` script will automatically download the relevant dataset in case it is missing. - -Testing NeRF ------------- -``` -python ./test_nerf.py --config-name lego -``` -Will load a trained model from the `./checkpoints` directory and evaluate it on the test split of the corresponding dataset (Lego in the case above). - -### Exporting multi-view video of the radiance field -Furthermore, the codebase supports generating videos of the neural radiance field. -The following generates a turntable video of the Lego scene: -``` -python ./test_nerf.py --config-name=lego test.mode='export_video' -``` -Note that this requires a working `ffmpeg` for generating the video from exported frames. - -Additionally, note that generation of the video in the original resolution is quite slow. In order to speed up the process, one can decrease the resolution of the output video by setting the `data.image_size` flag: -``` -python ./test_nerf.py --config-name=lego test.mode='export_video' data.image_size="[128,128]" -``` -This will generate the video in a lower `128 x 128` resolution. - - -Training & testing on other datasets ------------------------------------- -Currently we support the following datasets: -- lego `python ./train_nerf.py --config-name lego` -- fern `python ./train_nerf.py --config-name fern` -- pt3logo `python ./train_nerf.py --config-name pt3logo` - -The dataset files are located in the following public S3 bucket: -https://dl.fbaipublicfiles.com/pytorch3d_nerf_data - -Attribution: `lego` and `fern` are data from the original code release of [1] in https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1, which are hosted under the CC-BY license (https://creativecommons.org/licenses/by/4.0/) The S3 bucket files contains the same images while the camera matrices have been adjusted to follow the PyTorch3D convention. - -#### Quantitative results -Below are the comparisons between our implementation and the official [`TensorFlow code`](https://github.com/bmild/nerf). The speed is measured on NVidia Quadro GP100. 
-``` -+----------------+------------------+------------------+-----------------+ -| Implementation | Lego: test PSNR | Fern: test PSNR | training speed | -+----------------+------------------+------------------+-----------------+ -| TF (official) | 31.0 | 27.5 | 0.24 sec/it | -| PyTorch3D | 32.7 | 27.9 | 0.18 sec/it | -+----------------+------------------+------------------+-----------------+ -``` - -#### References -[1] Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng, NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis, ECCV2020 diff --git a/pytorch3d/projects/nerf/__init__.py b/pytorch3d/projects/nerf/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/projects/nerf/configs/fern.yaml b/pytorch3d/projects/nerf/configs/fern.yaml deleted file mode 100644 index 1beb67211ca43d44f3b92f2ed0c0698205456136..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/configs/fern.yaml +++ /dev/null @@ -1,45 +0,0 @@ -seed: 3 -resume: True -stats_print_interval: 10 -validation_epoch_interval: 150 -checkpoint_epoch_interval: 150 -checkpoint_path: 'checkpoints/fern_pt3d.pth' -data: - dataset_name: 'fern' - image_size: [378, 504] # [height, width] - precache_rays: True -test: - mode: 'evaluation' - trajectory_type: 'figure_eight' - up: [0.0, 1.0, 0.0] - scene_center: [0.0, 0.0, -2.0] - n_frames: 100 - fps: 20 - trajectory_scale: 1.0 -optimizer: - max_epochs: 37500 - lr: 0.0005 - lr_scheduler_step_size: 12500 - lr_scheduler_gamma: 0.1 -visualization: - history_size: 10 - visdom: True - visdom_server: 'localhost' - visdom_port: 8097 - visdom_env: 'nerf_pytorch3d' -raysampler: - n_pts_per_ray: 64 - n_pts_per_ray_fine: 64 - n_rays_per_image: 1024 - min_depth: 1.2 - max_depth: 6.28 - stratified: True - stratified_test: False - chunk_size_test: 6000 -implicit_function: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_xyz: 256 - n_hidden_neurons_dir: 128 - density_noise_std: 0.0 - n_layers_xyz: 8 diff --git a/pytorch3d/projects/nerf/configs/lego.yaml b/pytorch3d/projects/nerf/configs/lego.yaml deleted file mode 100644 index 5a8870a7f6e7754843706f79663c8187f227b7f4..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/configs/lego.yaml +++ /dev/null @@ -1,45 +0,0 @@ -seed: 3 -resume: True -stats_print_interval: 10 -validation_epoch_interval: 30 -checkpoint_epoch_interval: 30 -checkpoint_path: 'checkpoints/lego_pt3d.pth' -data: - dataset_name: 'lego' - image_size: [800, 800] # [height, width] - precache_rays: True -test: - mode: 'evaluation' - trajectory_type: 'circular' - up: [0.0, 0.0, 1.0] - scene_center: [0.0, 0.0, 0.0] - n_frames: 100 - fps: 20 - trajectory_scale: 0.2 -optimizer: - max_epochs: 20000 - lr: 0.0005 - lr_scheduler_step_size: 5000 - lr_scheduler_gamma: 0.1 -visualization: - history_size: 10 - visdom: True - visdom_server: 'localhost' - visdom_port: 8097 - visdom_env: 'nerf_pytorch3d' -raysampler: - n_pts_per_ray: 64 - n_pts_per_ray_fine: 64 - n_rays_per_image: 1024 - min_depth: 2.0 - max_depth: 6.0 - stratified: True - stratified_test: False - chunk_size_test: 6000 
-implicit_function: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_xyz: 256 - n_hidden_neurons_dir: 128 - density_noise_std: 0.0 - n_layers_xyz: 8 diff --git a/pytorch3d/projects/nerf/configs/pt3logo.yaml b/pytorch3d/projects/nerf/configs/pt3logo.yaml deleted file mode 100644 index db4a640ab2c10329a29acf7bd33f34bebb46ede7..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/configs/pt3logo.yaml +++ /dev/null @@ -1,45 +0,0 @@ -seed: 3 -resume: True -stats_print_interval: 10 -validation_epoch_interval: 30 -checkpoint_epoch_interval: 30 -checkpoint_path: 'checkpoints/pt3logo_pt3d.pth' -data: - dataset_name: 'pt3logo' - image_size: [512, 1024] # [height, width] - precache_rays: True -test: - mode: 'export_video' - trajectory_type: 'figure_eight' - up: [0.0, -1.0, 0.0] - scene_center: [0.0, 0.0, 0.0] - n_frames: 100 - fps: 20 - trajectory_scale: 0.2 -optimizer: - max_epochs: 100000 - lr: 0.0005 - lr_scheduler_step_size: 10000 - lr_scheduler_gamma: 0.1 -visualization: - history_size: 20 - visdom: True - visdom_server: 'localhost' - visdom_port: 8097 - visdom_env: 'nerf_pytorch3d' -raysampler: - n_pts_per_ray: 64 - n_pts_per_ray_fine: 64 - n_rays_per_image: 1024 - min_depth: 8.0 - max_depth: 23.0 - stratified: True - stratified_test: False - chunk_size_test: 6000 -implicit_function: - n_harmonic_functions_xyz: 10 - n_harmonic_functions_dir: 4 - n_hidden_neurons_xyz: 256 - n_hidden_neurons_dir: 128 - density_noise_std: 0.0 - n_layers_xyz: 8 diff --git a/pytorch3d/projects/nerf/nerf/__init__.py b/pytorch3d/projects/nerf/nerf/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/projects/nerf/nerf/dataset.py b/pytorch3d/projects/nerf/nerf/dataset.py deleted file mode 100644 index d13a896a23aa8d0672616f8898f071262a69bdf7..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/dataset.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -from typing import List, Optional, Tuple - -import numpy as np -import requests -import torch -from PIL import Image -from pytorch3d.renderer import PerspectiveCameras -from torch.utils.data import Dataset - - -DEFAULT_DATA_ROOT = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "..", "data" -) - -DEFAULT_URL_ROOT = "https://dl.fbaipublicfiles.com/pytorch3d_nerf_data" - -ALL_DATASETS = ("lego", "fern", "pt3logo") - - -def trivial_collate(batch): - """ - A trivial collate function that merely returns the uncollated batch. - """ - return batch - - -class ListDataset(Dataset): - """ - A simple dataset made of a list of entries. - """ - - def __init__(self, entries: List) -> None: - """ - Args: - entries: The list of dataset entries. 
- """ - self._entries = entries - - def __len__( - self, - ) -> int: - return len(self._entries) - - def __getitem__(self, index): - return self._entries[index] - - -def get_nerf_datasets( - dataset_name: str, # 'lego | fern' - image_size: Tuple[int, int], - data_root: str = DEFAULT_DATA_ROOT, - autodownload: bool = True, -) -> Tuple[Dataset, Dataset, Dataset]: - """ - Obtains the training and validation dataset object for a dataset specified - with the `dataset_name` argument. - - Args: - dataset_name: The name of the dataset to load. - image_size: A tuple (height, width) denoting the sizes of the loaded dataset images. - data_root: The root folder at which the data is stored. - autodownload: Auto-download the dataset files in case they are missing. - - Returns: - train_dataset: The training dataset object. - val_dataset: The validation dataset object. - test_dataset: The testing dataset object. - """ - - if dataset_name not in ALL_DATASETS: - raise ValueError(f"'{dataset_name}'' does not refer to a known dataset.") - - print(f"Loading dataset {dataset_name}, image size={str(image_size)} ...") - - cameras_path = os.path.join(data_root, dataset_name + ".pth") - image_path = cameras_path.replace(".pth", ".png") - - if autodownload and any(not os.path.isfile(p) for p in (cameras_path, image_path)): - # Automatically download the data files if missing. - download_data((dataset_name,), data_root=data_root) - - train_data = torch.load(cameras_path) - n_cameras = train_data["cameras"]["R"].shape[0] - - _image_max_image_pixels = Image.MAX_IMAGE_PIXELS - Image.MAX_IMAGE_PIXELS = None # The dataset image is very large ... - images = torch.FloatTensor(np.array(Image.open(image_path))) / 255.0 - images = torch.stack(torch.chunk(images, n_cameras, dim=0))[..., :3] - Image.MAX_IMAGE_PIXELS = _image_max_image_pixels - - scale_factors = [s_new / s for s, s_new in zip(images.shape[1:3], image_size)] - if abs(scale_factors[0] - scale_factors[1]) > 1e-3: - raise ValueError( - "Non-isotropic scaling is not allowed. Consider changing the 'image_size' argument." - ) - scale_factor = sum(scale_factors) * 0.5 - - if scale_factor != 1.0: - print(f"Rescaling dataset (factor={scale_factor})") - images = torch.nn.functional.interpolate( - images.permute(0, 3, 1, 2), - size=tuple(image_size), - mode="bilinear", - ).permute(0, 2, 3, 1) - - cameras = [ - PerspectiveCameras( - **{k: v[cami][None] for k, v in train_data["cameras"].items()} - ).to("cpu") - for cami in range(n_cameras) - ] - - train_idx, val_idx, test_idx = train_data["split"] - - train_dataset, val_dataset, test_dataset = [ - ListDataset( - [ - {"image": images[i], "camera": cameras[i], "camera_idx": int(i)} - for i in idx - ] - ) - for idx in [train_idx, val_idx, test_idx] - ] - - return train_dataset, val_dataset, test_dataset - - -def download_data( - dataset_names: Optional[List[str]] = None, - data_root: str = DEFAULT_DATA_ROOT, - url_root: str = DEFAULT_URL_ROOT, -) -> None: - """ - Downloads the relevant dataset files. - - Args: - dataset_names: A list of the names of datasets to download. If `None`, - downloads all available datasets. 
- """ - - if dataset_names is None: - dataset_names = ALL_DATASETS - - os.makedirs(data_root, exist_ok=True) - - for dataset_name in dataset_names: - cameras_file = dataset_name + ".pth" - images_file = cameras_file.replace(".pth", ".png") - license_file = cameras_file.replace(".pth", "_license.txt") - - for fl in (cameras_file, images_file, license_file): - local_fl = os.path.join(data_root, fl) - remote_fl = os.path.join(url_root, fl) - - print(f"Downloading dataset {dataset_name} from {remote_fl} to {local_fl}.") - - r = requests.get(remote_fl) - with open(local_fl, "wb") as f: - f.write(r.content) diff --git a/pytorch3d/projects/nerf/nerf/eval_video_utils.py b/pytorch3d/projects/nerf/nerf/eval_video_utils.py deleted file mode 100644 index 0dca5f848227cf3c6f1499edd5209af513e34c1f..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/eval_video_utils.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Tuple - -import torch -from pytorch3d.renderer import look_at_view_transform, PerspectiveCameras -from torch.utils.data.dataset import Dataset - - -def generate_eval_video_cameras( - train_dataset, - n_eval_cams: int = 100, - trajectory_type: str = "figure_eight", - trajectory_scale: float = 0.2, - scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0), - up: Tuple[float, float, float] = (0.0, 0.0, 1.0), -) -> Dataset[torch.Tensor]: - """ - Generate a camera trajectory for visualizing a NeRF model. - - Args: - train_dataset: The training dataset object. - n_eval_cams: Number of cameras in the trajectory. - trajectory_type: The type of the camera trajectory. Can be one of: - circular: Rotating around the center of the scene at a fixed radius. - figure_eight: Figure-of-8 trajectory around the center of the - central camera of the training dataset. - trefoil_knot: Same as 'figure_eight', but the trajectory has a shape - of a trefoil knot (https://en.wikipedia.org/wiki/Trefoil_knot). - figure_eight_knot: Same as 'figure_eight', but the trajectory has a shape - of a figure-eight knot - (https://en.wikipedia.org/wiki/Figure-eight_knot_(mathematics)). - trajectory_scale: The extent of the trajectory. - up: The "up" vector of the scene (=the normal of the scene floor). - Active for the `trajectory_type="circular"`. - scene_center: The center of the scene in world coordinates which all - the cameras from the generated trajectory look at. 
- Returns: - Dictionary of camera instances which can be used as the test dataset - """ - if trajectory_type in ("figure_eight", "trefoil_knot", "figure_eight_knot"): - cam_centers = torch.cat( - [e["camera"].get_camera_center() for e in train_dataset] - ) - # get the nearest camera center to the mean of centers - mean_camera_idx = ( - ((cam_centers - cam_centers.mean(dim=0)[None]) ** 2) - .sum(dim=1) - .min(dim=0) - .indices - ) - # generate the knot trajectory in canonical coords - time = torch.linspace(0, 2 * math.pi, n_eval_cams + 1)[:n_eval_cams] - if trajectory_type == "trefoil_knot": - traj = _trefoil_knot(time) - elif trajectory_type == "figure_eight_knot": - traj = _figure_eight_knot(time) - elif trajectory_type == "figure_eight": - traj = _figure_eight(time) - traj[:, 2] -= traj[:, 2].max() - - # transform the canonical knot to the coord frame of the mean camera - traj_trans = ( - train_dataset[mean_camera_idx]["camera"] - .get_world_to_view_transform() - .inverse() - ) - traj_trans = traj_trans.scale(cam_centers.std(dim=0).mean() * trajectory_scale) - traj = traj_trans.transform_points(traj) - - elif trajectory_type == "circular": - cam_centers = torch.cat( - [e["camera"].get_camera_center() for e in train_dataset] - ) - - # fit plane to the camera centers - plane_mean = cam_centers.mean(dim=0) - cam_centers_c = cam_centers - plane_mean[None] - - if up is not None: - # us the up vector instead of the plane through the camera centers - plane_normal = torch.FloatTensor(up) - else: - cov = (cam_centers_c.t() @ cam_centers_c) / cam_centers_c.shape[0] - _, e_vec = torch.linalg.eigh(cov, UPLO="U") - plane_normal = e_vec[:, 0] - - plane_dist = (plane_normal[None] * cam_centers_c).sum(dim=-1) - cam_centers_on_plane = cam_centers_c - plane_dist[:, None] * plane_normal[None] - - cov = ( - cam_centers_on_plane.t() @ cam_centers_on_plane - ) / cam_centers_on_plane.shape[0] - _, e_vec = torch.linalg.eigh(cov, UPLO="U") - traj_radius = (cam_centers_on_plane**2).sum(dim=1).sqrt().mean() - angle = torch.linspace(0, 2.0 * math.pi, n_eval_cams) - traj = traj_radius * torch.stack( - (torch.zeros_like(angle), angle.cos(), angle.sin()), dim=-1 - ) - traj = traj @ e_vec.t() + plane_mean[None] - - else: - raise ValueError(f"Unknown trajectory_type {trajectory_type}.") - - # point all cameras towards the center of the scene - R, T = look_at_view_transform( - eye=traj, - at=(scene_center,), # (1, 3) - up=(up,), # (1, 3) - device=traj.device, - ) - - # get the average focal length and principal point - focal = torch.cat([e["camera"].focal_length for e in train_dataset]).mean(dim=0) - p0 = torch.cat([e["camera"].principal_point for e in train_dataset]).mean(dim=0) - - # assemble the dataset - test_dataset = [ - { - "image": None, - "camera": PerspectiveCameras( - focal_length=focal[None], - principal_point=p0[None], - R=R_[None], - T=T_[None], - ), - "camera_idx": i, - } - for i, (R_, T_) in enumerate(zip(R, T)) - ] - - return test_dataset - - -def _figure_eight_knot(t: torch.Tensor, z_scale: float = 0.5): - x = (2 + (2 * t).cos()) * (3 * t).cos() - y = (2 + (2 * t).cos()) * (3 * t).sin() - z = (4 * t).sin() * z_scale - return torch.stack((x, y, z), dim=-1) - - -def _trefoil_knot(t: torch.Tensor, z_scale: float = 0.5): - x = t.sin() + 2 * (2 * t).sin() - y = t.cos() - 2 * (2 * t).cos() - z = -(3 * t).sin() * z_scale - return torch.stack((x, y, z), dim=-1) - - -def _figure_eight(t: torch.Tensor, z_scale: float = 0.5): - x = t.cos() - y = (2 * t).sin() / 2 - z = t.sin() * z_scale - return torch.stack((x, y, 
z), dim=-1) diff --git a/pytorch3d/projects/nerf/nerf/implicit_function.py b/pytorch3d/projects/nerf/nerf/implicit_function.py deleted file mode 100644 index 4209e53c91ed5a5a97f90bd7b2fa9b950f3ae502..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/implicit_function.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Tuple - -import torch -from pytorch3d.common.linear_with_repeat import LinearWithRepeat -from pytorch3d.renderer import HarmonicEmbedding, ray_bundle_to_ray_points, RayBundle - - -def _xavier_init(linear): - """ - Performs the Xavier weight initialization of the linear layer `linear`. - """ - torch.nn.init.xavier_uniform_(linear.weight.data) - - -class NeuralRadianceField(torch.nn.Module): - def __init__( - self, - n_harmonic_functions_xyz: int = 6, - n_harmonic_functions_dir: int = 4, - n_hidden_neurons_xyz: int = 256, - n_hidden_neurons_dir: int = 128, - n_layers_xyz: int = 8, - append_xyz: Tuple[int, ...] = (5,), - use_multiple_streams: bool = True, - **kwargs, - ): - """ - Args: - n_harmonic_functions_xyz: The number of harmonic functions - used to form the harmonic embedding of 3D point locations. - n_harmonic_functions_dir: The number of harmonic functions - used to form the harmonic embedding of the ray directions. - n_hidden_neurons_xyz: The number of hidden units in the - fully connected layers of the MLP that accepts the 3D point - locations and outputs the occupancy field with the intermediate - features. - n_hidden_neurons_dir: The number of hidden units in the - fully connected layers of the MLP that accepts the intermediate - features and ray directions and outputs the radiance field - (per-point colors). - n_layers_xyz: The number of layers of the MLP that outputs the - occupancy field. - append_xyz: The list of indices of the skip layers of the occupancy MLP. - use_multiple_streams: Whether density and color should be calculated on - separate CUDA streams. - """ - super().__init__() - - # The harmonic embedding layer converts input 3D coordinates - # to a representation that is more suitable for - # processing with a deep neural network. - self.harmonic_embedding_xyz = HarmonicEmbedding(n_harmonic_functions_xyz) - self.harmonic_embedding_dir = HarmonicEmbedding(n_harmonic_functions_dir) - embedding_dim_xyz = n_harmonic_functions_xyz * 2 * 3 + 3 - embedding_dim_dir = n_harmonic_functions_dir * 2 * 3 + 3 - - self.mlp_xyz = MLPWithInputSkips( - n_layers_xyz, - embedding_dim_xyz, - n_hidden_neurons_xyz, - embedding_dim_xyz, - n_hidden_neurons_xyz, - input_skips=append_xyz, - ) - - self.intermediate_linear = torch.nn.Linear( - n_hidden_neurons_xyz, n_hidden_neurons_xyz - ) - _xavier_init(self.intermediate_linear) - - self.density_layer = torch.nn.Linear(n_hidden_neurons_xyz, 1) - _xavier_init(self.density_layer) - - # Zero the bias of the density layer to avoid - # a completely transparent initialization. 
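The embedding widths set up above follow directly from the harmonic embedding: each of the 3 input coordinates contributes one sine and one cosine per harmonic frequency, and the raw coordinates are appended, giving `n_harmonic_functions * 2 * 3 + 3` channels. A minimal re-implementation for illustration only (the real `HarmonicEmbedding` may differ in its exact frequency scaling):

```python
import torch

def harmonic_embed(x: torch.Tensor, n_harmonic: int = 6) -> torch.Tensor:
    # Frequencies 2**0 ... 2**(n_harmonic - 1); every coordinate gets a sin and
    # a cos per frequency, and the raw input is appended at the end.
    freqs = 2.0 ** torch.arange(n_harmonic, dtype=x.dtype)
    angles = x[..., None] * freqs                             # (..., 3, n_harmonic)
    embed = torch.cat((angles.sin(), angles.cos()), dim=-1)   # (..., 3, 2 * n_harmonic)
    return torch.cat((embed.flatten(start_dim=-2), x), dim=-1)

pts = torch.randn(4, 3)
assert harmonic_embed(pts, 6).shape[-1] == 6 * 2 * 3 + 3      # 39 == embedding_dim_xyz above
```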
- self.density_layer.bias.data[:] = 0.0 # fixme: Sometimes this is not enough - - self.color_layer = torch.nn.Sequential( - LinearWithRepeat( - n_hidden_neurons_xyz + embedding_dim_dir, n_hidden_neurons_dir - ), - torch.nn.ReLU(True), - torch.nn.Linear(n_hidden_neurons_dir, 3), - torch.nn.Sigmoid(), - ) - self.use_multiple_streams = use_multiple_streams - - def _get_densities( - self, - features: torch.Tensor, - depth_values: torch.Tensor, - density_noise_std: float, - ) -> torch.Tensor: - """ - This function takes `features` predicted by `self.mlp_xyz` - and converts them to `raw_densities` with `self.density_layer`. - `raw_densities` are later re-weighted using the depth step sizes - and mapped to [0-1] range with 1 - inverse exponential of `raw_densities`. - """ - raw_densities = self.density_layer(features) - deltas = torch.cat( - ( - depth_values[..., 1:] - depth_values[..., :-1], - 1e10 * torch.ones_like(depth_values[..., :1]), - ), - dim=-1, - )[..., None] - if density_noise_std > 0.0: - raw_densities = ( - raw_densities + torch.randn_like(raw_densities) * density_noise_std - ) - densities = 1 - (-deltas * torch.relu(raw_densities)).exp() - return densities - - def _get_colors( - self, features: torch.Tensor, rays_directions: torch.Tensor - ) -> torch.Tensor: - """ - This function takes per-point `features` predicted by `self.mlp_xyz` - and evaluates the color model in order to attach to each - point a 3D vector of its RGB color. - """ - # Normalize the ray_directions to unit l2 norm. - rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1) - - # Obtain the harmonic embedding of the normalized ray directions. - rays_embedding = self.harmonic_embedding_dir(rays_directions_normed) - - return self.color_layer((self.intermediate_linear(features), rays_embedding)) - - def _get_densities_and_colors( - self, features: torch.Tensor, ray_bundle: RayBundle, density_noise_std: float - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - The second part of the forward calculation. - - Args: - features: the output of the common mlp (the prior part of the - calculation), shape - (minibatch x ... x self.n_hidden_neurons_xyz). - ray_bundle: As for forward(). - density_noise_std: As for forward(). - - Returns: - rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)` - denoting the opacity of each ray point. - rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)` - denoting the color of each ray point. - """ - if self.use_multiple_streams and features.is_cuda: - current_stream = torch.cuda.current_stream(features.device) - other_stream = torch.cuda.Stream(features.device) - other_stream.wait_stream(current_stream) - - with torch.cuda.stream(other_stream): - rays_densities = self._get_densities( - features, ray_bundle.lengths, density_noise_std - ) - # rays_densities.shape = [minibatch x ... x 1] in [0-1] - - rays_colors = self._get_colors(features, ray_bundle.directions) - # rays_colors.shape = [minibatch x ... x 3] in [0-1] - - current_stream.wait_stream(other_stream) - else: - # Same calculation as above, just serial. 
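`_get_densities` above implements the usual emission-absorption discretization, `alpha = 1 - exp(-delta * relu(sigma))`, where `delta` is the spacing between consecutive depth samples. A small numeric illustration of that mapping (the values are made up):

```python
import torch

depth_values = torch.tensor([1.0, 1.5, 2.5, 4.0])    # sample depths along one ray
raw_densities = torch.tensor([0.0, 0.5, 2.0, 10.0])  # unbounded MLP outputs (sigma)

# Spacing between consecutive samples; the last sample gets a very large delta,
# mirroring the 1e10 padding used above.
deltas = torch.cat((depth_values[1:] - depth_values[:-1], torch.tensor([1e10])))

alphas = 1.0 - torch.exp(-deltas * torch.relu(raw_densities))
# tensor([0.0000, 0.3935, 0.9502, 1.0000]): larger sigma * delta -> more opaque
```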
- rays_densities = self._get_densities( - features, ray_bundle.lengths, density_noise_std - ) - rays_colors = self._get_colors(features, ray_bundle.directions) - return rays_densities, rays_colors - - def forward( - self, - ray_bundle: RayBundle, - density_noise_std: float = 0.0, - **kwargs, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - The forward function accepts the parametrizations of - 3D points sampled along projection rays. The forward - pass is responsible for attaching a 3D vector - and a 1D scalar representing the point's - RGB color and opacity respectively. - - Args: - ray_bundle: A RayBundle object containing the following variables: - origins: A tensor of shape `(minibatch, ..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(minibatch, ..., 3)` - containing the direction vectors of sampling rays in world coords. - lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)` - containing the lengths at which the rays are sampled. - density_noise_std: A floating point value representing the - variance of the random normal noise added to the output of - the opacity function. This can prevent floating artifacts. - - Returns: - rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)` - denoting the opacity of each ray point. - rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)` - denoting the color of each ray point. - """ - # We first convert the ray parametrizations to world - # coordinates with `ray_bundle_to_ray_points`. - rays_points_world = ray_bundle_to_ray_points(ray_bundle) - # rays_points_world.shape = [minibatch x ... x 3] - - # For each 3D world coordinate, we obtain its harmonic embedding. - embeds_xyz = self.harmonic_embedding_xyz(rays_points_world) - # embeds_xyz.shape = [minibatch x ... x self.n_harmonic_functions*6 + 3] - - # self.mlp maps each harmonic embedding to a latent feature space. - features = self.mlp_xyz(embeds_xyz, embeds_xyz) - # features.shape = [minibatch x ... x self.n_hidden_neurons_xyz] - - rays_densities, rays_colors = self._get_densities_and_colors( - features, ray_bundle, density_noise_std - ) - return rays_densities, rays_colors - - -class MLPWithInputSkips(torch.nn.Module): - """ - Implements the multi-layer perceptron architecture of the Neural Radiance Field. - - As such, `MLPWithInputSkips` is a multi layer perceptron consisting - of a sequence of linear layers with ReLU activations. - - Additionally, for a set of predefined layers `input_skips`, the forward pass - appends a skip tensor `z` to the output of the preceding layer. - - Note that this follows the architecture described in the Supplementary - Material (Fig. 7) of [1]. - - References: - [1] Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik - and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng: - NeRF: Representing Scenes as Neural Radiance Fields for View - Synthesis, ECCV2020 - """ - - def __init__( - self, - n_layers: int, - input_dim: int, - output_dim: int, - skip_dim: int, - hidden_dim: int, - input_skips: Tuple[int, ...] = (), - ): - """ - Args: - n_layers: The number of linear layers of the MLP. - input_dim: The number of channels of the input tensor. - output_dim: The number of channels of the output. - skip_dim: The number of channels of the tensor `z` appended when - evaluating the skip layers. - hidden_dim: The number of hidden units of the MLP. - input_skips: The list of layer indices at which we append the skip - tensor `z`. 
- """ - super().__init__() - layers = [] - for layeri in range(n_layers): - if layeri == 0: - dimin = input_dim - dimout = hidden_dim - elif layeri in input_skips: - dimin = hidden_dim + skip_dim - dimout = hidden_dim - else: - dimin = hidden_dim - dimout = hidden_dim - linear = torch.nn.Linear(dimin, dimout) - _xavier_init(linear) - layers.append(torch.nn.Sequential(linear, torch.nn.ReLU(True))) - self.mlp = torch.nn.ModuleList(layers) - self._input_skips = set(input_skips) - - def forward(self, x: torch.Tensor, z: torch.Tensor) -> torch.Tensor: - """ - Args: - x: The input tensor of shape `(..., input_dim)`. - z: The input skip tensor of shape `(..., skip_dim)` which is appended - to layers whose indices are specified by `input_skips`. - Returns: - y: The output tensor of shape `(..., output_dim)`. - """ - y = x - for li, layer in enumerate(self.mlp): - if li in self._input_skips: - y = torch.cat((y, z), dim=-1) - y = layer(y) - return y diff --git a/pytorch3d/projects/nerf/nerf/nerf_renderer.py b/pytorch3d/projects/nerf/nerf/nerf_renderer.py deleted file mode 100644 index d72089734d580ebf7fa00dd1c7cbd9faba85bf35..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/nerf_renderer.py +++ /dev/null @@ -1,434 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Optional, Tuple - -import torch -from pytorch3d.renderer import ImplicitRenderer, ray_bundle_to_ray_points -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.structures import Pointclouds -from pytorch3d.vis.plotly_vis import plot_scene -from visdom import Visdom - -from .implicit_function import NeuralRadianceField -from .raymarcher import EmissionAbsorptionNeRFRaymarcher -from .raysampler import NeRFRaysampler, ProbabilisticRaysampler -from .utils import calc_mse, calc_psnr, sample_images_at_mc_locs - - -class RadianceFieldRenderer(torch.nn.Module): - """ - Implements a renderer of a Neural Radiance Field. - - This class holds pointers to the fine and coarse renderer objects, which are - instances of `pytorch3d.renderer.ImplicitRenderer`, and pointers to the - neural networks representing the fine and coarse Neural Radiance Fields, - which are instances of `NeuralRadianceField`. - - The rendering forward pass proceeds as follows: - 1) For a given input camera, rendering rays are generated with the - `NeRFRaysampler` object of `self._renderer['coarse']`. - In the training mode (`self.training==True`), the rays are a set - of `n_rays_per_image` random 2D locations of the image grid. - In the evaluation mode (`self.training==False`), the rays correspond - to the full image grid. The rays are further split to - `chunk_size_test`-sized chunks to prevent out-of-memory errors. - 2) For each ray point, the coarse `NeuralRadianceField` MLP is evaluated. - The pointer to this MLP is stored in `self._implicit_function['coarse']` - 3) The coarse radiance field is rendered with the - `EmissionAbsorptionNeRFRaymarcher` object of `self._renderer['coarse']`. - 4) The coarse raymarcher outputs a probability distribution that guides - the importance raysampling of the fine rendering pass. The - `ProbabilisticRaysampler` stored in `self._renderer['fine'].raysampler` - implements the importance ray-sampling. 
- 5) Similar to 2) the fine MLP in `self._implicit_function['fine']` - labels the ray points with occupancies and colors. - 6) self._renderer['fine'].raymarcher` generates the final fine render. - 7) The fine and coarse renders are compared to the ground truth input image - with PSNR and MSE metrics. - """ - - def __init__( - self, - image_size: Tuple[int, int], - n_pts_per_ray: int, - n_pts_per_ray_fine: int, - n_rays_per_image: int, - min_depth: float, - max_depth: float, - stratified: bool, - stratified_test: bool, - chunk_size_test: int, - n_harmonic_functions_xyz: int = 6, - n_harmonic_functions_dir: int = 4, - n_hidden_neurons_xyz: int = 256, - n_hidden_neurons_dir: int = 128, - n_layers_xyz: int = 8, - append_xyz: Tuple[int, ...] = (5,), - density_noise_std: float = 0.0, - visualization: bool = False, - ): - """ - Args: - image_size: The size of the rendered image (`[height, width]`). - n_pts_per_ray: The number of points sampled along each ray for the - coarse rendering pass. - n_pts_per_ray_fine: The number of points sampled along each ray for the - fine rendering pass. - n_rays_per_image: Number of Monte Carlo ray samples when training - (`self.training==True`). - min_depth: The minimum depth of a sampled ray-point for the coarse rendering. - max_depth: The maximum depth of a sampled ray-point for the coarse rendering. - stratified: If `True`, stratifies (=randomly offsets) the depths - of each ray point during training (`self.training==True`). - stratified_test: If `True`, stratifies (=randomly offsets) the depths - of each ray point during evaluation (`self.training==False`). - chunk_size_test: The number of rays in each chunk of image rays. - Active only when `self.training==True`. - n_harmonic_functions_xyz: The number of harmonic functions - used to form the harmonic embedding of 3D point locations. - n_harmonic_functions_dir: The number of harmonic functions - used to form the harmonic embedding of the ray directions. - n_hidden_neurons_xyz: The number of hidden units in the - fully connected layers of the MLP that accepts the 3D point - locations and outputs the occupancy field with the intermediate - features. - n_hidden_neurons_dir: The number of hidden units in the - fully connected layers of the MLP that accepts the intermediate - features and ray directions and outputs the radiance field - (per-point colors). - n_layers_xyz: The number of layers of the MLP that outputs the - occupancy field. - append_xyz: The list of indices of the skip layers of the occupancy MLP. - Prior to evaluating the skip layers, the tensor which was input to MLP - is appended to the skip layer input. - density_noise_std: The standard deviation of the random normal noise - added to the output of the occupancy MLP. - Active only when `self.training==True`. - visualization: whether to store extra output for visualization. - """ - - super().__init__() - - # The renderers and implicit functions are stored under the fine/coarse - # keys in ModuleDict PyTorch modules. - self._renderer = torch.nn.ModuleDict() - self._implicit_function = torch.nn.ModuleDict() - - # Init the EA raymarcher used by both passes. - raymarcher = EmissionAbsorptionNeRFRaymarcher() - - # Parse out image dimensions. - image_height, image_width = image_size - - for render_pass in ("coarse", "fine"): - if render_pass == "coarse": - # Initialize the coarse raysampler. 
- raysampler = NeRFRaysampler( - n_pts_per_ray=n_pts_per_ray, - min_depth=min_depth, - max_depth=max_depth, - stratified=stratified, - stratified_test=stratified_test, - n_rays_per_image=n_rays_per_image, - image_height=image_height, - image_width=image_width, - ) - elif render_pass == "fine": - # Initialize the fine raysampler. - raysampler = ProbabilisticRaysampler( - n_pts_per_ray=n_pts_per_ray_fine, - stratified=stratified, - stratified_test=stratified_test, - ) - else: - raise ValueError(f"No such rendering pass {render_pass}") - - # Initialize the fine/coarse renderer. - self._renderer[render_pass] = ImplicitRenderer( - raysampler=raysampler, - raymarcher=raymarcher, - ) - - # Instantiate the fine/coarse NeuralRadianceField module. - self._implicit_function[render_pass] = NeuralRadianceField( - n_harmonic_functions_xyz=n_harmonic_functions_xyz, - n_harmonic_functions_dir=n_harmonic_functions_dir, - n_hidden_neurons_xyz=n_hidden_neurons_xyz, - n_hidden_neurons_dir=n_hidden_neurons_dir, - n_layers_xyz=n_layers_xyz, - append_xyz=append_xyz, - ) - - self._density_noise_std = density_noise_std - self._chunk_size_test = chunk_size_test - self._image_size = image_size - self.visualization = visualization - - def precache_rays( - self, - cache_cameras: List[CamerasBase], - cache_camera_hashes: List[str], - ): - """ - Precaches the rays emitted from the list of cameras `cache_cameras`, - where each camera is uniquely identified with the corresponding hash - from `cache_camera_hashes`. - - The cached rays are moved to cpu and stored in - `self._renderer['coarse']._ray_cache`. - - Raises `ValueError` when caching two cameras with the same hash. - - Args: - cache_cameras: A list of `N` cameras for which the rays are pre-cached. - cache_camera_hashes: A list of `N` unique identifiers for each - camera from `cameras`. - """ - self._renderer["coarse"].raysampler.precache_rays( - cache_cameras, - cache_camera_hashes, - ) - - def _process_ray_chunk( - self, - camera_hash: Optional[str], - camera: CamerasBase, - image: torch.Tensor, - chunk_idx: int, - ) -> dict: - """ - Samples and renders a chunk of rays. - - Args: - camera_hash: A unique identifier of a pre-cached camera. - If `None`, the cache is not searched and the sampled rays are - calculated from scratch. - camera: A batch of cameras from which the scene is rendered. - image: A batch of corresponding ground truth images of shape - ('batch_size', Β·, Β·, 3). - chunk_idx: The index of the currently rendered ray chunk. - Returns: - out: `dict` containing the outputs of the rendering: - `rgb_coarse`: The result of the coarse rendering pass. - `rgb_fine`: The result of the fine rendering pass. - `rgb_gt`: The corresponding ground-truth RGB values. - """ - # Initialize the outputs of the coarse rendering to None. - coarse_ray_bundle = None - coarse_weights = None - - # First evaluate the coarse rendering pass, then the fine one. - for renderer_pass in ("coarse", "fine"): - (rgb, weights), ray_bundle_out = self._renderer[renderer_pass]( - cameras=camera, - volumetric_function=self._implicit_function[renderer_pass], - chunksize=self._chunk_size_test, - chunk_idx=chunk_idx, - density_noise_std=(self._density_noise_std if self.training else 0.0), - input_ray_bundle=coarse_ray_bundle, - ray_weights=coarse_weights, - camera_hash=camera_hash, - ) - - if renderer_pass == "coarse": - rgb_coarse = rgb - # Store the weights and the rays of the first rendering pass - # for the ensuing importance ray-sampling of the fine render. 
- coarse_ray_bundle = ray_bundle_out - coarse_weights = weights - if image is not None: - # Sample the ground truth images at the xy locations of the - # rendering ray pixels. - rgb_gt = sample_images_at_mc_locs( - image[..., :3][None], - ray_bundle_out.xys, - ) - else: - rgb_gt = None - - elif renderer_pass == "fine": - rgb_fine = rgb - - else: - raise ValueError(f"No such rendering pass {renderer_pass}") - - out = {"rgb_fine": rgb_fine, "rgb_coarse": rgb_coarse, "rgb_gt": rgb_gt} - if self.visualization: - # Store the coarse rays/weights only for visualization purposes. - out["coarse_ray_bundle"] = type(coarse_ray_bundle)( - *[v.detach().cpu() for k, v in coarse_ray_bundle._asdict().items()] - ) - out["coarse_weights"] = coarse_weights.detach().cpu() - - return out - - def forward( - self, - camera_hash: Optional[str], - camera: CamerasBase, - image: torch.Tensor, - ) -> Tuple[dict, dict]: - """ - Performs the coarse and fine rendering passes of the radiance field - from the viewpoint of the input `camera`. - Afterwards, both renders are compared to the input ground truth `image` - by evaluating the peak signal-to-noise ratio and the mean-squared error. - - The rendering result depends on the `self.training` flag: - - In the training mode (`self.training==True`), the function renders - a random subset of image rays (Monte Carlo rendering). - - In evaluation mode (`self.training==False`), the function renders - the full image. In order to prevent out-of-memory errors, - when `self.training==False`, the rays are sampled and rendered - in batches of size `chunksize`. - - Args: - camera_hash: A unique identifier of a pre-cached camera. - If `None`, the cache is not searched and the sampled rays are - calculated from scratch. - camera: A batch of cameras from which the scene is rendered. - image: A batch of corresponding ground truth images of shape - ('batch_size', Β·, Β·, 3). - Returns: - out: `dict` containing the outputs of the rendering: - `rgb_coarse`: The result of the coarse rendering pass. - `rgb_fine`: The result of the fine rendering pass. - `rgb_gt`: The corresponding ground-truth RGB values. - - The shape of `rgb_coarse`, `rgb_fine`, `rgb_gt` depends on the - `self.training` flag: - If `==True`, all 3 tensors are of shape - `(batch_size, n_rays_per_image, 3)` and contain the result - of the Monte Carlo training rendering pass. - If `==False`, all 3 tensors are of shape - `(batch_size, image_size[0], image_size[1], 3)` and contain - the result of the full image rendering pass. - metrics: `dict` containing the error metrics comparing the fine and - coarse renders to the ground truth: - `mse_coarse`: Mean-squared error between the coarse render and - the input `image` - `mse_fine`: Mean-squared error between the fine render and - the input `image` - `psnr_coarse`: Peak signal-to-noise ratio between the coarse render and - the input `image` - `psnr_fine`: Peak signal-to-noise ratio between the fine render and - the input `image` - """ - if not self.training: - # Full evaluation pass. - n_chunks = self._renderer["coarse"].raysampler.get_n_chunks( - self._chunk_size_test, - camera.R.shape[0], - ) - else: - # MonteCarlo ray sampling. - n_chunks = 1 - - # Process the chunks of rays. - chunk_outputs = [ - self._process_ray_chunk( - camera_hash, - camera, - image, - chunk_idx, - ) - for chunk_idx in range(n_chunks) - ] - - if not self.training: - # For a full render pass concatenate the output chunks, - # and reshape to image size. 
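At evaluation time the full image is rendered in `chunk_size_test`-sized pieces, and the chunk outputs are concatenated and reshaped back to the image resolution. The bookkeeping is a ceiling division over the number of image rays; a rough sketch with hypothetical sizes (not taken from any of the configs):

```python
import math

image_height, image_width, batch_size = 400, 400, 1
chunk_size_test = 6000                          # rays per chunk (hypothetical)

n_rays = image_height * image_width * batch_size
n_chunks = math.ceil(n_rays / chunk_size_test)  # 27 chunks of at most 6000 rays

# Each chunk yields tensors of shape (1, rays_in_chunk, 3); concatenating them
# along dim=1 and calling .view(-1, image_height, image_width, 3) restores the image.
```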
- out = { - k: torch.cat( - [ch_o[k] for ch_o in chunk_outputs], - dim=1, - ).view(-1, *self._image_size, 3) - if chunk_outputs[0][k] is not None - else None - for k in ("rgb_fine", "rgb_coarse", "rgb_gt") - } - else: - out = chunk_outputs[0] - - # Calc the error metrics. - metrics = {} - if image is not None: - for render_pass in ("coarse", "fine"): - for metric_name, metric_fun in zip( - ("mse", "psnr"), (calc_mse, calc_psnr) - ): - metrics[f"{metric_name}_{render_pass}"] = metric_fun( - out["rgb_" + render_pass][..., :3], - out["rgb_gt"][..., :3], - ) - - return out, metrics - - -def visualize_nerf_outputs( - nerf_out: dict, output_cache: List, viz: Visdom, visdom_env: str -): - """ - Visualizes the outputs of the `RadianceFieldRenderer`. - - Args: - nerf_out: An output of the validation rendering pass. - output_cache: A list with outputs of several training render passes. - viz: A visdom connection object. - visdom_env: The name of visdom environment for visualization. - """ - - # Show the training images. - ims = torch.stack([o["image"] for o in output_cache]) - ims = torch.cat(list(ims), dim=1) - viz.image( - ims.permute(2, 0, 1), - env=visdom_env, - win="images", - opts={"title": "train_images"}, - ) - - # Show the coarse and fine renders together with the ground truth images. - ims_full = torch.cat( - [ - nerf_out[imvar][0].permute(2, 0, 1).detach().cpu().clamp(0.0, 1.0) - for imvar in ("rgb_coarse", "rgb_fine", "rgb_gt") - ], - dim=2, - ) - viz.image( - ims_full, - env=visdom_env, - win="images_full", - opts={"title": "coarse | fine | target"}, - ) - - # Make a 3D plot of training cameras and their emitted rays. - camera_trace = { - f"camera_{ci:03d}": o["camera"].cpu() for ci, o in enumerate(output_cache) - } - ray_pts_trace = { - f"ray_pts_{ci:03d}": Pointclouds( - ray_bundle_to_ray_points(o["coarse_ray_bundle"]) - .detach() - .cpu() - .view(1, -1, 3) - ) - for ci, o in enumerate(output_cache) - } - plotly_plot = plot_scene( - { - "training_scene": { - **camera_trace, - **ray_pts_trace, - }, - }, - pointcloud_max_points=5000, - pointcloud_marker_size=1, - camera_scale=0.3, - ) - viz.plotlyplot(plotly_plot, env=visdom_env, win="scenes") diff --git a/pytorch3d/projects/nerf/nerf/raymarcher.py b/pytorch3d/projects/nerf/nerf/raymarcher.py deleted file mode 100644 index d5a80c2a40f250e0874b29c2a288012870fcac5e..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/raymarcher.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d.renderer import EmissionAbsorptionRaymarcher -from pytorch3d.renderer.implicit.raymarching import ( - _check_density_bounds, - _check_raymarcher_inputs, - _shifted_cumprod, -) - - -class EmissionAbsorptionNeRFRaymarcher(EmissionAbsorptionRaymarcher): - """ - This is essentially the `pytorch3d.renderer.EmissionAbsorptionRaymarcher` - which additionally returns the rendering weights. It also skips returning - the computation of the alpha-mask which is, in case of NeRF, equal to 1 - everywhere. - - The weights are later used in the NeRF pipeline to carry out the importance - ray-sampling for the fine rendering pass. - - For more details about the EmissionAbsorptionRaymarcher please refer to - the documentation of `pytorch3d.renderer.EmissionAbsorptionRaymarcher`. 
- """ - - def forward( - self, - rays_densities: torch.Tensor, - rays_features: torch.Tensor, - eps: float = 1e-10, - **kwargs, - ) -> torch.Tensor: - """ - Args: - rays_densities: Per-ray density values represented with a tensor - of shape `(..., n_points_per_ray, 1)` whose values range in [0, 1]. - rays_features: Per-ray feature values represented with a tensor - of shape `(..., n_points_per_ray, feature_dim)`. - eps: A lower bound added to `rays_densities` before computing - the absorption function (cumprod of `1-rays_densities` along - each ray). This prevents the cumprod to yield exact 0 - which would inhibit any gradient-based learning. - - Returns: - features: A tensor of shape `(..., feature_dim)` containing - the rendered features for each ray. - weights: A tensor of shape `(..., n_points_per_ray)` containing - the ray-specific emission-absorption distribution. - Each ray distribution `(..., :)` is a valid probability - distribution, i.e. it contains non-negative values that integrate - to 1, such that `weights.sum(dim=-1)==1).all()` yields `True`. - """ - _check_raymarcher_inputs( - rays_densities, - rays_features, - None, - z_can_be_none=True, - features_can_be_none=False, - density_1d=True, - ) - _check_density_bounds(rays_densities) - rays_densities = rays_densities[..., 0] - absorption = _shifted_cumprod( - (1.0 + eps) - rays_densities, shift=self.surface_thickness - ) - weights = rays_densities * absorption - features = (weights[..., None] * rays_features).sum(dim=-2) - - return features, weights diff --git a/pytorch3d/projects/nerf/nerf/raysampler.py b/pytorch3d/projects/nerf/nerf/raysampler.py deleted file mode 100644 index 69e99b9ad9340207bc9bb72e6cc7bbb36b1c2d67..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/raysampler.py +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import List - -import torch -from pytorch3d.renderer import MonteCarloRaysampler, NDCMultinomialRaysampler, RayBundle -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.implicit.sample_pdf import sample_pdf - - -class ProbabilisticRaysampler(torch.nn.Module): - """ - Implements the importance sampling of points along rays. - The input is a `RayBundle` object with a `ray_weights` tensor - which specifies the probabilities of sampling a point along each ray. - - This raysampler is used for the fine rendering pass of NeRF. - As such, the forward pass accepts the RayBundle output by the - raysampling of the coarse rendering pass. Hence, it does not - take cameras as input. - """ - - def __init__( - self, - n_pts_per_ray: int, - stratified: bool, - stratified_test: bool, - add_input_samples: bool = True, - ): - """ - Args: - n_pts_per_ray: The number of points to sample along each ray. - stratified: If `True`, the input `ray_weights` are assumed to be - sampled at equidistant intervals. - stratified_test: Same as `stratified` with the difference that this - setting is applied when the module is in the `eval` mode - (`self.training==False`). - add_input_samples: Concatenates and returns the sampled values - together with the input samples. 
- """ - super().__init__() - self._n_pts_per_ray = n_pts_per_ray - self._stratified = stratified - self._stratified_test = stratified_test - self._add_input_samples = add_input_samples - - def forward( - self, - input_ray_bundle: RayBundle, - ray_weights: torch.Tensor, - **kwargs, - ) -> RayBundle: - """ - Args: - input_ray_bundle: An instance of `RayBundle` specifying the - source rays for sampling of the probability distribution. - ray_weights: A tensor of shape - `(..., input_ray_bundle.legths.shape[-1])` with non-negative - elements defining the probability distribution to sample - ray points from. - - Returns: - ray_bundle: A new `RayBundle` instance containing the input ray - points together with `n_pts_per_ray` additional sampled - points per ray. - """ - - # Calculate the mid-points between the ray depths. - z_vals = input_ray_bundle.lengths - batch_size = z_vals.shape[0] - - # Carry out the importance sampling. - with torch.no_grad(): - z_vals_mid = 0.5 * (z_vals[..., 1:] + z_vals[..., :-1]) - z_samples = sample_pdf( - z_vals_mid.view(-1, z_vals_mid.shape[-1]), - ray_weights.view(-1, ray_weights.shape[-1])[..., 1:-1], - self._n_pts_per_ray, - det=not ( - (self._stratified and self.training) - or (self._stratified_test and not self.training) - ), - ).view(batch_size, z_vals.shape[1], self._n_pts_per_ray) - - if self._add_input_samples: - # Add the new samples to the input ones. - z_vals = torch.cat((z_vals, z_samples), dim=-1) - else: - z_vals = z_samples - # Resort by depth. - z_vals, _ = torch.sort(z_vals, dim=-1) - - return RayBundle( - origins=input_ray_bundle.origins, - directions=input_ray_bundle.directions, - lengths=z_vals, - xys=input_ray_bundle.xys, - ) - - -class NeRFRaysampler(torch.nn.Module): - """ - Implements the raysampler of NeRF. - - Depending on the `self.training` flag, the raysampler either samples - a chunk of random rays (`self.training==True`), or returns a subset of rays - of the full image grid (`self.training==False`). - The chunking of rays allows for efficient evaluation of the NeRF implicit - surface function without encountering out-of-GPU-memory errors. - - Additionally, this raysampler supports pre-caching of the ray bundles - for a set of input cameras (`self.precache_rays`). - Pre-caching the rays before training greatly speeds-up the ensuing - raysampling step of the training NeRF iterations. - """ - - def __init__( - self, - n_pts_per_ray: int, - min_depth: float, - max_depth: float, - n_rays_per_image: int, - image_width: int, - image_height: int, - stratified: bool = False, - stratified_test: bool = False, - ): - """ - Args: - n_pts_per_ray: The number of points sampled along each ray. - min_depth: The minimum depth of a ray-point. - max_depth: The maximum depth of a ray-point. - n_rays_per_image: Number of Monte Carlo ray samples when training - (`self.training==True`). - image_width: The horizontal size of the image grid. - image_height: The vertical size of the image grid. - stratified: If `True`, stratifies (=randomly offsets) the depths - of each ray point during training (`self.training==True`). - stratified_test: If `True`, stratifies (=randomly offsets) the depths - of each ray point during evaluation (`self.training==False`). - """ - - super().__init__() - self._stratified = stratified - self._stratified_test = stratified_test - - # Initialize the grid ray sampler. 
- self._grid_raysampler = NDCMultinomialRaysampler( - image_width=image_width, - image_height=image_height, - n_pts_per_ray=n_pts_per_ray, - min_depth=min_depth, - max_depth=max_depth, - ) - - # Initialize the Monte Carlo ray sampler. - self._mc_raysampler = MonteCarloRaysampler( - min_x=-1.0, - max_x=1.0, - min_y=-1.0, - max_y=1.0, - n_rays_per_image=n_rays_per_image, - n_pts_per_ray=n_pts_per_ray, - min_depth=min_depth, - max_depth=max_depth, - ) - - # create empty ray cache - self._ray_cache = {} - - def get_n_chunks(self, chunksize: int, batch_size: int): - """ - Returns the total number of `chunksize`-sized chunks - of the raysampler's rays. - - Args: - chunksize: The number of rays per chunk. - batch_size: The size of the batch of the raysampler. - - Returns: - n_chunks: The total number of chunks. - """ - return int( - math.ceil( - (self._grid_raysampler._xy_grid.numel() * 0.5 * batch_size) / chunksize - ) - ) - - def _print_precaching_progress(self, i, total, bar_len=30): - """ - Print a progress bar for ray precaching. - """ - position = round((i + 1) / total * bar_len) - pbar = "[" + "β–ˆ" * position + " " * (bar_len - position) + "]" - print(pbar, end="\r") - - def precache_rays(self, cameras: List[CamerasBase], camera_hashes: List): - """ - Precaches the rays emitted from the list of cameras `cameras`, - where each camera is uniquely identified with the corresponding hash - from `camera_hashes`. - - The cached rays are moved to cpu and stored in `self._ray_cache`. - Raises `ValueError` when caching two cameras with the same hash. - - Args: - cameras: A list of `N` cameras for which the rays are pre-cached. - camera_hashes: A list of `N` unique identifiers of each - camera from `cameras`. - """ - print(f"Precaching {len(cameras)} ray bundles ...") - full_chunksize = ( - self._grid_raysampler._xy_grid.numel() - // 2 - * self._grid_raysampler._n_pts_per_ray - ) - if self.get_n_chunks(full_chunksize, 1) != 1: - raise ValueError("There has to be one chunk for precaching rays!") - for camera_i, (camera, camera_hash) in enumerate(zip(cameras, camera_hashes)): - ray_bundle = self.forward( - camera, - caching=True, - chunksize=full_chunksize, - ) - if camera_hash in self._ray_cache: - raise ValueError("There are redundant cameras!") - self._ray_cache[camera_hash] = RayBundle( - *[v.to("cpu").detach() for v in ray_bundle] - ) - self._print_precaching_progress(camera_i, len(cameras)) - print("") - - def _stratify_ray_bundle(self, ray_bundle: RayBundle): - """ - Stratifies the lengths of the input `ray_bundle`. - - More specifically, the stratification replaces each ray points' depth `z` - with a sample from a uniform random distribution on - `[z - delta_depth, z+delta_depth]`, where `delta_depth` is the difference - of depths of the consecutive ray depth values. - - Args: - `ray_bundle`: The input `RayBundle`. - - Returns: - `stratified_ray_bundle`: `ray_bundle` whose `lengths` field is replaced - with the stratified samples. - """ - z_vals = ray_bundle.lengths - # Get intervals between samples. - mids = 0.5 * (z_vals[..., 1:] + z_vals[..., :-1]) - upper = torch.cat((mids, z_vals[..., -1:]), dim=-1) - lower = torch.cat((z_vals[..., :1], mids), dim=-1) - # Stratified samples in those intervals. - z_vals = lower + (upper - lower) * torch.rand_like(lower) - return ray_bundle._replace(lengths=z_vals) - - def _normalize_raybundle(self, ray_bundle: RayBundle): - """ - Normalizes the ray directions of the input `RayBundle` to unit norm. 
- """ - ray_bundle = ray_bundle._replace( - directions=torch.nn.functional.normalize(ray_bundle.directions, dim=-1) - ) - return ray_bundle - - def forward( - self, - cameras: CamerasBase, - chunksize: int = None, - chunk_idx: int = 0, - camera_hash: str = None, - caching: bool = False, - **kwargs, - ) -> RayBundle: - """ - Args: - cameras: A batch of `batch_size` cameras from which the rays are emitted. - chunksize: The number of rays per chunk. - Active only when `self.training==False`. - chunk_idx: The index of the ray chunk. The number has to be in - `[0, self.get_n_chunks(chunksize, batch_size)-1]`. - Active only when `self.training==False`. - camera_hash: A unique identifier of a pre-cached camera. If `None`, - the cache is not searched and the rays are calculated from scratch. - caching: If `True`, activates the caching mode that returns the `RayBundle` - that should be stored into the cache. - Returns: - A named tuple `RayBundle` with the following fields: - origins: A tensor of shape - `(batch_size, n_rays_per_image, 3)` - denoting the locations of ray origins in the world coordinates. - directions: A tensor of shape - `(batch_size, n_rays_per_image, 3)` - denoting the directions of each ray in the world coordinates. - lengths: A tensor of shape - `(batch_size, n_rays_per_image, n_pts_per_ray)` - containing the z-coordinate (=depth) of each ray in world units. - xys: A tensor of shape - `(batch_size, n_rays_per_image, 2)` - containing the 2D image coordinates of each ray. - """ - - batch_size = cameras.R.shape[0] # pyre-ignore - device = cameras.device - - if (camera_hash is None) and (not caching) and self.training: - # Sample random rays from scratch. - ray_bundle = self._mc_raysampler(cameras) - ray_bundle = self._normalize_raybundle(ray_bundle) - else: - if camera_hash is not None: - # The case where we retrieve a camera from cache. - if batch_size != 1: - raise NotImplementedError( - "Ray caching works only for batches with a single camera!" - ) - full_ray_bundle = self._ray_cache[camera_hash] - else: - # We generate a full ray grid from scratch. - full_ray_bundle = self._grid_raysampler(cameras) - full_ray_bundle = self._normalize_raybundle(full_ray_bundle) - - n_pixels = full_ray_bundle.directions.shape[:-1].numel() - - if self.training: - # During training we randomly subsample rays. - sel_rays = torch.randperm( - n_pixels, device=full_ray_bundle.lengths.device - )[: self._mc_raysampler._n_rays_per_image] - else: - # In case we test, we take only the requested chunk. - if chunksize is None: - chunksize = n_pixels * batch_size - start = chunk_idx * chunksize * batch_size - end = min(start + chunksize, n_pixels) - sel_rays = torch.arange( - start, - end, - dtype=torch.long, - device=full_ray_bundle.lengths.device, - ) - - # Take the "sel_rays" rays from the full ray bundle. - ray_bundle = RayBundle( - *[ - v.view(n_pixels, -1)[sel_rays] - .view(batch_size, sel_rays.numel() // batch_size, -1) - .to(device) - for v in full_ray_bundle - ] - ) - - if ( - (self._stratified and self.training) - or (self._stratified_test and not self.training) - ) and not caching: # Make sure not to stratify when caching! 
- ray_bundle = self._stratify_ray_bundle(ray_bundle) - - return ray_bundle diff --git a/pytorch3d/projects/nerf/nerf/stats.py b/pytorch3d/projects/nerf/nerf/stats.py deleted file mode 100644 index cb02472bcf990ea8d6dfb3b4ea1484d739b754df..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/stats.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import time -import warnings -from itertools import cycle -from typing import List, Optional - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from matplotlib import colors as mcolors -from visdom import Visdom - - -class AverageMeter: - """ - Computes and stores the average and current value. - Tracks the exact history of the added values in every epoch. - """ - - def __init__(self) -> None: - """ - Initialize the structure with empty history and zero-ed moving average. - """ - self.history = [] - self.reset() - - def reset(self) -> None: - """ - Reset the running average meter. - """ - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val: float, n: int = 1, epoch: int = 0) -> None: - """ - Updates the average meter with a value `val`. - - Args: - val: A float to be added to the meter. - n: Represents the number of entities to be added. - epoch: The epoch to which the number should be added. - """ - # make sure the history is of the same len as epoch - while len(self.history) <= epoch: - self.history.append([]) - self.history[epoch].append(val / n) - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def get_epoch_averages(self): - """ - Returns: - averages: A list of average values of the metric for each epoch - in the history buffer. - """ - if len(self.history) == 0: - return None - return [ - (float(np.array(h).mean()) if len(h) > 0 else float("NaN")) - for h in self.history - ] - - -class Stats: - """ - Stats logging object useful for gathering statistics of training - a deep network in PyTorch. - - Example: - ``` - # Init stats structure that logs statistics 'objective' and 'top1e'. - stats = Stats( ('objective','top1e') ) - - network = init_net() # init a pytorch module (=neural network) - dataloader = init_dataloader() # init a dataloader - - for epoch in range(10): - - # start of epoch -> call new_epoch - stats.new_epoch() - - # Iterate over batches. - for batch in dataloader: - # Run a model and save into a dict of output variables "output" - output = network(batch) - - # stats.update() automatically parses the 'objective' and 'top1e' - # from the "output" dict and stores this into the db. - stats.update(output) - stats.print() # prints the averages over given epoch - - # Stores the training plots into '/tmp/epoch_stats.pdf' - # and plots into a visdom server running at localhost (if running). - stats.plot_stats(plot_file='/tmp/epoch_stats.pdf') - ``` - """ - - def __init__( - self, - log_vars: List[str], - verbose: bool = False, - epoch: int = -1, - plot_file: Optional[str] = None, - ) -> None: - """ - Args: - log_vars: The list of variable names to be logged. - verbose: Print status messages. - epoch: The initial epoch of the object. - plot_file: The path to the file that will hold the training plots. 
- """ - self.verbose = verbose - self.log_vars = log_vars - self.plot_file = plot_file - self.hard_reset(epoch=epoch) - - def reset(self) -> None: - """ - Called before an epoch to clear current epoch buffers. - """ - stat_sets = list(self.stats.keys()) - if self.verbose: - print("stats: epoch %d - reset" % self.epoch) - self.it = {k: -1 for k in stat_sets} - for stat_set in stat_sets: - for stat in self.stats[stat_set]: - self.stats[stat_set][stat].reset() - - # Set a new timestamp. - self._epoch_start = time.time() - - def hard_reset(self, epoch: int = -1) -> None: - """ - Erases all logged data. - """ - self._epoch_start = None - self.epoch = epoch - if self.verbose: - print("stats: epoch %d - hard reset" % self.epoch) - self.stats = {} - self.reset() - - def new_epoch(self) -> None: - """ - Initializes a new epoch. - """ - if self.verbose: - print("stats: new epoch %d" % (self.epoch + 1)) - self.epoch += 1 # increase epoch counter - self.reset() # zero the stats - - def _gather_value(self, val): - if isinstance(val, float): - pass - else: - val = val.data.cpu().numpy() - val = float(val.sum()) - return val - - def update(self, preds: dict, stat_set: str = "train") -> None: - """ - Update the internal logs with metrics of a training step. - - Each metric is stored as an instance of an AverageMeter. - - Args: - preds: Dict of values to be added to the logs. - stat_set: The set of statistics to be updated (e.g. "train", "val"). - """ - - if self.epoch == -1: # uninitialized - warnings.warn( - "self.epoch==-1 means uninitialized stats structure" - " -> new_epoch() called" - ) - self.new_epoch() - - if stat_set not in self.stats: - self.stats[stat_set] = {} - self.it[stat_set] = -1 - - self.it[stat_set] += 1 - - epoch = self.epoch - it = self.it[stat_set] - - for stat in self.log_vars: - - if stat not in self.stats[stat_set]: - self.stats[stat_set][stat] = AverageMeter() - - if stat == "sec/it": # compute speed - elapsed = time.time() - self._epoch_start - time_per_it = float(elapsed) / float(it + 1) - val = time_per_it - else: - if stat in preds: - val = self._gather_value(preds[stat]) - else: - val = None - - if val is not None: - self.stats[stat_set][stat].update(val, epoch=epoch, n=1) - - def print(self, max_it: Optional[int] = None, stat_set: str = "train") -> None: - """ - Print the current values of all stored stats. - - Args: - max_it: Maximum iteration number to be displayed. - If None, the maximum iteration number is not displayed. - stat_set: The set of statistics to be printed. - """ - - epoch = self.epoch - stats = self.stats - - str_out = "" - - it = self.it[stat_set] - stat_str = "" - stats_print = sorted(stats[stat_set].keys()) - for stat in stats_print: - if stats[stat_set][stat].count == 0: - continue - stat_str += " {0:.12}: {1:1.3f} |".format(stat, stats[stat_set][stat].avg) - - head_str = f"[{stat_set}] | epoch {epoch} | it {it}" - if max_it: - head_str += f"/ {max_it}" - - str_out = f"{head_str} | {stat_str}" - - print(str_out) - - def plot_stats( - self, - viz: Visdom = None, - visdom_env: Optional[str] = None, - plot_file: Optional[str] = None, - ) -> None: - """ - Plot the line charts of the history of the stats. - - Args: - viz: The Visdom object holding the connection to a Visdom server. - visdom_env: The visdom environment for storing the graphs. - plot_file: The path to a file with training plots. 
- """ - - stat_sets = list(self.stats.keys()) - - if viz is None: - withvisdom = False - elif not viz.check_connection(): - warnings.warn("Cannot connect to the visdom server! Skipping visdom plots.") - withvisdom = False - else: - withvisdom = True - - lines = [] - - for stat in self.log_vars: - vals = [] - stat_sets_now = [] - for stat_set in stat_sets: - val = self.stats[stat_set][stat].get_epoch_averages() - if val is None: - continue - else: - val = np.array(val).reshape(-1) - stat_sets_now.append(stat_set) - vals.append(val) - - if len(vals) == 0: - continue - - vals = np.stack(vals, axis=1) - x = np.arange(vals.shape[0]) - - lines.append((stat_sets_now, stat, x, vals)) - - if withvisdom: - for tmodes, stat, x, vals in lines: - title = "%s" % stat - opts = {"title": title, "legend": list(tmodes)} - for i, (tmode, val) in enumerate(zip(tmodes, vals.T)): - update = "append" if i > 0 else None - valid = np.where(np.isfinite(val)) - if len(valid) == 0: - continue - viz.line( - Y=val[valid], - X=x[valid], - env=visdom_env, - opts=opts, - win=f"stat_plot_{title}", - name=tmode, - update=update, - ) - - if plot_file is None: - plot_file = self.plot_file - - if plot_file is not None: - print("Exporting stats to %s" % plot_file) - ncol = 3 - nrow = int(np.ceil(float(len(lines)) / ncol)) - matplotlib.rcParams.update({"font.size": 5}) - color = cycle(plt.cm.tab10(np.linspace(0, 1, 10))) - fig = plt.figure(1) - plt.clf() - for idx, (tmodes, stat, x, vals) in enumerate(lines): - c = next(color) - plt.subplot(nrow, ncol, idx + 1) - for vali, vals_ in enumerate(vals.T): - c_ = c * (1.0 - float(vali) * 0.3) - valid = np.where(np.isfinite(vals_)) - if len(valid) == 0: - continue - plt.plot(x[valid], vals_[valid], c=c_, linewidth=1) - plt.ylabel(stat) - plt.xlabel("epoch") - plt.gca().yaxis.label.set_color(c[0:3] * 0.75) - plt.legend(tmodes) - gcolor = np.array(mcolors.to_rgba("lightgray")) - plt.grid( - b=True, which="major", color=gcolor, linestyle="-", linewidth=0.4 - ) - plt.grid( - b=True, which="minor", color=gcolor, linestyle="--", linewidth=0.2 - ) - plt.minorticks_on() - - plt.tight_layout() - plt.show() - fig.savefig(plot_file) diff --git a/pytorch3d/projects/nerf/nerf/utils.py b/pytorch3d/projects/nerf/nerf/utils.py deleted file mode 100644 index dbe5e91285a3f70ab62848a7f8927f425feb3513..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/nerf/utils.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch - - -def calc_mse(x: torch.Tensor, y: torch.Tensor): - """ - Calculates the mean square error between tensors `x` and `y`. - """ - return torch.mean((x - y) ** 2) - - -def calc_psnr(x: torch.Tensor, y: torch.Tensor): - """ - Calculates the Peak-signal-to-noise ratio between tensors `x` and `y`. - """ - mse = calc_mse(x, y) - psnr = -10.0 * torch.log10(mse) - return psnr - - -def sample_images_at_mc_locs( - target_images: torch.Tensor, - sampled_rays_xy: torch.Tensor, -): - """ - Given a set of pixel locations `sampled_rays_xy` this method samples the tensor - `target_images` at the respective 2D locations. - - This function is used in order to extract the colors from ground truth images - that correspond to the colors rendered using a Monte Carlo rendering. - - Args: - target_images: A tensor of shape `(batch_size, ..., 3)`. 
- sampled_rays_xy: A tensor of shape `(batch_size, S_1, ..., S_N, 2)`. - - Returns: - images_sampled: A tensor of shape `(batch_size, S_1, ..., S_N, 3)` - containing `target_images` sampled at `sampled_rays_xy`. - """ - ba = target_images.shape[0] - dim = target_images.shape[-1] - spatial_size = sampled_rays_xy.shape[1:-1] - - # The coordinate grid convention for grid_sample has both x and y - # directions inverted. - xy_sample = -sampled_rays_xy.view(ba, -1, 1, 2).clone() - - images_sampled = torch.nn.functional.grid_sample( - target_images.permute(0, 3, 1, 2), - xy_sample, - align_corners=True, - mode="bilinear", - ) - return images_sampled.permute(0, 2, 3, 1).view(ba, *spatial_size, dim) diff --git a/pytorch3d/projects/nerf/test_nerf.py b/pytorch3d/projects/nerf/test_nerf.py deleted file mode 100644 index 2d7bafc0b5d9e4e9b1101920fbe6a979faafa300..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/test_nerf.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -import warnings - -import hydra -import numpy as np -import torch -from nerf.dataset import get_nerf_datasets, trivial_collate -from nerf.eval_video_utils import generate_eval_video_cameras -from nerf.nerf_renderer import RadianceFieldRenderer -from nerf.stats import Stats -from omegaconf import DictConfig -from PIL import Image - - -CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") - - -@hydra.main(config_path=CONFIG_DIR, config_name="lego") -def main(cfg: DictConfig): - - # Device on which to run. - if torch.cuda.is_available(): - device = "cuda" - else: - warnings.warn( - "Please note that although executing on CPU is supported," - + "the testing is unlikely to finish in reasonable time." - ) - device = "cpu" - - # Initialize the Radiance Field model. - model = RadianceFieldRenderer( - image_size=cfg.data.image_size, - n_pts_per_ray=cfg.raysampler.n_pts_per_ray, - n_pts_per_ray_fine=cfg.raysampler.n_pts_per_ray, - n_rays_per_image=cfg.raysampler.n_rays_per_image, - min_depth=cfg.raysampler.min_depth, - max_depth=cfg.raysampler.max_depth, - stratified=cfg.raysampler.stratified, - stratified_test=cfg.raysampler.stratified_test, - chunk_size_test=cfg.raysampler.chunk_size_test, - n_harmonic_functions_xyz=cfg.implicit_function.n_harmonic_functions_xyz, - n_harmonic_functions_dir=cfg.implicit_function.n_harmonic_functions_dir, - n_hidden_neurons_xyz=cfg.implicit_function.n_hidden_neurons_xyz, - n_hidden_neurons_dir=cfg.implicit_function.n_hidden_neurons_dir, - n_layers_xyz=cfg.implicit_function.n_layers_xyz, - density_noise_std=cfg.implicit_function.density_noise_std, - ) - - # Move the model to the relevant device. - model.to(device) - - # Resume from the checkpoint. - checkpoint_path = os.path.join(hydra.utils.get_original_cwd(), cfg.checkpoint_path) - if not os.path.isfile(checkpoint_path): - raise ValueError(f"Model checkpoint {checkpoint_path} does not exist!") - - print(f"Loading checkpoint {checkpoint_path}.") - loaded_data = torch.load(checkpoint_path) - # Do not load the cached xy grid. - # - this allows setting an arbitrary evaluation image size. - state_dict = { - k: v - for k, v in loaded_data["model"].items() - if "_grid_raysampler._xy_grid" not in k - } - model.load_state_dict(state_dict, strict=False) - - # Load the test data. 
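`sample_images_at_mc_locs` above relies on `torch.nn.functional.grid_sample`, which expects NCHW inputs and sampling locations in `[-1, 1]`; the sign flip in the deleted helper accounts for the screen-coordinate convention pointing the other way, as its comment notes. A small standalone sanity check of the `grid_sample` convention itself (not using the project's helpers):

```python
import torch
import torch.nn.functional as F

image = torch.arange(4 * 4 * 3, dtype=torch.float32).reshape(1, 4, 4, 3)  # NHWC

# Sampling locations covering every pixel centre; x indexes width, y indexes height.
ys, xs = torch.meshgrid(
    torch.linspace(-1, 1, 4), torch.linspace(-1, 1, 4), indexing="ij"
)
grid = torch.stack((xs, ys), dim=-1)[None]                                # (1, 4, 4, 2)

sampled = F.grid_sample(
    image.permute(0, 3, 1, 2), grid, mode="bilinear", align_corners=True
)
assert torch.allclose(sampled.permute(0, 2, 3, 1), image)  # identity resampling
```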
- if cfg.test.mode == "evaluation": - _, _, test_dataset = get_nerf_datasets( - dataset_name=cfg.data.dataset_name, - image_size=cfg.data.image_size, - ) - elif cfg.test.mode == "export_video": - train_dataset, _, _ = get_nerf_datasets( - dataset_name=cfg.data.dataset_name, - image_size=cfg.data.image_size, - ) - test_dataset = generate_eval_video_cameras( - train_dataset, - trajectory_type=cfg.test.trajectory_type, - up=cfg.test.up, - scene_center=cfg.test.scene_center, - n_eval_cams=cfg.test.n_frames, - trajectory_scale=cfg.test.trajectory_scale, - ) - # store the video in directory (checkpoint_file - extension + '_video') - export_dir = os.path.splitext(checkpoint_path)[0] + "_video" - os.makedirs(export_dir, exist_ok=True) - else: - raise ValueError(f"Unknown test mode {cfg.test_mode}.") - - # Init the test dataloader. - test_dataloader = torch.utils.data.DataLoader( - test_dataset, - batch_size=1, - shuffle=False, - num_workers=0, - collate_fn=trivial_collate, - ) - - if cfg.test.mode == "evaluation": - # Init the test stats object. - eval_stats = ["mse_coarse", "mse_fine", "psnr_coarse", "psnr_fine", "sec/it"] - stats = Stats(eval_stats) - stats.new_epoch() - elif cfg.test.mode == "export_video": - # Init the frame buffer. - frame_paths = [] - - # Set the model to the eval mode. - model.eval() - - # Run the main testing loop. - for batch_idx, test_batch in enumerate(test_dataloader): - test_image, test_camera, camera_idx = test_batch[0].values() - if test_image is not None: - test_image = test_image.to(device) - test_camera = test_camera.to(device) - - # Activate eval mode of the model (lets us do a full rendering pass). - model.eval() - with torch.no_grad(): - test_nerf_out, test_metrics = model( - None, # we do not use pre-cached cameras - test_camera, - test_image, - ) - - if cfg.test.mode == "evaluation": - # Update stats with the validation metrics. - stats.update(test_metrics, stat_set="test") - stats.print(stat_set="test") - - elif cfg.test.mode == "export_video": - # Store the video frame. - frame = test_nerf_out["rgb_fine"][0].detach().cpu() - frame_path = os.path.join(export_dir, f"frame_{batch_idx:05d}.png") - print(f"Writing {frame_path}.") - Image.fromarray((frame.numpy() * 255.0).astype(np.uint8)).save(frame_path) - frame_paths.append(frame_path) - - if cfg.test.mode == "evaluation": - print(f"Final evaluation metrics on '{cfg.data.dataset_name}':") - for stat in eval_stats: - stat_value = stats.stats["test"][stat].get_epoch_averages()[0] - print(f"{stat:15s}: {stat_value:1.4f}") - - elif cfg.test.mode == "export_video": - # Convert the exported frames to a video. - video_path = os.path.join(export_dir, "video.mp4") - ffmpeg_bin = "ffmpeg" - frame_regexp = os.path.join(export_dir, "frame_%05d.png") - ffmcmd = ( - "%s -r %d -i %s -vcodec h264 -f mp4 -y -b 2000k -pix_fmt yuv420p %s" - % (ffmpeg_bin, cfg.test.fps, frame_regexp, video_path) - ) - ret = os.system(ffmcmd) - if ret != 0: - raise RuntimeError("ffmpeg failed!") - - -if __name__ == "__main__": - main() diff --git a/pytorch3d/projects/nerf/tests/__init__.py b/pytorch3d/projects/nerf/tests/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/tests/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
diff --git a/pytorch3d/projects/nerf/tests/test_raymarcher.py b/pytorch3d/projects/nerf/tests/test_raymarcher.py deleted file mode 100644 index e9cfb74c4f7d6262100c1c9fdcf520fc369c98b6..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/tests/test_raymarcher.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import unittest - -import torch -from nerf.raymarcher import EmissionAbsorptionNeRFRaymarcher -from pytorch3d.renderer import EmissionAbsorptionRaymarcher - - -class TestRaymarcher(unittest.TestCase): - def setUp(self) -> None: - torch.manual_seed(42) - - def test_raymarcher(self): - """ - Checks that the nerf raymarcher outputs are identical to the - EmissionAbsorptionRaymarcher. - """ - - feat_dim = 3 - rays_densities = torch.rand(100, 10, 1) - rays_features = torch.randn(100, 10, feat_dim) - - out, out_nerf = [ - raymarcher(rays_densities, rays_features) - for raymarcher in ( - EmissionAbsorptionRaymarcher(), - EmissionAbsorptionNeRFRaymarcher(), - ) - ] - - self.assertTrue( - torch.allclose(out[..., :feat_dim], out_nerf[0][..., :feat_dim]) - ) diff --git a/pytorch3d/projects/nerf/tests/test_raysampler.py b/pytorch3d/projects/nerf/tests/test_raysampler.py deleted file mode 100644 index ba53713a819a9c255edec3f606acf106b94a7145..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/tests/test_raysampler.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import unittest - -import torch -from nerf.raysampler import NeRFRaysampler, ProbabilisticRaysampler -from pytorch3d.renderer import PerspectiveCameras -from pytorch3d.transforms.rotation_conversions import random_rotations - - -class TestRaysampler(unittest.TestCase): - def setUp(self) -> None: - torch.manual_seed(42) - - def test_raysampler_caching(self, batch_size=10): - """ - Tests the consistency of the NeRF raysampler caching. - """ - - raysampler = NeRFRaysampler( - min_x=0.0, - max_x=10.0, - min_y=0.0, - max_y=10.0, - n_pts_per_ray=10, - min_depth=0.1, - max_depth=10.0, - n_rays_per_image=12, - image_width=10, - image_height=10, - stratified=False, - stratified_test=False, - invert_directions=True, - ) - - raysampler.eval() - - cameras, rays = [], [] - - for _ in range(batch_size): - - R = random_rotations(1) - T = torch.randn(1, 3) - focal_length = torch.rand(1, 2) + 0.5 - principal_point = torch.randn(1, 2) - - camera = PerspectiveCameras( - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - ) - - cameras.append(camera) - rays.append(raysampler(camera)) - - raysampler.precache_rays(cameras, list(range(batch_size))) - - for cam_index, rays_ in enumerate(rays): - rays_cached_ = raysampler( - cameras=cameras[cam_index], - chunksize=None, - chunk_idx=0, - camera_hash=cam_index, - caching=False, - ) - - for v, v_cached in zip(rays_, rays_cached_): - self.assertTrue(torch.allclose(v, v_cached)) - - def test_probabilistic_raysampler(self, batch_size=1, n_pts_per_ray=60): - """ - Check that the probabilistic ray sampler does not crash for various - settings. 
- """ - - raysampler_grid = NeRFRaysampler( - min_x=0.0, - max_x=10.0, - min_y=0.0, - max_y=10.0, - n_pts_per_ray=n_pts_per_ray, - min_depth=1.0, - max_depth=10.0, - n_rays_per_image=12, - image_width=10, - image_height=10, - stratified=False, - stratified_test=False, - invert_directions=True, - ) - - R = random_rotations(batch_size) - T = torch.randn(batch_size, 3) - focal_length = torch.rand(batch_size, 2) + 0.5 - principal_point = torch.randn(batch_size, 2) - camera = PerspectiveCameras( - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - ) - - raysampler_grid.eval() - - ray_bundle = raysampler_grid(cameras=camera) - - ray_weights = torch.rand_like(ray_bundle.lengths) - - # Just check that we dont crash for all possible settings. - for stratified_test in (True, False): - for stratified in (True, False): - raysampler_prob = ProbabilisticRaysampler( - n_pts_per_ray=n_pts_per_ray, - stratified=stratified, - stratified_test=stratified_test, - add_input_samples=True, - ) - for mode in ("train", "eval"): - getattr(raysampler_prob, mode)() - for _ in range(10): - raysampler_prob(ray_bundle, ray_weights) diff --git a/pytorch3d/projects/nerf/train_nerf.py b/pytorch3d/projects/nerf/train_nerf.py deleted file mode 100644 index 6b079bb3650d4c4569a55cdd3f6ada6501660ee9..0000000000000000000000000000000000000000 --- a/pytorch3d/projects/nerf/train_nerf.py +++ /dev/null @@ -1,273 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import collections -import os -import pickle -import warnings - -import hydra -import numpy as np -import torch -from nerf.dataset import get_nerf_datasets, trivial_collate -from nerf.nerf_renderer import RadianceFieldRenderer, visualize_nerf_outputs -from nerf.stats import Stats -from omegaconf import DictConfig -from visdom import Visdom - - -CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") - - -@hydra.main(config_path=CONFIG_DIR, config_name="lego") -def main(cfg: DictConfig): - - # Set the relevant seeds for reproducibility. - np.random.seed(cfg.seed) - torch.manual_seed(cfg.seed) - - # Device on which to run. - if torch.cuda.is_available(): - device = "cuda" - else: - warnings.warn( - "Please note that although executing on CPU is supported," - + "the training is unlikely to finish in reasonable time." - ) - device = "cpu" - - # Initialize the Radiance Field model. - model = RadianceFieldRenderer( - image_size=cfg.data.image_size, - n_pts_per_ray=cfg.raysampler.n_pts_per_ray, - n_pts_per_ray_fine=cfg.raysampler.n_pts_per_ray, - n_rays_per_image=cfg.raysampler.n_rays_per_image, - min_depth=cfg.raysampler.min_depth, - max_depth=cfg.raysampler.max_depth, - stratified=cfg.raysampler.stratified, - stratified_test=cfg.raysampler.stratified_test, - chunk_size_test=cfg.raysampler.chunk_size_test, - n_harmonic_functions_xyz=cfg.implicit_function.n_harmonic_functions_xyz, - n_harmonic_functions_dir=cfg.implicit_function.n_harmonic_functions_dir, - n_hidden_neurons_xyz=cfg.implicit_function.n_hidden_neurons_xyz, - n_hidden_neurons_dir=cfg.implicit_function.n_hidden_neurons_dir, - n_layers_xyz=cfg.implicit_function.n_layers_xyz, - density_noise_std=cfg.implicit_function.density_noise_std, - visualization=cfg.visualization.visdom, - ) - - # Move the model to the relevant device. 
- model.to(device) - - # Init stats to None before loading. - stats = None - optimizer_state_dict = None - start_epoch = 0 - - checkpoint_path = os.path.join(hydra.utils.get_original_cwd(), cfg.checkpoint_path) - if len(cfg.checkpoint_path) > 0: - # Make the root of the experiment directory. - checkpoint_dir = os.path.split(checkpoint_path)[0] - os.makedirs(checkpoint_dir, exist_ok=True) - - # Resume training if requested. - if cfg.resume and os.path.isfile(checkpoint_path): - print(f"Resuming from checkpoint {checkpoint_path}.") - loaded_data = torch.load(checkpoint_path) - model.load_state_dict(loaded_data["model"]) - stats = pickle.loads(loaded_data["stats"]) - print(f" => resuming from epoch {stats.epoch}.") - optimizer_state_dict = loaded_data["optimizer"] - start_epoch = stats.epoch - - # Initialize the optimizer. - optimizer = torch.optim.Adam( - model.parameters(), - lr=cfg.optimizer.lr, - ) - - # Load the optimizer state dict in case we are resuming. - if optimizer_state_dict is not None: - optimizer.load_state_dict(optimizer_state_dict) - optimizer.last_epoch = start_epoch - - # Init the stats object. - if stats is None: - stats = Stats( - ["loss", "mse_coarse", "mse_fine", "psnr_coarse", "psnr_fine", "sec/it"], - ) - - # Learning rate scheduler setup. - - # Following the original code, we use exponential decay of the - # learning rate: current_lr = base_lr * gamma ** (epoch / step_size) - def lr_lambda(epoch): - return cfg.optimizer.lr_scheduler_gamma ** ( - epoch / cfg.optimizer.lr_scheduler_step_size - ) - - # The learning rate scheduling is implemented with LambdaLR PyTorch scheduler. - lr_scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, lr_lambda, last_epoch=start_epoch - 1, verbose=False - ) - - # Initialize the cache for storing variables needed for visualization. - visuals_cache = collections.deque(maxlen=cfg.visualization.history_size) - - # Init the visualization visdom env. - if cfg.visualization.visdom: - viz = Visdom( - server=cfg.visualization.visdom_server, - port=cfg.visualization.visdom_port, - use_incoming_socket=False, - ) - else: - viz = None - - # Load the training/validation data. - train_dataset, val_dataset, _ = get_nerf_datasets( - dataset_name=cfg.data.dataset_name, - image_size=cfg.data.image_size, - ) - - if cfg.data.precache_rays: - # Precache the projection rays. - model.eval() - with torch.no_grad(): - for dataset in (train_dataset, val_dataset): - cache_cameras = [e["camera"].to(device) for e in dataset] - cache_camera_hashes = [e["camera_idx"] for e in dataset] - model.precache_rays(cache_cameras, cache_camera_hashes) - - train_dataloader = torch.utils.data.DataLoader( - train_dataset, - batch_size=1, - shuffle=True, - num_workers=0, - collate_fn=trivial_collate, - ) - - # The validation dataloader is just an endless stream of random samples. - val_dataloader = torch.utils.data.DataLoader( - val_dataset, - batch_size=1, - num_workers=0, - collate_fn=trivial_collate, - sampler=torch.utils.data.RandomSampler( - val_dataset, - replacement=True, - num_samples=cfg.optimizer.max_epochs, - ), - ) - - # Set the model to the training mode. - model.train() - - # Run the main training loop. - for epoch in range(start_epoch, cfg.optimizer.max_epochs): - stats.new_epoch() # Init a new epoch. - for iteration, batch in enumerate(train_dataloader): - image, camera, camera_idx = batch[0].values() - image = image.to(device) - camera = camera.to(device) - - optimizer.zero_grad() - - # Run the forward pass of the model. 
- nerf_out, metrics = model( - camera_idx if cfg.data.precache_rays else None, - camera, - image, - ) - - # The loss is a sum of coarse and fine MSEs - loss = metrics["mse_coarse"] + metrics["mse_fine"] - - # Take the training step. - loss.backward() - optimizer.step() - - # Update stats with the current metrics. - stats.update( - {"loss": float(loss), **metrics}, - stat_set="train", - ) - - if iteration % cfg.stats_print_interval == 0: - stats.print(stat_set="train") - - # Update the visualization cache. - if viz is not None: - visuals_cache.append( - { - "camera": camera.cpu(), - "camera_idx": camera_idx, - "image": image.cpu().detach(), - "rgb_fine": nerf_out["rgb_fine"].cpu().detach(), - "rgb_coarse": nerf_out["rgb_coarse"].cpu().detach(), - "rgb_gt": nerf_out["rgb_gt"].cpu().detach(), - "coarse_ray_bundle": nerf_out["coarse_ray_bundle"], - } - ) - - # Adjust the learning rate. - lr_scheduler.step() - - # Validation - if epoch % cfg.validation_epoch_interval == 0 and epoch > 0: - - # Sample a validation camera/image. - val_batch = next(val_dataloader.__iter__()) - val_image, val_camera, camera_idx = val_batch[0].values() - val_image = val_image.to(device) - val_camera = val_camera.to(device) - - # Activate eval mode of the model (lets us do a full rendering pass). - model.eval() - with torch.no_grad(): - val_nerf_out, val_metrics = model( - camera_idx if cfg.data.precache_rays else None, - val_camera, - val_image, - ) - - # Update stats with the validation metrics. - stats.update(val_metrics, stat_set="val") - stats.print(stat_set="val") - - if viz is not None: - # Plot that loss curves into visdom. - stats.plot_stats( - viz=viz, - visdom_env=cfg.visualization.visdom_env, - plot_file=None, - ) - # Visualize the intermediate results. - visualize_nerf_outputs( - val_nerf_out, visuals_cache, viz, cfg.visualization.visdom_env - ) - - # Set the model back to train mode. - model.train() - - # Checkpoint. - if ( - epoch % cfg.checkpoint_epoch_interval == 0 - and len(cfg.checkpoint_path) > 0 - and epoch > 0 - ): - print(f"Storing checkpoint {checkpoint_path}.") - data_to_store = { - "model": model.state_dict(), - "optimizer": optimizer.state_dict(), - "stats": pickle.dumps(stats), - } - torch.save(data_to_store, checkpoint_path) - - -if __name__ == "__main__": - main() diff --git a/pytorch3d/pytorch3d/__init__.py b/pytorch3d/pytorch3d/__init__.py deleted file mode 100644 index dc4b478e577e3c8e19a33833e7ccbe7091dab600..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -__version__ = "0.7.6" diff --git a/pytorch3d/pytorch3d/common/__init__.py b/pytorch3d/pytorch3d/common/__init__.py deleted file mode 100644 index 8f5d84e961ca7f04618ee01cb3fa2fa658c5bd97..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from .datatypes import Device, get_device, make_device - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/common/compat.py b/pytorch3d/pytorch3d/common/compat.py deleted file mode 100644 index 5c155f12f4157e2d74da642a61ec5a4f180d3357..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/compat.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Sequence, Tuple, Union - -import torch - - -""" -Some functions which depend on PyTorch or Python versions. -""" - - -def meshgrid_ij( - *A: Union[torch.Tensor, Sequence[torch.Tensor]] -) -> Tuple[torch.Tensor, ...]: # pragma: no cover - """ - Like torch.meshgrid was before PyTorch 1.10.0, i.e. with indexing set to ij - """ - if ( - # pyre-fixme[16]: Callable `meshgrid` has no attribute `__kwdefaults__`. - torch.meshgrid.__kwdefaults__ is not None - and "indexing" in torch.meshgrid.__kwdefaults__ - ): - # PyTorch >= 1.10.0 - # pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but - # got `Union[Sequence[Tensor], Tensor]`. - return torch.meshgrid(*A, indexing="ij") - # pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but got - # `Union[Sequence[Tensor], Tensor]`. - return torch.meshgrid(*A) - - -def prod(iterable, *, start=1): - """ - Like math.prod in Python 3.8 and later. - """ - for i in iterable: - start *= i - return start diff --git a/pytorch3d/pytorch3d/common/datatypes.py b/pytorch3d/pytorch3d/common/datatypes.py deleted file mode 100644 index 03fe3efc54dd81044ee579ee0aba8641eaa6b834..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/datatypes.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional, Union - -import torch - - -Device = Union[str, torch.device] - - -def make_device(device: Device) -> torch.device: - """ - Makes an actual torch.device object from the device specified as - either a string or torch.device object. If the device is `cuda` without - a specific index, the index of the current device is assigned. - - Args: - device: Device (as str or torch.device) - - Returns: - A matching torch.device object - """ - device = torch.device(device) if isinstance(device, str) else device - if device.type == "cuda" and device.index is None: - # If cuda but with no index, then the current cuda device is indicated. - # In that case, we fix to that device - device = torch.device(f"cuda:{torch.cuda.current_device()}") - return device - - -def get_device(x, device: Optional[Device] = None) -> torch.device: - """ - Gets the device of the specified variable x if it is a tensor, or - falls back to a default CPU device otherwise. Allows overriding by - providing an explicit device. 
- - Args: - x: a torch.Tensor to get the device from or another type - device: Device (as str or torch.device) to fall back to - - Returns: - A matching torch.device object - """ - - # User overrides device - if device is not None: - return make_device(device) - - # Set device based on input tensor - if torch.is_tensor(x): - return x.device - - # Default device is cpu - return torch.device("cpu") diff --git a/pytorch3d/pytorch3d/common/linear_with_repeat.py b/pytorch3d/pytorch3d/common/linear_with_repeat.py deleted file mode 100644 index 2dd477be3f1045386bdfc5d588101b8c7be7ab31..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/linear_with_repeat.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Tuple - -import torch -import torch.nn.functional as F -from torch.nn import init, Parameter - - -class LinearWithRepeat(torch.nn.Module): - """ - if x has shape (..., k, n1) - and y has shape (..., n2) - then - LinearWithRepeat(n1 + n2, out_features).forward((x,y)) - is equivalent to - Linear(n1 + n2, out_features).forward( - torch.cat([x, y.unsqueeze(-2).expand(..., k, n2)], dim=-1) - ) - - Or visually: - Given the following, for each ray, - - feature -> - - ray xxxxxxxx - position xxxxxxxx - | xxxxxxxx - v xxxxxxxx - - - and - yyyyyyyy - - where the y's do not depend on the position - but only on the ray, - we want to evaluate a Linear layer on both - types of data at every position. - - It's as if we constructed - - xxxxxxxxyyyyyyyy - xxxxxxxxyyyyyyyy - xxxxxxxxyyyyyyyy - xxxxxxxxyyyyyyyy - - and sent that through the Linear. - """ - - def __init__( - self, - in_features: int, - out_features: int, - bias: bool = True, - device=None, - dtype=None, - ) -> None: - """ - Copied from torch.nn.Linear. - """ - factory_kwargs = {"device": device, "dtype": dtype} - super().__init__() - self.in_features = in_features - self.out_features = out_features - self.weight = Parameter( - torch.empty((out_features, in_features), **factory_kwargs) - ) - if bias: - self.bias = Parameter(torch.empty(out_features, **factory_kwargs)) - else: - self.register_parameter("bias", None) - self.reset_parameters() - - def reset_parameters(self) -> None: - """ - Copied from torch.nn.Linear. - """ - init.kaiming_uniform_(self.weight, a=math.sqrt(5)) - if self.bias is not None: - fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) - bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0 - init.uniform_(self.bias, -bound, bound) - - def forward(self, input: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: - n1 = input[0].shape[-1] - output1 = F.linear(input[0], self.weight[:, :n1], self.bias) - output2 = F.linear(input[1], self.weight[:, n1:], None) - return output1 + output2.unsqueeze(-2) diff --git a/pytorch3d/pytorch3d/common/workaround/__init__.py b/pytorch3d/pytorch3d/common/workaround/__init__.py deleted file mode 100644 index 64c5d3fab285ba8b52c945a65f0d0a3996a5a581..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/workaround/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from .symeig3x3 import symeig3x3 -from .utils import _safe_det_3x3 diff --git a/pytorch3d/pytorch3d/common/workaround/symeig3x3.py b/pytorch3d/pytorch3d/common/workaround/symeig3x3.py deleted file mode 100644 index 479f8b000a81d8a6b35e389e4db5f0cd7dbc769b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/workaround/symeig3x3.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Optional, Tuple - -import torch -import torch.nn.functional as F -from torch import nn - - -class _SymEig3x3(nn.Module): - """ - Optimized implementation of eigenvalues and eigenvectors computation for symmetric 3x3 - matrices. - - Please see https://en.wikipedia.org/wiki/Eigenvalue_algorithm#3.C3.973_matrices - and https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf - """ - - def __init__(self, eps: Optional[float] = None) -> None: - """ - Args: - eps: epsilon to specify, if None then use torch.float eps - """ - super().__init__() - - self.register_buffer("_identity", torch.eye(3)) - self.register_buffer("_rotation_2d", torch.tensor([[0.0, -1.0], [1.0, 0.0]])) - self.register_buffer( - "_rotations_3d", self._create_rotation_matrices(self._rotation_2d) - ) - - self._eps = eps or torch.finfo(torch.float).eps - - @staticmethod - def _create_rotation_matrices(rotation_2d) -> torch.Tensor: - """ - Compute rotations for later use in U V computation - - Args: - rotation_2d: a Ο€/2 rotation matrix. - - Returns: - a (3, 3, 3) tensor containing 3 rotation matrices around each of the coordinate axes - by Ο€/2 - """ - - rotations_3d = torch.zeros((3, 3, 3)) - rotation_axes = set(range(3)) - for rotation_axis in rotation_axes: - rest = list(rotation_axes - {rotation_axis}) - rotations_3d[rotation_axis][rest[0], rest] = rotation_2d[0] - rotations_3d[rotation_axis][rest[1], rest] = rotation_2d[1] - - return rotations_3d - - def forward( - self, inputs: torch.Tensor, eigenvectors: bool = True - ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: - """ - Compute eigenvalues and (optionally) eigenvectors - - Args: - inputs: symmetric matrices with shape of (..., 3, 3) - eigenvectors: whether should we compute only eigenvalues or eigenvectors as well - - Returns: - Either a tuple of (eigenvalues, eigenvectors) or eigenvalues only, depending on - given params. Eigenvalues are of shape (..., 3) and eigenvectors (..., 3, 3) - """ - if inputs.shape[-2:] != (3, 3): - raise ValueError("Only inputs of shape (..., 3, 3) are supported.") - - inputs_diag = inputs.diagonal(dim1=-2, dim2=-1) - inputs_trace = inputs_diag.sum(-1) - q = inputs_trace / 3.0 - - # Calculate squared sum of elements outside the main diagonal / 2 - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - p1 = ((inputs**2).sum(dim=(-1, -2)) - (inputs_diag**2).sum(-1)) / 2 - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - p2 = ((inputs_diag - q[..., None]) ** 2).sum(dim=-1) + 2.0 * p1.clamp(self._eps) - - p = torch.sqrt(p2 / 6.0) - B = (inputs - q[..., None, None] * self._identity) / p[..., None, None] - - r = torch.det(B) / 2.0 - # Keep r within (-1.0, 1.0) boundaries with a margin to prevent exploding gradients. 
- r = r.clamp(-1.0 + self._eps, 1.0 - self._eps) - - phi = torch.acos(r) / 3.0 - eig1 = q + 2 * p * torch.cos(phi) - eig2 = q + 2 * p * torch.cos(phi + 2 * math.pi / 3) - eig3 = 3 * q - eig1 - eig2 - # eigenvals[..., i] is the i-th eigenvalue of the input, Ξ±0 ≀ Ξ±1 ≀ Ξ±2. - eigenvals = torch.stack((eig2, eig3, eig1), dim=-1) - - # Soft dispatch between the degenerate case (diagonal A) and general. - # diag_soft_cond -> 1.0 when p1 < 6 * eps and diag_soft_cond -> 0.0 otherwise. - # We use 6 * eps to take into account the error accumulated during the p1 summation - diag_soft_cond = torch.exp(-((p1 / (6 * self._eps)) ** 2)).detach()[..., None] - - # Eigenvalues are the ordered elements of main diagonal in the degenerate case - diag_eigenvals, _ = torch.sort(inputs_diag, dim=-1) - eigenvals = diag_soft_cond * diag_eigenvals + (1.0 - diag_soft_cond) * eigenvals - - if eigenvectors: - eigenvecs = self._construct_eigenvecs_set(inputs, eigenvals) - else: - eigenvecs = None - - return eigenvals, eigenvecs - - def _construct_eigenvecs_set( - self, inputs: torch.Tensor, eigenvals: torch.Tensor - ) -> torch.Tensor: - """ - Construct orthonormal set of eigenvectors by given inputs and pre-computed eigenvalues - - Args: - inputs: tensor of symmetric matrices of shape (..., 3, 3) - eigenvals: tensor of pre-computed eigenvalues of of shape (..., 3, 3) - - Returns: - Tuple of three eigenvector tensors of shape (..., 3, 3), composing an orthonormal - set - """ - eigenvecs_tuple_for_01 = self._construct_eigenvecs( - inputs, eigenvals[..., 0], eigenvals[..., 1] - ) - eigenvecs_for_01 = torch.stack(eigenvecs_tuple_for_01, dim=-1) - - eigenvecs_tuple_for_21 = self._construct_eigenvecs( - inputs, eigenvals[..., 2], eigenvals[..., 1] - ) - eigenvecs_for_21 = torch.stack(eigenvecs_tuple_for_21[::-1], dim=-1) - - # The result will be smooth here even if both parts of comparison - # are close, because eigenvecs_01 and eigenvecs_21 would be mostly equal as well - eigenvecs_cond = ( - eigenvals[..., 1] - eigenvals[..., 0] - > eigenvals[..., 2] - eigenvals[..., 1] - ).detach() - eigenvecs = torch.where( - eigenvecs_cond[..., None, None], eigenvecs_for_01, eigenvecs_for_21 - ) - - return eigenvecs - - def _construct_eigenvecs( - self, inputs: torch.Tensor, alpha0: torch.Tensor, alpha1: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Construct an orthonormal set of eigenvectors by given pair of eigenvalues. 
- - Args: - inputs: tensor of symmetric matrices of shape (..., 3, 3) - alpha0: first eigenvalues of shape (..., 3) - alpha1: second eigenvalues of shape (..., 3) - - Returns: - Tuple of three eigenvector tensors of shape (..., 3, 3), composing an orthonormal - set - """ - - # Find the eigenvector corresponding to alpha0, its eigenvalue is distinct - ev0 = self._get_ev0(inputs - alpha0[..., None, None] * self._identity) - u, v = self._get_uv(ev0) - ev1 = self._get_ev1(inputs - alpha1[..., None, None] * self._identity, u, v) - # Third eigenvector is computed as the cross-product of the other two - ev2 = torch.cross(ev0, ev1, dim=-1) - - return ev0, ev1, ev2 - - def _get_ev0(self, char_poly: torch.Tensor) -> torch.Tensor: - """ - Construct the first normalized eigenvector given a characteristic polynomial - - Args: - char_poly: a characteristic polynomials of the input matrices of shape (..., 3, 3) - - Returns: - Tensor of first eigenvectors of shape (..., 3) - """ - - r01 = torch.cross(char_poly[..., 0, :], char_poly[..., 1, :], dim=-1) - r12 = torch.cross(char_poly[..., 1, :], char_poly[..., 2, :], dim=-1) - r02 = torch.cross(char_poly[..., 0, :], char_poly[..., 2, :], dim=-1) - - cross_products = torch.stack((r01, r12, r02), dim=-2) - # Regularize it with + or -eps depending on the sign of the first vector - cross_products += self._eps * self._sign_without_zero( - cross_products[..., :1, :] - ) - - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - norms_sq = (cross_products**2).sum(dim=-1) - max_norms_index = norms_sq.argmax(dim=-1) - - # Pick only the cross-product with highest squared norm for each input - max_cross_products = self._gather_by_index( - cross_products, max_norms_index[..., None, None], -2 - ) - # Pick corresponding squared norms for each cross-product - max_norms_sq = self._gather_by_index(norms_sq, max_norms_index[..., None], -1) - - # Normalize cross-product vectors by thier norms - return max_cross_products / torch.sqrt(max_norms_sq[..., None]) - - def _gather_by_index( - self, source: torch.Tensor, index: torch.Tensor, dim: int - ) -> torch.Tensor: - """ - Selects elements from the given source tensor by provided index tensor. - Number of dimensions should be the same for source and index tensors. - - Args: - source: input tensor to gather from - index: index tensor with indices to gather from source - dim: dimension to gather across - - Returns: - Tensor of shape same as the source with exception of specified dimension. - """ - - index_shape = list(source.shape) - index_shape[dim] = 1 - - return source.gather(dim, index.expand(index_shape)).squeeze(dim) - - def _get_uv(self, w: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Computes unit-length vectors U and V such that {U, V, W} is a right-handed - orthonormal set. 
- - Args: - w: eigenvector tensor of shape (..., 3) - - Returns: - Tuple of U and V unit-length vector tensors of shape (..., 3) - """ - - min_idx = w.abs().argmin(dim=-1) - rotation_2d = self._rotations_3d[min_idx].to(w) - - u = F.normalize((rotation_2d @ w[..., None])[..., 0], dim=-1) - v = torch.cross(w, u, dim=-1) - return u, v - - def _get_ev1( - self, char_poly: torch.Tensor, u: torch.Tensor, v: torch.Tensor - ) -> torch.Tensor: - """ - Computes the second normalized eigenvector given a characteristic polynomial - and U and V vectors - - Args: - char_poly: a characteristic polynomials of the input matrices of shape (..., 3, 3) - u: unit-length vectors from _get_uv method - v: unit-length vectors from _get_uv method - - Returns: - desc - """ - - j = torch.stack((u, v), dim=-1) - m = j.transpose(-1, -2) @ char_poly @ j - - # If angle between those vectors is acute, take their sum = m[..., 0, :] + m[..., 1, :], - # otherwise take the difference = m[..., 0, :] - m[..., 1, :] - # m is in theory of rank 1 (or 0), so it snaps only when one of the rows is close to 0 - is_acute_sign = self._sign_without_zero( - (m[..., 0, :] * m[..., 1, :]).sum(dim=-1) - ).detach() - - rowspace = m[..., 0, :] + is_acute_sign[..., None] * m[..., 1, :] - # rowspace will be near zero for second-order eigenvalues - # this regularization guarantees abs(rowspace[0]) >= eps in a smooth'ish way - rowspace += self._eps * self._sign_without_zero(rowspace[..., :1]) - - return ( - j - @ F.normalize(rowspace @ self._rotation_2d.to(rowspace), dim=-1)[..., None] - )[..., 0] - - @staticmethod - def _sign_without_zero(tensor): - """ - Args: - tensor: an arbitrary shaped tensor - - Returns: - Tensor of the same shape as an input, but with 1.0 if tensor > 0.0 and -1.0 - otherwise - """ - return 2.0 * (tensor > 0.0).to(tensor.dtype) - 1.0 - - -def symeig3x3( - inputs: torch.Tensor, eigenvectors: bool = True -) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: - """ - Compute eigenvalues and (optionally) eigenvectors - - Args: - inputs: symmetric matrices with shape of (..., 3, 3) - eigenvectors: whether should we compute only eigenvalues or eigenvectors as well - - Returns: - Either a tuple of (eigenvalues, eigenvectors) or eigenvalues only, depending on - given params. Eigenvalues are of shape (..., 3) and eigenvectors (..., 3, 3) - """ - return _SymEig3x3().to(inputs.device)(inputs, eigenvectors=eigenvectors) diff --git a/pytorch3d/pytorch3d/common/workaround/utils.py b/pytorch3d/pytorch3d/common/workaround/utils.py deleted file mode 100644 index 6cd694129a154551a986f30e1f3b88c772a44237..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/common/workaround/utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch - - -def _safe_det_3x3(t: torch.Tensor): - """ - Fast determinant calculation for a batch of 3x3 matrices. - - Note, result of this function might not be the same as `torch.det()`. - The differences might be in the last significant digit. - - Args: - t: Tensor of shape (N, 3, 3). - - Returns: - Tensor of shape (N) with determinants. 
- """ - - det = ( - t[..., 0, 0] * (t[..., 1, 1] * t[..., 2, 2] - t[..., 1, 2] * t[..., 2, 1]) - - t[..., 0, 1] * (t[..., 1, 0] * t[..., 2, 2] - t[..., 2, 0] * t[..., 1, 2]) - + t[..., 0, 2] * (t[..., 1, 0] * t[..., 2, 1] - t[..., 2, 0] * t[..., 1, 1]) - ) - - return det diff --git a/pytorch3d/pytorch3d/csrc/ball_query/ball_query.cu b/pytorch3d/pytorch3d/csrc/ball_query/ball_query.cu deleted file mode 100644 index 586701c18150b2fbd91c7b48989d9b96b1cfd55f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/ball_query/ball_query.cu +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include - -// A chunk of work is blocksize-many points of P1. -// The number of potential chunks to do is N*(1+(P1-1)/blocksize) -// call (1+(P1-1)/blocksize) chunks_per_cloud -// These chunks are divided among the gridSize-many blocks. -// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc . -// In chunk i, we work on cloud i/chunks_per_cloud on points starting from -// blocksize*(i%chunks_per_cloud). - -template -__global__ void BallQueryKernel( - const at::PackedTensorAccessor64 p1, - const at::PackedTensorAccessor64 p2, - const at::PackedTensorAccessor64 - lengths1, - const at::PackedTensorAccessor64 - lengths2, - at::PackedTensorAccessor64 idxs, - at::PackedTensorAccessor64 dists, - const int64_t K, - const float radius2) { - const int64_t N = p1.size(0); - const int64_t chunks_per_cloud = (1 + (p1.size(1) - 1) / blockDim.x); - const int64_t chunks_to_do = N * chunks_per_cloud; - const int D = p1.size(2); - - for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t n = chunk / chunks_per_cloud; // batch_index - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t i = start_point + threadIdx.x; - - // Check if point is valid in heterogeneous tensor - if (i >= lengths1[n]) { - continue; - } - - // Iterate over points in p2 until desired count is reached or - // all points have been considered - for (int64_t j = 0, count = 0; j < lengths2[n] && count < K; ++j) { - // Calculate the distance between the points - scalar_t dist2 = 0.0; - for (int d = 0; d < D; ++d) { - scalar_t diff = p1[n][i][d] - p2[n][j][d]; - dist2 += (diff * diff); - } - - if (dist2 < radius2) { - // If the point is within the radius - // Set the value of the index to the point index - idxs[n][i][count] = j; - dists[n][i][count] = dist2; - - // increment the number of selected samples for the point i - ++count; - } - } - } -} - -std::tuple BallQueryCuda( - const at::Tensor& p1, // (N, P1, 3) - const at::Tensor& p2, // (N, P2, 3) - const at::Tensor& lengths1, // (N,) - const at::Tensor& lengths2, // (N,) - int K, - float radius) { - // Check inputs are on the same device - at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2}, - lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4}; - at::CheckedFrom c = "BallQueryCuda"; - at::checkAllSameGPU(c, {p1_t, p2_t, lengths1_t, lengths2_t}); - at::checkAllSameType(c, {p1_t, p2_t}); - - // Set the device for the kernel launch based on the device of p1 - at::cuda::CUDAGuard device_guard(p1.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - TORCH_CHECK( - p2.size(2) == p1.size(2), "Point sets must have the same last 
dimension"); - - const int N = p1.size(0); - const int P1 = p1.size(1); - const int64_t K_64 = K; - const float radius2 = radius * radius; - - // Output tensor with indices of neighbors for each point in p1 - auto long_dtype = lengths1.options().dtype(at::kLong); - auto idxs = at::full({N, P1, K}, -1, long_dtype); - auto dists = at::zeros({N, P1, K}, p1.options()); - - if (idxs.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(idxs, dists); - } - - const size_t blocks = 256; - const size_t threads = 256; - - AT_DISPATCH_FLOATING_TYPES( - p1.scalar_type(), "ball_query_kernel_cuda", ([&] { - BallQueryKernel<<>>( - p1.packed_accessor64(), - p2.packed_accessor64(), - lengths1.packed_accessor64(), - lengths2.packed_accessor64(), - idxs.packed_accessor64(), - dists.packed_accessor64(), - K_64, - radius2); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - - return std::make_tuple(idxs, dists); -} diff --git a/pytorch3d/pytorch3d/csrc/ball_query/ball_query.h b/pytorch3d/pytorch3d/csrc/ball_query/ball_query.h deleted file mode 100644 index 059cad8b88b5362304135984827bac8db375a548..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/ball_query/ball_query.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Compute indices of K neighbors in pointcloud p2 to points -// in pointcloud p1 which fall within a specified radius -// -// Args: -// p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each -// containing P1 points of dimension D. -// p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each -// containing P2 points of dimension D. -// lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. -// lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. -// K: Integer giving the upper bound on the number of samples to take -// within the radius -// radius: the radius around each point within which the neighbors need to be -// located -// -// Returns: -// p1_neighbor_idx: LongTensor of shape (N, P1, K), where -// p1_neighbor_idx[n, i, k] = j means that the kth -// neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. -// This is padded with -1s both where a cloud in p2 has fewer than -// S points and where a cloud in p1 has fewer than P1 points and -// also if there are fewer than K points which satisfy the radius -// threshold. -// -// p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared -// distance from each point p1[n, p, :] to its K neighbors -// p2[n, p1_neighbor_idx[n, p, k], :]. 
- -// CPU implementation -std::tuple BallQueryCpu( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int K, - const float radius); - -// CUDA implementation -std::tuple BallQueryCuda( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int K, - const float radius); - -// Implementation which is exposed -// Note: the backward pass reuses the KNearestNeighborBackward kernel -inline std::tuple BallQuery( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - int K, - float radius) { - if (p1.is_cuda() || p2.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(p1); - CHECK_CUDA(p2); - return BallQueryCuda( - p1.contiguous(), - p2.contiguous(), - lengths1.contiguous(), - lengths2.contiguous(), - K, - radius); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return BallQueryCpu( - p1.contiguous(), - p2.contiguous(), - lengths1.contiguous(), - lengths2.contiguous(), - K, - radius); -} diff --git a/pytorch3d/pytorch3d/csrc/ball_query/ball_query_cpu.cpp b/pytorch3d/pytorch3d/csrc/ball_query/ball_query_cpu.cpp deleted file mode 100644 index a38447175ef7d5eeb60061a1ff1ea363bc24d77c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/ball_query/ball_query_cpu.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -std::tuple BallQueryCpu( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - int K, - float radius) { - const int N = p1.size(0); - const int P1 = p1.size(1); - const int D = p1.size(2); - - auto long_opts = lengths1.options().dtype(torch::kInt64); - torch::Tensor idxs = torch::full({N, P1, K}, -1, long_opts); - torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options()); - const float radius2 = radius * radius; - - auto p1_a = p1.accessor(); - auto p2_a = p2.accessor(); - auto lengths1_a = lengths1.accessor(); - auto lengths2_a = lengths2.accessor(); - auto idxs_a = idxs.accessor(); - auto dists_a = dists.accessor(); - - for (int n = 0; n < N; ++n) { - const int64_t length1 = lengths1_a[n]; - const int64_t length2 = lengths2_a[n]; - for (int64_t i = 0; i < length1; ++i) { - for (int64_t j = 0, count = 0; j < length2 && count < K; ++j) { - float dist2 = 0; - for (int d = 0; d < D; ++d) { - float diff = p1_a[n][i][d] - p2_a[n][j][d]; - dist2 += diff * diff; - } - if (dist2 < radius2) { - dists_a[n][i][count] = dist2; - idxs_a[n][i][count] = j; - ++count; - } - } - } - } - return std::make_tuple(idxs, dists); -} diff --git a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.cu b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.cu deleted file mode 100644 index 76912c441b155e03e2470144835850cd567cb060..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.cu +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include - -template -__global__ void SigmoidAlphaBlendForwardKernel( - // clang-format off - const at::PackedTensorAccessor64 distances, // (N, H, W, K) - const at::PackedTensorAccessor64 pix_to_face, // (N, H, W, K) - at::PackedTensorAccessor64 alphas, // (N, H, W) - // clang-format on - const scalar_t sigma, - const int N, - const int H, - const int W, - const int K) { - // Parallelize over each pixel in images of - // size H * W, for each image in the batch of size N. - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - // TODO: revisit performance of this kernel with shared memory usage - - for (int t_i = tid; t_i < N * H * W; t_i += num_threads) { - // Convert linear index to 3D index - const int n = t_i / (H * W); // batch index. - const int pix_idx = t_i % (H * W); - - // TODO: fix index calculation for non square images. - const int yi = pix_idx / W; - const int xi = pix_idx % W; - scalar_t alpha = 1.0; - - // Loop over all the faces for this pixel. - for (int k = 0; k < K; k++) { - // Index into (N, H, W, K) tensors - const int f = pix_to_face[n][yi][xi][k]; - if (f < 0) { - // Sentinel value is -1 indicating no face overlaps the pixel. - continue; - } - // The distance is negative if a pixel is inside a face and positive - // outside the face. Therefore use -1.0 * the distance to get the - // correct sign. - scalar_t dist = -1.0 * distances[n][yi][xi][k]; - - // Calculate the sigmoid probability. - scalar_t prob = 1. / (1. + exp(-dist / sigma)); - - // The cumulative product ensures that alpha will be 0.0 if at least 1 - // face fully covers the pixel as for that face, prob will be 1.0. - // This results in a multiplication by 0.0 because of the (1.0 - prob) - // term. Therefore the final result of (1.0 - alpha) will be 1.0. 
- alpha *= (1.0 - prob); - } - alphas[n][yi][xi] = 1.0 - alpha; - } -} - -at::Tensor SigmoidAlphaBlendForwardCuda( - const at::Tensor& distances, // (N, H, W, K) - const at::Tensor& pix_to_face, // (N, H, W, K) - const float sigma) { - const int N = distances.size(0); - const int H = distances.size(1); - const int W = distances.size(2); - const int K = distances.size(3); - - at::Tensor alphas = at::zeros({N, H, W}, distances.options()); - const size_t blocks = 1024; - const size_t threads = 128; - - // Check inputs are on the same device - at::TensorArg distances_t{distances, "distances", 1}, - pix_to_face_t{pix_to_face, "pix_to_face", 2}; - at::CheckedFrom c = "SigmoidAlphaBlendForwardCuda"; - at::checkAllSameGPU(c, {distances_t, pix_to_face_t}); - - // Set the device for the kernel launch based on the device of distances - at::cuda::CUDAGuard device_guard(distances.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - if (distances.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return alphas; - } - - AT_DISPATCH_FLOATING_TYPES( - distances.scalar_type(), "sigmoid_alpha_blend_kernel", ([&] { - // clang-format off - SigmoidAlphaBlendForwardKernel<<>>( - distances.packed_accessor64(), - pix_to_face.packed_accessor64(), - alphas.packed_accessor64(), - sigma, - N, - H, - W, - K); - // clang-format on - })); - - AT_CUDA_CHECK(cudaGetLastError()); - return alphas; -} - -template -__global__ void SigmoidAlphaBlendBackwardKernel( - // clang-format off - const at::PackedTensorAccessor64 grad_alphas, // (N, H, W) - const at::PackedTensorAccessor64 alphas, // (N, H, W) - const at::PackedTensorAccessor64 distances, // (N, H, W, K) - const at::PackedTensorAccessor64 pix_to_face, // (N, H, W, K) - at::PackedTensorAccessor64 grad_distances, // (N, H, W) - // clang-format on - const scalar_t sigma, - const int N, - const int H, - const int W, - const int K) { - // Parallelize over each of the top K faces for each pixel in images of - // size H * W * K, for each image in the batch of size N. - - // Get block and thread index. - const int n = blockIdx.x; - const int num_pixels = H * W * K; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - for (int t_i = tid; t_i < num_pixels; t_i += num_threads) { - // Convert linear index to 3D index. - int yi = t_i / (W * K); - int xi = (t_i % (W * K)) / K; - int k = (t_i % (W * K)) % K; - - const scalar_t alpha = 1.0 - alphas[n][yi][xi]; - const scalar_t grad_alpha = grad_alphas[n][yi][xi]; - const int f = pix_to_face[n][yi][xi][k]; - - // Sentinel value is -1 indicating no face overlaps the pixel. - if (f >= 0) { - // The distance is negative if a pixel is inside a face and positive - // outside the face. Therefore use -1.0 * the distance to get the - // correct sign. - scalar_t dist = -1.0 * distances[n][yi][xi][k]; - - // Calculate the sigmoid probability. - scalar_t prob = 1. / (1. 
+ exp(-dist / sigma)); - - grad_distances[n][yi][xi][k] = grad_alpha * (-1.0 / sigma) * prob * alpha; - } - } -} - -at::Tensor SigmoidAlphaBlendBackwardCuda( - const at::Tensor& grad_alphas, // (N, H, W) - const at::Tensor& alphas, // (N, H, W) - const at::Tensor& distances, // (N, H, W, K) - const at::Tensor& pix_to_face, // (N, H, W, K) - float sigma) { - const int N = distances.size(0); - const int H = distances.size(1); - const int W = distances.size(2); - const int K = distances.size(3); - - at::Tensor grad_distances = at::zeros({N, H, W, K}, distances.options()); - - const dim3 threads(512); - const dim3 blocks(N, 1024 / N + 1); - - at::TensorArg grad_alphas_t{grad_alphas, "grad_alphas", 1}, - alphas_t{alphas, "alphas", 2}, distances_t{distances, "distances", 3}, - pix_to_face_t{pix_to_face, "pix_to_face", 4}; - at::CheckedFrom c = "SigmoidAlphaBlendBackwardCuda"; - at::checkAllSameGPU(c, {grad_alphas_t, alphas_t, distances_t, pix_to_face_t}); - - // Set the device for the kernel launch based on the device of distances - at::cuda::CUDAGuard device_guard(alphas.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - if (alphas.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_alphas; - } - - AT_DISPATCH_FLOATING_TYPES( - distances.scalar_type(), "sigmoid_alpha_blend_backward_kernel", ([&] { - SigmoidAlphaBlendBackwardKernel< - scalar_t><<>>( - // clang-format off - grad_alphas.packed_accessor64(), - alphas.packed_accessor64(), - distances.packed_accessor64(), - pix_to_face.packed_accessor64(), - grad_distances.packed_accessor64(), - // clang-format on - sigma, - N, - H, - W, - K); - })); - - AT_CUDA_CHECK(cudaGetLastError()); - return grad_distances; -} diff --git a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.h b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.h deleted file mode 100644 index d424c769c03c7df8b9bd32d6ac1d52b25befb2de..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include - -// clang-format off -// Function to blend the top K faces per pixel based on the 2d euclidean distance -// from the center of the pixel to the face. This method is adapted from [1]. -// The output can be used to set the alpha value in an RGBA image. -// Args: -// pix_to_face: LongTensor of shape (N, H, W, K), indices of faces overlapping -// with each pixel, where N is the batch size, H, W are the dimensions of the -// image and K is the number of faces rasterized per pixel. -// distances: FloatTensor of shape (N, H, W, K), 2d euclidean distance of each pixel -// relative to the faces in pix_to_face -// sigma: float, parameter which controls the width of the sigmoid for blending -// Returns: -// alphas: FloatTensor of shape (N, H, W), the blended values for each pixel -// in the image. 
-// -// [1] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for -// Image-based 3D Reasoning' -// clang-format on -at::Tensor SigmoidAlphaBlendForwardCpu( - const at::Tensor& distances, - const at::Tensor& pix_to_face, - const float sigma); - -#ifdef WITH_CUDA -at::Tensor SigmoidAlphaBlendForwardCuda( - const at::Tensor& distances, - const at::Tensor& pix_to_face, - const float sigma); -#endif - -// clang-format off -// Args: -// grad_alphas: FloatTensor of shape (N, H, W), upstream gradients for alphas -// alphas: FloatTensor of shape (N, H, W), the alpha values from the forward pass -// pix_to_face: LongTensor of shape (N, H, W, K), indices of faces overlapping -// with each pixel, where N is the batch size, H, W are the dimensions of the -// image, and K is the number of faces rasterized per pixel -// distances: FloatTensor of shape (N, H, W, K), 2d euclidean distance of each pixel -// to the corresponding faces in pix_to_face -// sigma: float, parameter which controls the width of the sigmoid for blending -// Returns: -// grad_distances: FloatTensor of shape (N, H, W, K) -// clang-format on -at::Tensor SigmoidAlphaBlendBackwardCpu( - const at::Tensor& grad_alphas, - const at::Tensor& alphas, - const at::Tensor& distances, - const at::Tensor& pix_to_face, - const float sigma); - -#ifdef WITH_CUDA -at::Tensor SigmoidAlphaBlendBackwardCuda( - const at::Tensor& grad_alphas, - const at::Tensor& alphas, - const at::Tensor& distances, - const at::Tensor& pix_to_face, - const float sigma); -#endif - -// Implementation which is exposed. -at::Tensor -SigmoidAlphaBlend(at::Tensor& distances, at::Tensor& pix_to_face, float sigma) { - if (distances.is_cuda() && pix_to_face.is_cuda()) { -#ifdef WITH_CUDA - return SigmoidAlphaBlendForwardCuda(distances, pix_to_face, sigma); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return SigmoidAlphaBlendForwardCpu(distances, pix_to_face, sigma); -} - -// Implementation which is exposed. -at::Tensor SigmoidAlphaBlendBackward( - const at::Tensor& grad_alphas, - const at::Tensor& alphas, - const at::Tensor& distances, - const at::Tensor& pix_to_face, - const float sigma) { - if (distances.is_cuda() && pix_to_face.is_cuda() && alphas.is_cuda() && - grad_alphas.is_cuda()) { -#ifdef WITH_CUDA - return SigmoidAlphaBlendBackwardCuda( - grad_alphas, alphas, distances, pix_to_face, sigma); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return SigmoidAlphaBlendBackwardCpu( - grad_alphas, alphas, distances, pix_to_face, sigma); -} diff --git a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend_cpu.cpp b/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend_cpu.cpp deleted file mode 100644 index 8a19516726f320e206402f7e78a37661603be76b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/blending/sigmoid_alpha_blend_cpu.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include - -at::Tensor SigmoidAlphaBlendForwardCpu( - const at::Tensor& distances, // (N, H, W, K) - const at::Tensor& pix_to_face, // (N, H, W, K) - const float sigma) { - const int N = distances.size(0); - const int H = distances.size(1); - const int W = distances.size(2); - const int K = distances.size(3); - - torch::Tensor out = torch::empty({N, H, W}, distances.options()); - - auto distances_a = distances.accessor(); - auto pix_to_face_a = pix_to_face.accessor(); - auto out_a = out.accessor(); - - // Iterate over the images in the batch. - for (int n = 0; n < N; ++n) { - // Iterate through the horizontal lines of the image from top to bottom. - for (int h = 0; h < H; ++h) { - // Iterate over the pixels on this horizontal line, left to right. - for (int w = 0; w < W; ++w) { - float alpha = 1.0; - - // Loop through the top K faces for each pixel. - for (int k = 0; k < K; ++k) { - const int f = pix_to_face_a[n][h][w][k]; - if (f < 0) { - // Sentinel value is -1 indicating no face overlaps the pixel. - continue; - } - // The distance is negative if a pixel is inside a face and positive - // outside the face. Therefore use -1.0 * the distance to get the - // correct sign. - float dist = -1.0 * distances_a[n][h][w][k]; - - // Calculate the sigmoid probability. - float prob = 1. / (1. + exp(-dist / sigma)); - - // The product ensures that alpha will be 0.0 if at least 1 - // face fully covers the pixel as for that face, prob will be 1.0. - // This results in a multiplication by 0.0 because of the (1.0 - prob) - // term. Therefore 1.0 - alpha will be 1.0. - alpha *= 1.0 - prob; - } - out_a[n][h][w] = 1.0 - alpha; - } - } - } - return out; -} - -at::Tensor SigmoidAlphaBlendBackwardCpu( - const at::Tensor& grad_alphas, // (N, H, W) - const at::Tensor& alphas, // (N, H, W) - const at::Tensor& distances, // (N, H, W, K) - const at::Tensor& pix_to_face, // (N, H, W, K) - const float sigma) { - const int N = distances.size(0); - const int H = distances.size(1); - const int W = distances.size(2); - const int K = distances.size(3); - - auto distances_a = distances.accessor(); - auto pix_to_face_a = pix_to_face.accessor(); - auto alphas_a = alphas.accessor(); - auto grad_alphas_a = grad_alphas.accessor(); - - torch::Tensor grad_distances = - torch::zeros({N, H, W, K}, distances.options()); - auto grad_distances_a = grad_distances.accessor(); - - // Iterate over the images in the batch. - for (int n = 0; n < N; ++n) { - // Iterate through the horizontal lines of the image from top to bottom. - for (int h = 0; h < H; ++h) { - // Iterate over the pixels on this horizontal line, left to right. - for (int w = 0; w < W; ++w) { - // Get the alpha value from the forward pass and the - // upstream gradient. - const float alpha = 1.0 - alphas_a[n][h][w]; - const float grad_alpha = grad_alphas_a[n][h][w]; - - // Loop through the top K faces for each pixel. - for (int k = 0; k < K; ++k) { - const int f = pix_to_face_a[n][h][w][k]; - if (f < 0) { - // Sentinel value is -1 indicating no face overlaps the pixel - continue; - } - // The distance is negative if a pixel is inside a face and positive - // outside the face. Therefore use -1.0 * distance to get the - // correct sign. - float dist = -1.0 * distances_a[n][h][w][k]; - - // Calculate the sigmoid probability. - float prob = 1. / (1. + exp(-dist / sigma)); - - // clang-format off - // We need to take the derivative of alpha w.r.t to the distance. - // alpha = 1.0 - (1.0- sigmoid(-x)) * (1.0 - sigmoid(-x2)) * ... 
* (1.0 - sigmoid(-xn)) - // - // Note that d/dx sigmoid(x) = sigmoid(x) * (1.0 - sigmoid(x)) - // - // This gives: - // d_alpha/d_dist = -1.0 * -1.0 * sigmoid(-x)(1. - sigmoid(-x)) * (-1.0/sigma) - // * ((1.0 - sigmoid(-x2) * ... * (1.0 - sigmoid(-xn)) - // = (-1.0/sigma) * prob * (1.0 - prob) * alpha/(1.0 - prob) - // = (-1.0/sigma) * prob * alpha - // clang-format on - grad_distances_a[n][h][w][k] = - grad_alpha * (-1.0 / sigma) * prob * alpha; - } - } - } - } - return grad_distances; -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu deleted file mode 100644 index 679d8a8231b45678c6ed95a1705e6c72edef454d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -__constant__ const float kEpsilon = 1e-9; - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. -__global__ void alphaCompositeCudaForwardKernel( - // clang-format off - at::PackedTensorAccessor64 result, - const at::PackedTensorAccessor64 features, - const at::PackedTensorAccessor64 alphas, - const at::PackedTensorAccessor64 points_idx) { - // clang-format on - const int64_t batch_size = result.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - // Get the batch and index - const int batch = blockIdx.x; - - const int num_pixels = C * H * W; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - // Iterate over each feature in each pixel - for (int pid = tid; pid < num_pixels; pid += num_threads) { - int ch = pid / (H * W); - int j = (pid % (H * W)) / W; - int i = (pid % (H * W)) % W; - - // alphacomposite the different values - float cum_alpha = 1.; - // Iterate through the closest K points for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - - float alpha = alphas[batch][k][j][i]; - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. - atomicAdd( - &result[batch][ch][j][i], features[ch][n_idx] * cum_alpha * alpha); - cum_alpha = cum_alpha * (1 - alpha); - } - } -} - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. 
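Before the backward kernel below, the forward accumulation above can be summarised as a per-pixel recurrence: each of the K closest points contributes its feature weighted by its own alpha and by the transmittance left over from the points in front of it. A plain-Python sketch of that recurrence for a single pixel (names simplified from the kernel; illustrative only, not part of the library):

import torch

def composite_one_pixel(features, alphas, points_idx):
    # features: (C, P) per-point features; alphas, points_idx: (K,) for one pixel.
    C = features.shape[0]
    out = torch.zeros(C)
    cum_alpha = 1.0
    for k in range(points_idx.shape[0]):
        n_idx = int(points_idx[k])
        if n_idx < 0:  # -1 means no point fell into this slot
            continue
        a = float(alphas[k])
        out += features[:, n_idx] * cum_alpha * a   # front-to-back contribution
        cum_alpha *= 1.0 - a                        # remaining transmittance
    return out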
-__global__ void alphaCompositeCudaBackwardKernel( - // clang-format off - at::PackedTensorAccessor64 grad_features, - at::PackedTensorAccessor64 grad_alphas, - const at::PackedTensorAccessor64 grad_outputs, - const at::PackedTensorAccessor64 features, - const at::PackedTensorAccessor64 alphas, - const at::PackedTensorAccessor64 points_idx) { - // clang-format on - const int64_t batch_size = points_idx.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - // Get the batch and index - const int batch = blockIdx.x; - - const int num_pixels = C * H * W; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - // Parallelize over each feature in each pixel in images of size H * W, - // for each image in the batch of size batch_size - for (int pid = tid; pid < num_pixels; pid += num_threads) { - int ch = pid / (H * W); - int j = (pid % (H * W)) / W; - int i = (pid % (H * W)) % W; - - // alphacomposite the different values - float cum_alpha = 1.; - // Iterate through the closest K points for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas[batch][k][j][i]; - - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. - atomicAdd( - &grad_alphas[batch][k][j][i], - cum_alpha * features[ch][n_idx] * grad_outputs[batch][ch][j][i]); - atomicAdd( - &grad_features[ch][n_idx], - cum_alpha * alpha * grad_outputs[batch][ch][j][i]); - - // Iterate over all (K-1) nearest points to update gradient - for (int t = 0; t < k; ++t) { - int t_idx = points_idx[batch][t][j][i]; - // Sentinel value is -1, indicating no point overlaps this pixel - if (t_idx < 0) { - continue; - } - float alpha_tvalue = alphas[batch][t][j][i]; - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. 
- atomicAdd( - &grad_alphas[batch][t][j][i], - -grad_outputs[batch][ch][j][i] * features[ch][n_idx] * cum_alpha * - alpha / (1 - alpha_tvalue + kEpsilon)); - } - - cum_alpha = cum_alpha * (1 - alphas[batch][k][j][i]); - } - } -} - -at::Tensor alphaCompositeCudaForward( - const at::Tensor& features, - const at::Tensor& alphas, - const at::Tensor& points_idx) { - // Check inputs are on the same device - at::TensorArg features_t{features, "features", 1}, - alphas_t{alphas, "alphas", 2}, points_idx_t{points_idx, "points_idx", 3}; - at::CheckedFrom c = "alphaCompositeCudaForward"; - at::checkAllSameGPU(c, {features_t, alphas_t, points_idx_t}); - at::checkAllSameType(c, {features_t, alphas_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(features.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t batch_size = points_idx.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - auto result = at::zeros({batch_size, C, H, W}, features.options()); - - if (result.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return result; - } - - const dim3 threadsPerBlock(64); - const dim3 numBlocks(batch_size, 1024 / batch_size + 1); - - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. - alphaCompositeCudaForwardKernel<<>>( - // clang-format off - // As we are using packed accessors here the tensors - // do not need to be made contiguous. - result.packed_accessor64(), - features.packed_accessor64(), - alphas.packed_accessor64(), - points_idx.packed_accessor64()); - // clang-format on - AT_CUDA_CHECK(cudaGetLastError()); - return result; -} - -std::tuple alphaCompositeCudaBackward( - const at::Tensor& grad_outputs, - const at::Tensor& features, - const at::Tensor& alphas, - const at::Tensor& points_idx) { - // Check inputs are on the same device - at::TensorArg grad_outputs_t{grad_outputs, "grad_outputs", 1}, - features_t{features, "features", 2}, alphas_t{alphas, "alphas", 3}, - points_idx_t{points_idx, "points_idx", 4}; - at::CheckedFrom c = "alphaCompositeCudaBackward"; - at::checkAllSameGPU(c, {grad_outputs_t, features_t, alphas_t, points_idx_t}); - at::checkAllSameType(c, {grad_outputs_t, features_t, alphas_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(features.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - auto grad_features = at::zeros_like(features); - auto grad_alphas = at::zeros_like(alphas); - - if (grad_features.numel() == 0 || grad_alphas.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_features, grad_alphas); - } - - const int64_t bs = alphas.size(0); - - const dim3 threadsPerBlock(64); - const dim3 numBlocks(bs, 1024 / bs + 1); - - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. - alphaCompositeCudaBackwardKernel<<>>( - // clang-format off - // As we are using packed accessors here the tensors - // do not need to be made contiguous. 
- grad_features.packed_accessor64(), - grad_alphas.packed_accessor64(), - grad_outputs.packed_accessor64(), - features.packed_accessor64(), - alphas.packed_accessor64(), - points_idx.packed_accessor64()); - // clang-format on - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_features, grad_alphas); -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.h b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.h deleted file mode 100644 index 44ba1bf0fd2e848e682db4a46c0badeefb3c02e5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include "utils/pytorch3d_cutils.h" - -#include - -// Perform alpha compositing of points in a z-buffer. -// -// Inputs: -// features: FloatTensor of shape (C, P) which gives the features -// of each point where C is the size of the feature and -// P the number of points. -// alphas: FloatTensor of shape (N, points_per_pixel, H, W) where -// points_per_pixel is the number of points in the z-buffer -// sorted in z-order, and (H, W) is the image size. -// points_idx: IntTensor of shape (N, points_per_pixel, H, W) giving the -// indices of the nearest points at each pixel, sorted in z-order. -// Returns: -// weighted_fs: FloatTensor of shape (N, C, H, W) giving the accumulated -// feature for each point. Concretely, it gives: -// weighted_fs[b,c,i,j] = sum_k cum_alpha_k * -// features[c,points_idx[b,k,i,j]] -// where cum_alpha_k = -// alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j]) - -// CUDA declarations -#ifdef WITH_CUDA -torch::Tensor alphaCompositeCudaForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -std::tuple alphaCompositeCudaBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); -#endif - -// C++ declarations -torch::Tensor alphaCompositeCpuForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -std::tuple alphaCompositeCpuBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -torch::Tensor alphaCompositeForward( - torch::Tensor& features, - torch::Tensor& alphas, - torch::Tensor& points_idx) { - features = features.contiguous(); - alphas = alphas.contiguous(); - points_idx = points_idx.contiguous(); - - if (features.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(features); - CHECK_CUDA(alphas); - CHECK_CUDA(points_idx); - return alphaCompositeCudaForward(features, alphas, points_idx); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return alphaCompositeCpuForward(features, alphas, points_idx); - } -} - -std::tuple alphaCompositeBackward( - torch::Tensor& grad_outputs, - torch::Tensor& features, - torch::Tensor& alphas, - torch::Tensor& points_idx) { - grad_outputs = grad_outputs.contiguous(); - features = features.contiguous(); - alphas = alphas.contiguous(); - points_idx = points_idx.contiguous(); - - if (grad_outputs.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(grad_outputs); - CHECK_CUDA(features); - CHECK_CUDA(alphas); - CHECK_CUDA(points_idx); - - return 
alphaCompositeCudaBackward( - grad_outputs, features, alphas, points_idx); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return alphaCompositeCpuBackward( - grad_outputs, features, alphas, points_idx); - } -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite_cpu.cpp b/pytorch3d/pytorch3d/csrc/compositing/alpha_composite_cpu.cpp deleted file mode 100644 index 41bc0ec76794228e7f770f0436453306ffc8aec0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/alpha_composite_cpu.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include - -#include -#include - -// Epsilon float -const float kEps = 1e-9; - -torch::Tensor alphaCompositeCpuForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx) { - const int64_t B = points_idx.size(0); - const int64_t K = points_idx.size(1); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - const int64_t C = features.size(0); - - torch::Tensor result = torch::zeros({B, C, H, W}, features.options()); - - auto features_a = features.accessor(); - auto alphas_a = alphas.accessor(); - auto points_idx_a = points_idx.accessor(); - auto result_a = result.accessor(); - - // Iterate over the batch - for (int b = 0; b < B; ++b) { - // Iterate over the features - for (int c = 0; c < C; ++c) { - // Iterate through the horizontal lines of the image from top to bottom - for (int j = 0; j < H; ++j) { - // Iterate over pixels in a horizontal line, left to right - for (int i = 0; i < W; ++i) { - float cum_alpha = 1.; - // Iterate through the closest K points for this pixel - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas_a[b][k][j][i]; - result_a[b][c][j][i] += cum_alpha * alpha * features_a[c][n_idx]; - cum_alpha = cum_alpha * (1 - alpha); - } - } - } - } - } - return result; -} - -std::tuple alphaCompositeCpuBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx) { - torch::Tensor grad_features = torch::zeros_like(features); - torch::Tensor grad_alphas = torch::zeros_like(alphas); - - const int64_t B = points_idx.size(0); - const int64_t K = points_idx.size(1); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - const int64_t C = features.size(0); - - auto grad_outputs_a = grad_outputs.accessor(); - auto features_a = features.accessor(); - auto alphas_a = alphas.accessor(); - auto points_idx_a = points_idx.accessor(); - auto grad_features_a = grad_features.accessor(); - auto grad_alphas_a = grad_alphas.accessor(); - - // Iterate over the batch - for (int b = 0; b < B; ++b) { - // Iterate over the features - for (int c = 0; c < C; ++c) { - // Iterate through the horizontal lines of the image from top to bottom - for (int j = 0; j < H; ++j) { - // Iterate over pixels in a horizontal line, left to right - for (int i = 0; i < W; ++i) { - float cum_alpha = 1.; - // Iterate through the closest K points for this pixel - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinal value is -1, indicating no point overlaps this pixel 
- if (n_idx < 0) { - continue; - } - float alpha = alphas_a[b][k][j][i]; - grad_alphas_a[b][k][j][i] += - grad_outputs_a[b][c][j][i] * features_a[c][n_idx] * cum_alpha; - grad_features_a[c][n_idx] += - grad_outputs_a[b][c][j][i] * cum_alpha * alpha; - - // Iterate over all (K-1) nearer points to update gradient - for (int t = 0; t < k; t++) { - int64_t t_idx = points_idx_a[b][t][j][i]; - // Sentinal value is -1, indicating no point overlaps this pixel - if (t_idx < 0) { - continue; - } - float alpha_tvalue = alphas_a[b][t][j][i]; - grad_alphas_a[b][t][j][i] -= grad_outputs_a[b][c][j][i] * - features_a[c][n_idx] * cum_alpha * alpha / - (1 - alpha_tvalue + kEps); - } - - cum_alpha = cum_alpha * (1 - alpha); - } - } - } - } - } - return std::make_tuple(grad_features, grad_alphas); -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.cu b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.cu deleted file mode 100644 index 984647172f9a15277eab70b15158a9441355490d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.cu +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -__constant__ const float kEpsilon = 1e-4; - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. -__global__ void weightedSumNormCudaForwardKernel( - // clang-format off - at::PackedTensorAccessor64 result, - const at::PackedTensorAccessor64 features, - const at::PackedTensorAccessor64 alphas, - const at::PackedTensorAccessor64 points_idx) { - // clang-format on - const int64_t batch_size = result.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - // Get the batch and index - const int batch = blockIdx.x; - - const int num_pixels = C * H * W; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - // Parallelize over each feature in each pixel in images of size H * W, - // for each image in the batch of size batch_size - for (int pid = tid; pid < num_pixels; pid += num_threads) { - int ch = pid / (H * W); - int j = (pid % (H * W)) / W; - int i = (pid % (H * W)) % W; - - // Store the accumulated alpha value - float cum_alpha = 0.; - // Iterate through the closest K points for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - - cum_alpha += alphas[batch][k][j][i]; - } - - if (cum_alpha < kEpsilon) { - cum_alpha = kEpsilon; - } - - // Iterate through the closest K points for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas[batch][k][j][i]; - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. 
- atomicAdd( - &result[batch][ch][j][i], features[ch][n_idx] * alpha / cum_alpha); - } - } -} - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. -__global__ void weightedSumNormCudaBackwardKernel( - // clang-format off - at::PackedTensorAccessor64 grad_features, - at::PackedTensorAccessor64 grad_alphas, - const at::PackedTensorAccessor64 grad_outputs, - const at::PackedTensorAccessor64 features, - const at::PackedTensorAccessor64 alphas, - const at::PackedTensorAccessor64 points_idx) { - // clang-format on - const int64_t batch_size = points_idx.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - // Get the batch and index - const int batch = blockIdx.x; - - const int num_pixels = C * W * H; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - // Parallelize over each feature in each pixel in images of size H * W, - // for each image in the batch of size batch_size - for (int pid = tid; pid < num_pixels; pid += num_threads) { - int ch = pid / (H * W); - int j = (pid % (H * W)) / W; - int i = (pid % (H * W)) % W; - - float sum_alpha = 0.; - float sum_alphafs = 0.; - // Iterate through the closest K points for this pixel to calculate the - // cumulative sum of the alphas for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - - sum_alpha += alphas[batch][k][j][i]; - sum_alphafs += alphas[batch][k][j][i] * features[ch][n_idx]; - } - - if (sum_alpha < kEpsilon) { - sum_alpha = kEpsilon; - } - - // Iterate again through the closest K points for this pixel to calculate - // the gradient. - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas[batch][k][j][i]; - - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. 
- atomicAdd( - &grad_alphas[batch][k][j][i], - (features[ch][n_idx] * sum_alpha - sum_alphafs) / - (sum_alpha * sum_alpha) * grad_outputs[batch][ch][j][i]); - atomicAdd( - &grad_features[ch][n_idx], - alpha * grad_outputs[batch][ch][j][i] / sum_alpha); - } - } -} - -at::Tensor weightedSumNormCudaForward( - const at::Tensor& features, - const at::Tensor& alphas, - const at::Tensor& points_idx) { - // Check inputs are on the same device - at::TensorArg features_t{features, "features", 1}, - alphas_t{alphas, "alphas", 2}, points_idx_t{points_idx, "points_idx", 3}; - at::CheckedFrom c = "weightedSumNormCudaForward"; - at::checkAllSameGPU(c, {features_t, alphas_t, points_idx_t}); - at::checkAllSameType(c, {features_t, alphas_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(features.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t batch_size = points_idx.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - auto result = at::zeros({batch_size, C, H, W}, features.options()); - - if (result.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return result; - } - - const dim3 threadsPerBlock(64); - const dim3 numBlocks(batch_size, 1024 / batch_size + 1); - - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. - // clang-format off - weightedSumNormCudaForwardKernel<<>>( - // As we are using packed accessors here the tensors - // do not need to be made contiguous. - result.packed_accessor64(), - features.packed_accessor64(), - alphas.packed_accessor64(), - points_idx.packed_accessor64()); - // clang-format on - - AT_CUDA_CHECK(cudaGetLastError()); - return result; -} - -std::tuple weightedSumNormCudaBackward( - const at::Tensor& grad_outputs, - const at::Tensor& features, - const at::Tensor& alphas, - const at::Tensor& points_idx) { - // Check inputs are on the same device - at::TensorArg grad_outputs_t{grad_outputs, "grad_outputs", 1}, - features_t{features, "features", 2}, alphas_t{alphas, "alphas", 3}, - points_idx_t{points_idx, "points_idx", 4}; - at::CheckedFrom c = "weightedSumNormCudaBackward"; - at::checkAllSameGPU(c, {grad_outputs_t, features_t, alphas_t, points_idx_t}); - at::checkAllSameType(c, {grad_outputs_t, features_t, alphas_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(features.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - auto grad_features = at::zeros_like(features); - auto grad_alphas = at::zeros_like(alphas); - - if (grad_features.numel() == 0 || grad_alphas.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_features, grad_alphas); - } - - const int64_t bs = points_idx.size(0); - - const dim3 threadsPerBlock(64); - const dim3 numBlocks(bs, 1024 / bs + 1); - - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. - weightedSumNormCudaBackwardKernel<<>>( - // clang-format off - // As we are using packed accessors here the tensors - // do not need to be made contiguous. 
- grad_features.packed_accessor64(), - grad_alphas.packed_accessor64(), - grad_outputs.packed_accessor64(), - features.packed_accessor64(), - alphas.packed_accessor64(), - points_idx.packed_accessor64()); - // clang-format on - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_features, grad_alphas); -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.h b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.h deleted file mode 100644 index 5d0a5f5be08267ad33da8b704814c6c6c333930d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include "utils/pytorch3d_cutils.h" - -#include - -// Perform normalized weighted sum compositing of points in a z-buffer. -// -// Inputs: -// features: FloatTensor of shape (C, P) which gives the features -// of each point where C is the size of the feature and -// P the number of points. -// alphas: FloatTensor of shape (N, points_per_pixel, H, W) where -// points_per_pixel is the number of points in the z-buffer -// sorted in z-order, and (H, W) is the image size. -// points_idx: IntTensor of shape (N, points_per_pixel, H, W) giving the -// indices of the nearest points at each pixel, sorted in z-order. -// Returns: -// weighted_fs: FloatTensor of shape (N, C, H, W) giving the accumulated -// feature in each point. Concretely, it gives: -// weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] * -// features[c,points_idx[b,k,i,j]] / sum_k alphas[b,k,i,j] - -// CUDA declarations -#ifdef WITH_CUDA -torch::Tensor weightedSumNormCudaForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -std::tuple weightedSumNormCudaBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); -#endif - -// C++ declarations -torch::Tensor weightedSumNormCpuForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -std::tuple weightedSumNormCpuBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -torch::Tensor weightedSumNormForward( - torch::Tensor& features, - torch::Tensor& alphas, - torch::Tensor& points_idx) { - features = features.contiguous(); - alphas = alphas.contiguous(); - points_idx = points_idx.contiguous(); - - if (features.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(features); - CHECK_CUDA(alphas); - CHECK_CUDA(points_idx); - - return weightedSumNormCudaForward(features, alphas, points_idx); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return weightedSumNormCpuForward(features, alphas, points_idx); - } -} - -std::tuple weightedSumNormBackward( - torch::Tensor& grad_outputs, - torch::Tensor& features, - torch::Tensor& alphas, - torch::Tensor& points_idx) { - grad_outputs = grad_outputs.contiguous(); - features = features.contiguous(); - alphas = alphas.contiguous(); - points_idx = points_idx.contiguous(); - - if (grad_outputs.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(grad_outputs); - CHECK_CUDA(features); - CHECK_CUDA(alphas); - CHECK_CUDA(points_idx); - - return weightedSumNormCudaBackward( - grad_outputs, 
features, alphas, points_idx); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return weightedSumNormCpuBackward( - grad_outputs, features, alphas, points_idx); - } -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum_cpu.cpp b/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum_cpu.cpp deleted file mode 100644 index 840ef3d24ae652fb42384afc755c0a889543e649..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/norm_weighted_sum_cpu.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include - -#include -#include - -// Epsilon float -const float kEps = 1e-4; - -torch::Tensor weightedSumNormCpuForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx) { - const int64_t B = points_idx.size(0); - const int64_t K = points_idx.size(1); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - const int64_t C = features.size(0); - - torch::Tensor result = torch::zeros({B, C, H, W}, features.options()); - - auto features_a = features.accessor(); - auto alphas_a = alphas.accessor(); - auto points_idx_a = points_idx.accessor(); - auto result_a = result.accessor(); - - // Iterate over the batch - for (int b = 0; b < B; ++b) { - // Iterate oer the features - for (int c = 0; c < C; ++c) { - // Iterate through the horizontal lines of the image from top to bottom - for (int j = 0; j < H; ++j) { - // Iterate over pixels in a horizontal line, left to right - for (int i = 0; i < W; ++i) { - float t_alpha = 0.; - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - if (n_idx < 0) { - continue; - } - - t_alpha += alphas_a[b][k][j][i]; - } - - if (t_alpha < kEps) { - t_alpha = kEps; - } - - // Iterate over the different zs to combine - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas_a[b][k][j][i]; - result_a[b][c][j][i] += alpha * features_a[c][n_idx] / t_alpha; - } - } - } - } - } - return result; -} - -std::tuple weightedSumNormCpuBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx) { - torch::Tensor grad_features = torch::zeros_like(features); - torch::Tensor grad_alphas = torch::zeros_like(alphas); - - const int64_t B = points_idx.size(0); - const int64_t K = points_idx.size(1); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - const int64_t C = features.size(0); - - auto grad_outputs_a = grad_outputs.accessor(); - auto features_a = features.accessor(); - auto alphas_a = alphas.accessor(); - auto points_idx_a = points_idx.accessor(); - auto grad_features_a = grad_features.accessor(); - auto grad_alphas_a = grad_alphas.accessor(); - - // Iterate over the batch - for (int b = 0; b < B; ++b) { - // Iterate oer the features - for (int c = 0; c < C; ++c) { - // Iterate through the horizontal lines of the image from top to bottom - for (int j = 0; j < H; ++j) { - // Iterate over pixels in a horizontal line, left to right - for (int i = 0; i < W; ++i) { - float t_alpha = 0.; - float t_alphafs = 0.; - // Iterate through the closest K points for this pixel - for 
(int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinel value is -1, indicating no point overlaps this pixel - if (n_idx < 0) { - continue; - } - - t_alpha += alphas_a[b][k][j][i]; - t_alphafs += alphas_a[b][k][j][i] * features_a[c][n_idx]; - } - - if (t_alpha < kEps) { - t_alpha = kEps; - } - - // Iterate through the closest K points for this pixel ordered by z - // distance. - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas_a[b][k][j][i]; - grad_alphas_a[b][k][j][i] += grad_outputs_a[b][c][j][i] * - (features_a[c][n_idx] * t_alpha - t_alphafs) / - (t_alpha * t_alpha); - grad_features_a[c][n_idx] += - grad_outputs_a[b][c][j][i] * alpha / t_alpha; - } - } - } - } - } - return std::make_tuple(grad_features, grad_alphas); -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.cu b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.cu deleted file mode 100644 index 17a35bf924438408a0f039daac4c719b4496c716..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.cu +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. -__global__ void weightedSumCudaForwardKernel( - // clang-format off - at::PackedTensorAccessor64 result, - const at::PackedTensorAccessor64 features, - const at::PackedTensorAccessor64 alphas, - const at::PackedTensorAccessor64 points_idx) { - // clang-format on - const int64_t batch_size = result.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - // Get the batch and index - const int batch = blockIdx.x; - - const int num_pixels = C * H * W; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - // Parallelize over each feature in each pixel in images of size H * W, - // for each image in the batch of size batch_size - for (int pid = tid; pid < num_pixels; pid += num_threads) { - int ch = pid / (H * W); - int j = (pid % (H * W)) / W; - int i = (pid % (H * W)) % W; - - // Iterate through the closest K points for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - - // Accumulate the values - float alpha = alphas[batch][k][j][i]; - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. - atomicAdd(&result[batch][ch][j][i], features[ch][n_idx] * alpha); - } - } -} - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. 
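For reference, the three z-buffer accumulation rules implemented by these compositing kernels (alpha compositing, normalized weighted sum, and the plain weighted sum whose forward kernel appears just above) differ only in how the per-point alphas are turned into weights. The sketch below is a hedged pure-PyTorch rendition under the shapes documented in the headers; composite_ref and its mode strings are illustrative names, not part of the deleted sources.

import torch

def composite_ref(features, alphas, points_idx, mode="alpha"):
    # features: (C, P); alphas, points_idx: (N, K, H, W); index -1 marks an empty slot.
    mask = (points_idx >= 0).to(alphas.dtype)
    idx = points_idx.clamp(min=0)                      # safe gather index for empty slots
    feats = features[:, idx].permute(1, 0, 2, 3, 4)    # (N, C, K, H, W)
    a = (alphas * mask).unsqueeze(1)                   # (N, 1, K, H, W)
    if mode == "wsum":        # weighted_fs = sum_k alpha_k * f_k
        w = a
    elif mode == "wsumnorm":  # weighted_fs = sum_k alpha_k * f_k / max(sum_k alpha_k, eps)
        w = a / a.sum(dim=2, keepdim=True).clamp(min=1e-4)
    else:                     # alpha composite: w_k = alpha_k * prod_{l<k} (1 - alpha_l)
        ones = torch.ones_like(a[:, :, :1])
        occlusion = torch.cumprod(torch.cat([ones, 1.0 - a], dim=2), dim=2)[:, :, :-1]
        w = a * occlusion
    return (w * feats).sum(dim=2)                      # (N, C, H, W)

Because it is pure autograd, this version can be compared against the hand-written kernels above both for outputs and for gradients (float inputs assumed).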
-__global__ void weightedSumCudaBackwardKernel( - // clang-format off - at::PackedTensorAccessor64 grad_features, - at::PackedTensorAccessor64 grad_alphas, - const at::PackedTensorAccessor64 grad_outputs, - const at::PackedTensorAccessor64 features, - const at::PackedTensorAccessor64 alphas, - const at::PackedTensorAccessor64 points_idx) { - // clang-format on - const int64_t batch_size = points_idx.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - // Get the batch and index - const int batch = blockIdx.x; - - const int num_pixels = C * H * W; - const int num_threads = gridDim.y * blockDim.x; - const int tid = blockIdx.y * blockDim.x + threadIdx.x; - - // Iterate over each pixel to compute the contribution to the - // gradient for the features and weights - for (int pid = tid; pid < num_pixels; pid += num_threads) { - int ch = pid / (H * W); - int j = (pid % (H * W)) / W; - int i = (pid % (H * W)) % W; - - // Iterate through the closest K points for this pixel - for (int k = 0; k < points_idx.size(1); ++k) { - int n_idx = points_idx[batch][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - float alpha = alphas[batch][k][j][i]; - - // TODO(gkioxari) It might be more efficient to have threads write in a - // local variable, and move atomicAdd outside of the loop such that - // atomicAdd is executed once per thread. - atomicAdd( - &grad_alphas[batch][k][j][i], - features[ch][n_idx] * grad_outputs[batch][ch][j][i]); - atomicAdd( - &grad_features[ch][n_idx], alpha * grad_outputs[batch][ch][j][i]); - } - } -} - -at::Tensor weightedSumCudaForward( - const at::Tensor& features, - const at::Tensor& alphas, - const at::Tensor& points_idx) { - // Check inputs are on the same device - at::TensorArg features_t{features, "features", 1}, - alphas_t{alphas, "alphas", 2}, points_idx_t{points_idx, "points_idx", 3}; - at::CheckedFrom c = "weightedSumCudaForward"; - at::checkAllSameGPU(c, {features_t, alphas_t, points_idx_t}); - at::checkAllSameType(c, {features_t, alphas_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(features.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t batch_size = points_idx.size(0); - const int64_t C = features.size(0); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - - auto result = at::zeros({batch_size, C, H, W}, features.options()); - - if (result.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return result; - } - - const dim3 threadsPerBlock(64); - const dim3 numBlocks(batch_size, 1024 / batch_size + 1); - - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. - weightedSumCudaForwardKernel<<>>( - // clang-format off - // As we are using packed accessors here the tensors - // do not need to be made contiguous. 
- result.packed_accessor64(), - features.packed_accessor64(), - alphas.packed_accessor64(), - points_idx.packed_accessor64()); - // clang-format on - AT_CUDA_CHECK(cudaGetLastError()); - return result; -} - -std::tuple weightedSumCudaBackward( - const at::Tensor& grad_outputs, - const at::Tensor& features, - const at::Tensor& alphas, - const at::Tensor& points_idx) { - // Check inputs are on the same device - at::TensorArg grad_outputs_t{grad_outputs, "grad_outputs", 1}, - features_t{features, "features", 2}, alphas_t{alphas, "alphas", 3}, - points_idx_t{points_idx, "points_idx", 4}; - at::CheckedFrom c = "weightedSumCudaBackward"; - at::checkAllSameGPU(c, {grad_outputs_t, features_t, alphas_t, points_idx_t}); - at::checkAllSameType(c, {grad_outputs_t, features_t, alphas_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(features.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - auto grad_features = at::zeros_like(features); - auto grad_alphas = at::zeros_like(alphas); - - if (grad_features.numel() == 0 || grad_alphas.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_features, grad_alphas); - } - - const int64_t bs = points_idx.size(0); - - const dim3 threadsPerBlock(64); - const dim3 numBlocks(bs, 1024 / bs + 1); - - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. - weightedSumCudaBackwardKernel<<>>( - // clang-format off - // As we are using packed accessors here the tensors - // do not need to be made contiguous. - grad_features.packed_accessor64(), - grad_alphas.packed_accessor64(), - grad_outputs.packed_accessor64(), - features.packed_accessor64(), - alphas.packed_accessor64(), - points_idx.packed_accessor64()); - // clang-format on - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_features, grad_alphas); -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.h b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.h deleted file mode 100644 index 0be6e4988419da7710b8f9c412caf0f8699cf35f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include "utils/pytorch3d_cutils.h" - -#include - -// Perform weighted sum compositing of points in a z-buffer. -// -// Inputs: -// features: FloatTensor of shape (C, P) which gives the features -// of each point where C is the size of the feature and -// P the number of points. -// alphas: FloatTensor of shape (N, points_per_pixel, H, W) where -// points_per_pixel is the number of points in the z-buffer -// sorted in z-order, and (H, W) is the image size. -// points_idx: IntTensor of shape (N, points_per_pixel, W, W) giving the -// indices of the nearest points at each pixel, sorted in z-order. -// Returns: -// weighted_fs: FloatTensor of shape (N, C, H, W) giving the accumulated -// feature in each point. 
Concretely, it gives: -// weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] * -// features[c,points_idx[b,k,i,j]] - -// CUDA declarations -#ifdef WITH_CUDA -torch::Tensor weightedSumCudaForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -std::tuple weightedSumCudaBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); -#endif - -// C++ declarations -torch::Tensor weightedSumCpuForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -std::tuple weightedSumCpuBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx); - -torch::Tensor weightedSumForward( - torch::Tensor& features, - torch::Tensor& alphas, - torch::Tensor& points_idx) { - features = features.contiguous(); - alphas = alphas.contiguous(); - points_idx = points_idx.contiguous(); - - if (features.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(features); - CHECK_CUDA(alphas); - CHECK_CUDA(points_idx); - return weightedSumCudaForward(features, alphas, points_idx); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return weightedSumCpuForward(features, alphas, points_idx); - } -} - -std::tuple weightedSumBackward( - torch::Tensor& grad_outputs, - torch::Tensor& features, - torch::Tensor& alphas, - torch::Tensor& points_idx) { - grad_outputs = grad_outputs.contiguous(); - features = features.contiguous(); - alphas = alphas.contiguous(); - points_idx = points_idx.contiguous(); - - if (grad_outputs.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(grad_outputs); - CHECK_CUDA(features); - CHECK_CUDA(alphas); - CHECK_CUDA(points_idx); - - return weightedSumCudaBackward(grad_outputs, features, alphas, points_idx); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx); - } -} diff --git a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum_cpu.cpp b/pytorch3d/pytorch3d/csrc/compositing/weighted_sum_cpu.cpp deleted file mode 100644 index b7bddee3c9791647352b686d368dd2e6adccf27f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/compositing/weighted_sum_cpu.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include - -#include -#include - -torch::Tensor weightedSumCpuForward( - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx) { - const int64_t B = points_idx.size(0); - const int64_t K = points_idx.size(1); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - const int64_t C = features.size(0); - - torch::Tensor result = torch::zeros({B, C, H, W}, features.options()); - - auto features_a = features.accessor(); - auto alphas_a = alphas.accessor(); - auto points_idx_a = points_idx.accessor(); - auto result_a = result.accessor(); - - // Iterate over the batch - for (int b = 0; b < B; ++b) { - // Iterate over the features - for (int c = 0; c < C; ++c) { - // Iterate through the horizontal lines of the image from top to bottom - for (int j = 0; j < H; ++j) { - // Iterate over pixels in a horizontal line, left to right - for (int i = 0; i < W; ++i) { - // Iterate through the closest K points for this pixel - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinel value is -1 indicating no point overlaps the pixel - if (n_idx < 0) { - continue; - } - - float alpha = alphas_a[b][k][j][i]; - result_a[b][c][j][i] += alpha * features_a[c][n_idx]; - } - } - } - } - } - return result; -} - -std::tuple weightedSumCpuBackward( - const torch::Tensor& grad_outputs, - const torch::Tensor& features, - const torch::Tensor& alphas, - const torch::Tensor& points_idx) { - const int64_t B = points_idx.size(0); - const int64_t K = points_idx.size(1); - const int64_t H = points_idx.size(2); - const int64_t W = points_idx.size(3); - const int64_t C = features.size(0); - - torch::Tensor grad_features = torch::zeros_like(features); - torch::Tensor grad_alphas = torch::zeros_like(alphas); - - auto grad_outputs_a = grad_outputs.accessor(); - auto features_a = features.accessor(); - auto alphas_a = alphas.accessor(); - auto points_idx_a = points_idx.accessor(); - auto grad_features_a = grad_features.accessor(); - auto grad_alphas_a = grad_alphas.accessor(); - - // Iterate over the batch - for (int b = 0; b < B; ++b) { - // Iterate oer the features - for (int c = 0; c < C; ++c) { - // Iterate through the horizontal lines of the image from top to bottom - for (int j = 0; j < H; ++j) { - // Iterate over pixels in a horizontal line, left to right - for (int i = 0; i < W; ++i) { - // Iterate through the closest K points for this pixel - for (int k = 0; k < K; ++k) { - int64_t n_idx = points_idx_a[b][k][j][i]; - // Sentinal value is -1, indicating no point overlaps this pixel - if (n_idx < 0) { - continue; - } - - float alpha = alphas_a[b][k][j][i]; - grad_alphas_a[b][k][j][i] += - grad_outputs_a[b][c][j][i] * features_a[c][n_idx]; - grad_features_a[c][n_idx] += grad_outputs_a[b][c][j][i] * alpha; - } - } - } - } - } - return std::make_tuple(grad_features, grad_alphas); -} diff --git a/pytorch3d/pytorch3d/csrc/ext.cpp b/pytorch3d/pytorch3d/csrc/ext.cpp deleted file mode 100644 index 6a17dbb0ce77bc13d90003889c0abff853b09be6..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/ext.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// clang-format off -#include "./pulsar/global.h" // Include before . 
-#include -// clang-format on -#include "./pulsar/pytorch/renderer.h" -#include "./pulsar/pytorch/tensor_util.h" -#include "ball_query/ball_query.h" -#include "blending/sigmoid_alpha_blend.h" -#include "compositing/alpha_composite.h" -#include "compositing/norm_weighted_sum.h" -#include "compositing/weighted_sum.h" -#include "face_areas_normals/face_areas_normals.h" -#include "gather_scatter/gather_scatter.h" -#include "interp_face_attrs/interp_face_attrs.h" -#include "iou_box3d/iou_box3d.h" -#include "knn/knn.h" -#include "marching_cubes/marching_cubes.h" -#include "mesh_normal_consistency/mesh_normal_consistency.h" -#include "packed_to_padded_tensor/packed_to_padded_tensor.h" -#include "point_mesh/point_mesh_cuda.h" -#include "points_to_volumes/points_to_volumes.h" -#include "rasterize_meshes/rasterize_meshes.h" -#include "rasterize_points/rasterize_points.h" -#include "sample_farthest_points/sample_farthest_points.h" -#include "sample_pdf/sample_pdf.h" - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("face_areas_normals_forward", &FaceAreasNormalsForward); - m.def("face_areas_normals_backward", &FaceAreasNormalsBackward); - m.def("packed_to_padded", &PackedToPadded); - m.def("padded_to_packed", &PaddedToPacked); - m.def("interp_face_attrs_forward", &InterpFaceAttrsForward); - m.def("interp_face_attrs_backward", &InterpFaceAttrsBackward); -#ifdef WITH_CUDA - m.def("knn_check_version", &KnnCheckVersion); -#endif - m.def("knn_points_idx", &KNearestNeighborIdx); - m.def("knn_points_backward", &KNearestNeighborBackward); - m.def("ball_query", &BallQuery); - m.def("sample_farthest_points", &FarthestPointSampling); - m.def( - "mesh_normal_consistency_find_verts", &MeshNormalConsistencyFindVertices); - m.def("gather_scatter", &GatherScatter); - m.def("points_to_volumes_forward", PointsToVolumesForward); - m.def("points_to_volumes_backward", PointsToVolumesBackward); - m.def("rasterize_points", &RasterizePoints); - m.def("rasterize_points_backward", &RasterizePointsBackward); - m.def("rasterize_meshes_backward", &RasterizeMeshesBackward); - m.def("rasterize_meshes", &RasterizeMeshes); - m.def("sigmoid_alpha_blend", &SigmoidAlphaBlend); - m.def("sigmoid_alpha_blend_backward", &SigmoidAlphaBlendBackward); - - // Accumulation functions - m.def("accum_weightedsumnorm", &weightedSumNormForward); - m.def("accum_weightedsum", &weightedSumForward); - m.def("accum_alphacomposite", &alphaCompositeForward); - m.def("accum_weightedsumnorm_backward", &weightedSumNormBackward); - m.def("accum_weightedsum_backward", &weightedSumBackward); - m.def("accum_alphacomposite_backward", &alphaCompositeBackward); - - // These are only visible for testing; users should not call them directly - m.def("_rasterize_points_coarse", &RasterizePointsCoarse); - m.def("_rasterize_points_naive", &RasterizePointsNaive); - m.def("_rasterize_meshes_naive", &RasterizeMeshesNaive); - m.def("_rasterize_meshes_coarse", &RasterizeMeshesCoarse); - m.def("_rasterize_meshes_fine", &RasterizeMeshesFine); - - // PointEdge distance functions - m.def("point_edge_dist_forward", &PointEdgeDistanceForward); - m.def("point_edge_dist_backward", &PointEdgeDistanceBackward); - m.def("edge_point_dist_forward", &EdgePointDistanceForward); - m.def("edge_point_dist_backward", &EdgePointDistanceBackward); - m.def("point_edge_array_dist_forward", &PointEdgeArrayDistanceForward); - m.def("point_edge_array_dist_backward", &PointEdgeArrayDistanceBackward); - - // PointFace distance functions - m.def("point_face_dist_forward", &PointFaceDistanceForward); - 
m.def("point_face_dist_backward", &PointFaceDistanceBackward); - m.def("face_point_dist_forward", &FacePointDistanceForward); - m.def("face_point_dist_backward", &FacePointDistanceBackward); - m.def("point_face_array_dist_forward", &PointFaceArrayDistanceForward); - m.def("point_face_array_dist_backward", &PointFaceArrayDistanceBackward); - - // Sample PDF - m.def("sample_pdf", &SamplePdf); - - // 3D IoU - m.def("iou_box3d", &IoUBox3D); - - // Marching cubes - m.def("marching_cubes", &MarchingCubes); - - // Pulsar. -#ifdef PULSAR_LOGGING_ENABLED - c10::ShowLogInfoToStderr(); -#endif - py::class_< - pulsar::pytorch::Renderer, - std::shared_ptr>(m, "PulsarRenderer") - .def(py::init< - const uint&, - const uint&, - const uint&, - const bool&, - const bool&, - const float&, - const uint&, - const uint&>()) - .def( - "__eq__", - [](const pulsar::pytorch::Renderer& a, - const pulsar::pytorch::Renderer& b) { return a == b; }, - py::is_operator()) - .def( - "__ne__", - [](const pulsar::pytorch::Renderer& a, - const pulsar::pytorch::Renderer& b) { return !(a == b); }, - py::is_operator()) - .def( - "__repr__", - [](const pulsar::pytorch::Renderer& self) { - std::stringstream ss; - ss << self; - return ss.str(); - }) - .def( - "forward", - &pulsar::pytorch::Renderer::forward, - py::arg("vert_pos"), - py::arg("vert_col"), - py::arg("vert_radii"), - - py::arg("cam_pos"), - py::arg("pixel_0_0_center"), - py::arg("pixel_vec_x"), - py::arg("pixel_vec_y"), - py::arg("focal_length"), - py::arg("principal_point_offsets"), - - py::arg("gamma"), - py::arg("max_depth"), - py::arg("min_depth") /* = 0.f*/, - py::arg( - "bg_col") /* = at::nullopt not exposed properly in pytorch 1.1. */ - , - py::arg("opacity") /* = at::nullopt ... */, - py::arg("percent_allowed_difference") = 0.01f, - py::arg("max_n_hits") = MAX_UINT, - py::arg("mode") = 0) - .def("backward", &pulsar::pytorch::Renderer::backward) - .def_property( - "device_tracker", - [](const pulsar::pytorch::Renderer& self) { - return self.device_tracker; - }, - [](pulsar::pytorch::Renderer& self, const torch::Tensor& val) { - self.device_tracker = val; - }) - .def_property_readonly("width", &pulsar::pytorch::Renderer::width) - .def_property_readonly("height", &pulsar::pytorch::Renderer::height) - .def_property_readonly( - "max_num_balls", &pulsar::pytorch::Renderer::max_num_balls) - .def_property_readonly( - "orthogonal", &pulsar::pytorch::Renderer::orthogonal) - .def_property_readonly( - "right_handed", &pulsar::pytorch::Renderer::right_handed) - .def_property_readonly("n_track", &pulsar::pytorch::Renderer::n_track); - m.def( - "pulsar_sphere_ids_from_result_info_nograd", - &pulsar::pytorch::sphere_ids_from_result_info_nograd); - // Constants. - m.attr("EPS") = py::float_(EPS); - m.attr("MAX_FLOAT") = py::float_(MAX_FLOAT); - m.attr("MAX_INT") = py::int_(MAX_INT); - m.attr("MAX_UINT") = py::int_(MAX_UINT); - m.attr("MAX_USHORT") = py::int_(MAX_USHORT); - m.attr("PULSAR_MAX_GRAD_SPHERES") = py::int_(MAX_GRAD_SPHERES); -} diff --git a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu deleted file mode 100644 index 58aeb20fcfd2e5d51ab93054f176f9a2e4962ca4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -template -__global__ void FaceAreasNormalsForwardKernel( - const scalar_t* __restrict__ verts, - const int64_t* __restrict__ faces, - scalar_t* __restrict__ face_areas, - scalar_t* __restrict__ face_normals, - const size_t V, - const size_t F) { - const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; - const size_t stride = gridDim.x * blockDim.x; - - // Faces split evenly over the number of threads in the grid. - // Each thread computes the area & normal of its respective faces and adds it - // to the global face_areas tensor. - for (size_t f = tid; f < F; f += stride) { - const int64_t i0 = faces[3 * f + 0]; - const int64_t i1 = faces[3 * f + 1]; - const int64_t i2 = faces[3 * f + 2]; - - const scalar_t v0_x = verts[3 * i0 + 0]; - const scalar_t v0_y = verts[3 * i0 + 1]; - const scalar_t v0_z = verts[3 * i0 + 2]; - - const scalar_t v1_x = verts[3 * i1 + 0]; - const scalar_t v1_y = verts[3 * i1 + 1]; - const scalar_t v1_z = verts[3 * i1 + 2]; - - const scalar_t v2_x = verts[3 * i2 + 0]; - const scalar_t v2_y = verts[3 * i2 + 1]; - const scalar_t v2_z = verts[3 * i2 + 2]; - - const scalar_t ax = v1_x - v0_x; - const scalar_t ay = v1_y - v0_y; - const scalar_t az = v1_z - v0_z; - - const scalar_t bx = v2_x - v0_x; - const scalar_t by = v2_y - v0_y; - const scalar_t bz = v2_z - v0_z; - - const scalar_t cx = ay * bz - az * by; - const scalar_t cy = az * bx - ax * bz; - const scalar_t cz = ax * by - ay * bx; - - scalar_t norm = sqrt(cx * cx + cy * cy + cz * cz); - face_areas[f] = norm / 2.0; - norm = (norm < 1e-6) ? 1e-6 : norm; // max(norm, 1e-6) - face_normals[3 * f + 0] = cx / norm; - face_normals[3 * f + 1] = cy / norm; - face_normals[3 * f + 2] = cz / norm; - } -} - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. -__global__ void FaceAreasNormalsBackwardKernel( - const float* __restrict__ grad_areas, - const float* __restrict__ grad_normals, - const float* __restrict__ verts, - const int64_t* __restrict__ faces, - float* __restrict__ grad_verts, - const size_t V, - const size_t F) { - const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; - const size_t stride = gridDim.x * blockDim.x; - - // Faces split evenly over the number of threads in the grid. - // Each thread computes the area & normal of its respective faces and adds it - // to the global face_areas tensor. - for (size_t f = tid; f < F; f += stride) { - const int64_t i0 = faces[3 * f + 0]; - const int64_t i1 = faces[3 * f + 1]; - const int64_t i2 = faces[3 * f + 2]; - - const float v0_x = verts[3 * i0 + 0]; - const float v0_y = verts[3 * i0 + 1]; - const float v0_z = verts[3 * i0 + 2]; - - const float v1_x = verts[3 * i1 + 0]; - const float v1_y = verts[3 * i1 + 1]; - const float v1_z = verts[3 * i1 + 2]; - - const float v2_x = verts[3 * i2 + 0]; - const float v2_y = verts[3 * i2 + 1]; - const float v2_z = verts[3 * i2 + 2]; - - const float ax = v1_x - v0_x; - const float ay = v1_y - v0_y; - const float az = v1_z - v0_z; - - const float bx = v2_x - v0_x; - const float by = v2_y - v0_y; - const float bz = v2_z - v0_z; - - const float cx = ay * bz - az * by; - const float cy = az * bx - ax * bz; - const float cz = ax * by - ay * bx; - - float norm = sqrt(cx * cx + cy * cy + cz * cz); - norm = (norm < 1e-6) ? 1e-6 : norm; // max(norm, 1e-6) - float inv_norm = 1. 
/ norm; - float inv_norm_2 = pow(inv_norm, 2.0f); - float inv_norm_3 = pow(inv_norm, 3.0f); - - // We compute gradients with respect to the input vertices. - // For each vertex, gradients come from grad_areas and grad_normals. - // eg, grad_v0_x = (d / d v0_x) - // = \sum_f (d / d areas[f]) * (d areas[f] / d v0_x) - // + (d / d normals[f, 0]) * (d normals[f, 0] / d v0_x) - // + (d / d normals[f, 1]) * (d normals[f, 1] / d v0_x) - // + (d / d normals[f, 2]) * (d normals[f, 2] / d v0_x) - // with (d / d areas[f]) = grad_areas[f] and - // (d / d normals[f, j]) = grad_normals[f][j]. - // The equations below are derived after taking - // derivatives wrt to the vertices (fun times!). - - // grad v0 coming from grad areas and grad normals - const float grad_v0_x = - ((-az + bz) * cy + (-by + ay) * cz) / 2.0 * inv_norm * grad_areas[f] + - -cx * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_3 * - grad_normals[3 * f + 0] + - ((-az + bz) - cy * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) * - inv_norm * grad_normals[3 * f + 1] + - ((-by + ay) - cz * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) * - inv_norm * grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i0 + 0, grad_v0_x); - - const float grad_v0_y = - ((-bz + az) * cx + (-ax + bx) * cz) / 2.0 * inv_norm * grad_areas[f] + - ((-bz + az) - cx * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) * - inv_norm * grad_normals[3 * f + 0] + - -cy * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_3 * - grad_normals[3 * f + 1] + - ((-ax + bx) - cz * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) * - inv_norm * grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i0 + 1, grad_v0_y); - - const float grad_v0_z = - ((-ay + by) * cx + (-bx + ax) * cy) / 2.0 * inv_norm * grad_areas[f] + - ((-ay + by) - cx * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) * - inv_norm * grad_normals[3 * f + 0] + - ((-bx + ax) - cy * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) * - inv_norm * grad_normals[3 * f + 1] + - -cz * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_3 * - grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i0 + 2, grad_v0_z); - - // grad v1 coming from grad areas and grad normals - const float grad_v1_x = - (by * cz - bz * cy) / 2.0 * inv_norm * grad_areas[f] + - -cx * (by * cz - bz * cy) * inv_norm_3 * grad_normals[3 * f + 0] + - (-bz - cy * (by * cz - bz * cy) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 1] + - (by - cz * (by * cz - bz * cy) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i1 + 0, grad_v1_x); - - const float grad_v1_y = - (bz * cx - bx * cz) / 2.0 * inv_norm * grad_areas[f] + - (bz - cx * (bz * cx - bx * cz) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 0] + - -cy * (bz * cx - bx * cz) * inv_norm_3 * grad_normals[3 * f + 1] + - (-bx - cz * (bz * cx - bx * cz) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i1 + 1, grad_v1_y); - - const float grad_v1_z = - (bx * cy - by * cx) / 2.0 * inv_norm * grad_areas[f] + - (-by - cx * (bx * cy - by * cx) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 0] + - (bx - cx * (bx * cy - by * cx) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 1] + - -cz * (bx * cy - by * cx) * inv_norm_3 * grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i1 + 2, grad_v1_z); - - // grad v2 coming from grad areas - const float grad_v2_x = - (az * cy - ay * cz) / 2.0 * inv_norm * grad_areas[f] + - -cx * (az * cy - ay * cz) * inv_norm_3 * grad_normals[3 * f + 0] + - (az - cy * (az * cy - ay * cz) * inv_norm_2) * 
inv_norm * - grad_normals[3 * f + 1] + - (-ay - cz * (az * cy - ay * cz) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i2 + 0, grad_v2_x); - - const float grad_v2_y = - (ax * cz - az * cx) / 2.0 * inv_norm * grad_areas[f] + - (-az - cx * (ax * cz - az * cx) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 0] + - -cy * (ax * cz - az * cx) * inv_norm_3 * grad_normals[3 * f + 1] + - (ax - cz * (ax * cz - az * cx) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i2 + 1, grad_v2_y); - - const float grad_v2_z = - (ay * cx - ax * cy) / 2.0 * inv_norm * grad_areas[f] + - (ay - cx * (ay * cx - ax * cy) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 0] + - (-ax - cy * (ay * cx - ax * cy) * inv_norm_2) * inv_norm * - grad_normals[3 * f + 1] + - -cz * (ay * cx - ax * cy) * inv_norm_3 * grad_normals[3 * f + 2]; - atomicAdd(grad_verts + 3 * i2 + 2, grad_v2_z); - } -} - -std::tuple FaceAreasNormalsForwardCuda( - const at::Tensor verts, - const at::Tensor faces) { - const auto V = verts.size(0); - const auto F = faces.size(0); - - // Check inputs are on the same device - at::TensorArg verts_t{verts, "verts", 1}, faces_t{faces, "faces", 2}; - at::CheckedFrom c = "FaceAreasNormalsForwardCuda"; - at::checkAllSameGPU(c, {verts_t, faces_t}); - - // Set the device for the kernel launch based on the device of verts - at::cuda::CUDAGuard device_guard(verts.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - at::Tensor areas = at::empty({F}, verts.options()); - at::Tensor normals = at::empty({F, 3}, verts.options()); - - if (areas.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(areas, normals); - } - - const int blocks = 64; - const int threads = 512; - - AT_DISPATCH_FLOATING_TYPES( - verts.scalar_type(), "face_areas_normals_forward_cuda", ([&] { - FaceAreasNormalsForwardKernel<<>>( - verts.contiguous().data_ptr(), - faces.contiguous().data_ptr(), - areas.data_ptr(), - normals.data_ptr(), - V, - F); - })); - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(areas, normals); -} - -at::Tensor FaceAreasNormalsBackwardCuda( - const at::Tensor grad_areas, - const at::Tensor grad_normals, - const at::Tensor verts, - const at::Tensor faces) { - // Check inputs are on the same device - at::TensorArg verts_t{verts, "verts", 1}, faces_t{faces, "faces", 2}, - grad_areas_t{grad_areas, "grad_areas", 3}, - grad_normals_t{grad_normals, "grad_normals", 4}; - at::CheckedFrom c = "FaceAreasNormalsBackwardCuda"; - at::checkAllSameGPU(c, {verts_t, faces_t, grad_areas_t, grad_normals_t}); - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("FaceAreasNormalsBackwardCuda"); - - // Set the device for the kernel launch based on the device of verts - at::cuda::CUDAGuard device_guard(verts.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const auto V = verts.size(0); - const auto F = faces.size(0); - - at::Tensor grad_verts = at::zeros({V, 3}, grad_areas.options()); - - if (grad_verts.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_verts; - } - - const int blocks = 64; - const int threads = 512; - // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports - // doubles. Currently, support is for floats only. 
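The closed-form vertex gradients in the backward kernel are easy to get wrong, so a cheap sanity check is a central finite difference on the forward quantity. The sketch below (standalone C++; the step size and names are my own choices, not from the source) compares the kernel's grad-areas term for d(area)/d(v0.x) against a numerical derivative for one triangle:

```cpp
#include <array>
#include <cmath>
#include <cstdio>

using Vec3 = std::array<double, 3>;

// Triangle area, same formula as the forward pass.
static double area(const Vec3& v0, const Vec3& v1, const Vec3& v2) {
  const double ax = v1[0] - v0[0], ay = v1[1] - v0[1], az = v1[2] - v0[2];
  const double bx = v2[0] - v0[0], by = v2[1] - v0[1], bz = v2[2] - v0[2];
  const double cx = ay * bz - az * by;
  const double cy = az * bx - ax * bz;
  const double cz = ax * by - ay * bx;
  return std::sqrt(cx * cx + cy * cy + cz * cz) / 2.0;
}

int main() {
  const Vec3 v0{0.2, -0.1, 0.3}, v1{1.0, 0.4, -0.2}, v2{-0.3, 0.9, 0.5};

  // Closed-form d(area)/d(v0.x), the same expression as the kernel's grad_areas term.
  const double ax = v1[0] - v0[0], ay = v1[1] - v0[1], az = v1[2] - v0[2];
  const double bx = v2[0] - v0[0], by = v2[1] - v0[1], bz = v2[2] - v0[2];
  const double cx = ay * bz - az * by;
  const double cy = az * bx - ax * bz;
  const double cz = ax * by - ay * bx;
  const double inv_norm = 1.0 / std::sqrt(cx * cx + cy * cy + cz * cz);
  const double analytic = ((-az + bz) * cy + (-by + ay) * cz) / 2.0 * inv_norm;

  // Central finite difference on v0.x with a small step.
  const double h = 1e-6;
  Vec3 vp = v0, vm = v0;
  vp[0] += h;
  vm[0] -= h;
  const double numeric = (area(vp, v1, v2) - area(vm, v1, v2)) / (2.0 * h);

  std::printf("analytic=%.8f numeric=%.8f\n", analytic, numeric);
}
```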
- FaceAreasNormalsBackwardKernel<<>>( - grad_areas.contiguous().data_ptr(), - grad_normals.contiguous().data_ptr(), - verts.contiguous().data_ptr(), - faces.contiguous().data_ptr(), - grad_verts.data_ptr(), - V, - F); - - AT_CUDA_CHECK(cudaGetLastError()); - return grad_verts; -} diff --git a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.h b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.h deleted file mode 100644 index 6df37c12e4c81cc9c03375bad3751baafeb473aa..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Compute areas of mesh faces using packed representation. -// -// Inputs: -// verts: FloatTensor of shape (V, 3) giving vertex positions. -// faces: LongTensor of shape (F, 3) giving faces. -// -// Returns: -// areas: FloatTensor of shape (F,) where areas[f] is the area of faces[f]. -// normals: FloatTensor of shape (F, 3) where normals[f] is the normal of -// faces[f] -// - -// Cpu implementation. -std::tuple FaceAreasNormalsForwardCpu( - const at::Tensor verts, - const at::Tensor faces); -// Cpu implementation -at::Tensor FaceAreasNormalsBackwardCpu( - const at::Tensor grad_areas, - const at::Tensor grad_normals, - const at::Tensor verts, - const at::Tensor faces); - -#ifdef WITH_CUDA -// Cuda implementation. -std::tuple FaceAreasNormalsForwardCuda( - const at::Tensor verts, - const at::Tensor faces); -// Cuda implementation. -at::Tensor FaceAreasNormalsBackwardCuda( - const at::Tensor grad_areas, - const at::Tensor grad_normals, - const at::Tensor verts, - const at::Tensor faces); -#endif - -// Implementation which is exposed. -std::tuple FaceAreasNormalsForward( - const at::Tensor verts, - const at::Tensor faces) { - if (verts.is_cuda() && faces.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(verts); - CHECK_CUDA(faces); - return FaceAreasNormalsForwardCuda(verts, faces); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return FaceAreasNormalsForwardCpu(verts, faces); -} - -// Implementation which is exposed. -at::Tensor FaceAreasNormalsBackward( - const at::Tensor grad_areas, - const at::Tensor grad_normals, - const at::Tensor verts, - const at::Tensor faces) { - if (verts.is_cuda() && faces.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(verts); - CHECK_CUDA(faces); - CHECK_CUDA(grad_areas); - CHECK_CUDA(grad_normals); - return FaceAreasNormalsBackwardCuda(grad_areas, grad_normals, verts, faces); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return FaceAreasNormalsBackwardCpu(grad_areas, grad_normals, verts, faces); -} diff --git a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals_cpu.cpp b/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals_cpu.cpp deleted file mode 100644 index 1871ac7d4044467d1322ba32e300d513c1d5118e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/face_areas_normals/face_areas_normals_cpu.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
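The now-deleted face_areas_normals.h shows the dispatch idiom used throughout this csrc tree: a thin exposed function checks whether the inputs live on the GPU and routes to the CUDA or CPU implementation, erroring out when the extension was built without WITH_CUDA. Stripped of the ATen types, the control flow is roughly the following sketch (types and names are placeholders, not the library's):

```cpp
#include <stdexcept>

struct Tensor { bool on_gpu = false; };  // stand-in for at::Tensor

Tensor OpCpu(const Tensor& x) { return x; }
#ifdef WITH_CUDA
Tensor OpCuda(const Tensor& x) { return x; }
#endif

// Exposed entry point: route by device, mirroring the header above.
Tensor Op(const Tensor& x) {
  if (x.on_gpu) {
#ifdef WITH_CUDA
    return OpCuda(x);
#else
    throw std::runtime_error("Not compiled with GPU support.");
#endif
  }
  return OpCpu(x);
}

int main() { Op(Tensor{}); }
```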
- */ - -#include -#include - -std::tuple FaceAreasNormalsForwardCpu( - const at::Tensor verts, - const at::Tensor faces) { - const int F = faces.size(0); - - at::Tensor areas = at::empty({F}, verts.options()); - at::Tensor normals = at::empty({F, 3}, verts.options()); - - auto verts_a = verts.accessor(); - auto faces_a = faces.accessor(); - auto areas_a = areas.accessor(); - auto normals_a = normals.accessor(); - - for (int f = 0; f < F; ++f) { - const int64_t i0 = faces_a[f][0]; - const int64_t i1 = faces_a[f][1]; - const int64_t i2 = faces_a[f][2]; - - const float v0_x = verts_a[i0][0]; - const float v0_y = verts_a[i0][1]; - const float v0_z = verts_a[i0][2]; - - const float v1_x = verts_a[i1][0]; - const float v1_y = verts_a[i1][1]; - const float v1_z = verts_a[i1][2]; - - const float v2_x = verts_a[i2][0]; - const float v2_y = verts_a[i2][1]; - const float v2_z = verts_a[i2][2]; - - const float ax = v1_x - v0_x; - const float ay = v1_y - v0_y; - const float az = v1_z - v0_z; - - const float bx = v2_x - v0_x; - const float by = v2_y - v0_y; - const float bz = v2_z - v0_z; - - const float cx = ay * bz - az * by; - const float cy = az * bx - ax * bz; - const float cz = ax * by - ay * bx; - - float norm = sqrt(cx * cx + cy * cy + cz * cz); - areas_a[f] = norm / 2.0; - norm = (norm < 1e-6) ? 1e-6 : norm; // max(norm, 1e-6) - normals_a[f][0] = cx / norm; - normals_a[f][1] = cy / norm; - normals_a[f][2] = cz / norm; - } - return std::make_tuple(areas, normals); -} - -at::Tensor FaceAreasNormalsBackwardCpu( - const at::Tensor grad_areas, - const at::Tensor grad_normals, - const at::Tensor verts, - const at::Tensor faces) { - const int V = verts.size(0); - const int F = faces.size(0); - - at::Tensor grad_verts = at::zeros({V, 3}, grad_areas.options()); - - auto grad_areas_a = grad_areas.accessor(); - auto grad_normals_a = grad_normals.accessor(); - auto verts_a = verts.accessor(); - auto faces_a = faces.accessor(); - auto grad_verts_a = grad_verts.accessor(); - - for (int f = 0; f < F; ++f) { - const int64_t i0 = faces_a[f][0]; - const int64_t i1 = faces_a[f][1]; - const int64_t i2 = faces_a[f][2]; - - const float v0_x = verts_a[i0][0]; - const float v0_y = verts_a[i0][1]; - const float v0_z = verts_a[i0][2]; - - const float v1_x = verts_a[i1][0]; - const float v1_y = verts_a[i1][1]; - const float v1_z = verts_a[i1][2]; - - const float v2_x = verts_a[i2][0]; - const float v2_y = verts_a[i2][1]; - const float v2_z = verts_a[i2][2]; - - const float ax = v1_x - v0_x; - const float ay = v1_y - v0_y; - const float az = v1_z - v0_z; - - const float bx = v2_x - v0_x; - const float by = v2_y - v0_y; - const float bz = v2_z - v0_z; - - const float cx = ay * bz - az * by; - const float cy = az * bx - ax * bz; - const float cz = ax * by - ay * bx; - - float norm = sqrt(cx * cx + cy * cy + cz * cz); - norm = (norm < 1e-6) ? 1e-6 : norm; // max(norm, 1e-6) - float inv_norm = 1. / norm; - float inv_norm_2 = pow(inv_norm, 2.0f); - float inv_norm_3 = pow(inv_norm, 3.0f); - - // We compute gradients with respect to the input vertices. - // For each vertex, gradients come from grad_areas and grad_normals. - // eg, grad_v0_x = (d / d v0_x) - // = \sum_f (d / d areas[f]) * (d areas[f] / d v0_x) - // + (d / d normals[f, 0]) * (d normals[f, 0] / d v0_x) - // + (d / d normals[f, 1]) * (d normals[f, 1] / d v0_x) - // + (d / d normals[f, 2]) * (d normals[f, 2] / d v0_x) - // with (d / d areas[f]) = grad_areas[f] and - // (d / d normals[f, j]) = grad_normals[f][j]. 
- // The equations below are derived after taking - // derivatives wrt to the vertices (fun times!). - - // grad v0 coming from grad areas and grad normals - const float grad_v0_x = - ((-az + bz) * cy + (-by + ay) * cz) / 2.0 * inv_norm * grad_areas_a[f] + - -cx * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_3 * - grad_normals_a[f][0] + - ((-az + bz) - cy * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) * - inv_norm * grad_normals_a[f][1] + - ((-by + ay) - cz * ((-az + bz) * cy + (-by + ay) * cz) * inv_norm_2) * - inv_norm * grad_normals_a[f][2]; - grad_verts_a[i0][0] += grad_v0_x; - - const float grad_v0_y = - ((-bz + az) * cx + (-ax + bx) * cz) / 2.0 * inv_norm * grad_areas_a[f] + - ((-bz + az) - cx * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) * - inv_norm * grad_normals_a[f][0] + - -cy * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_3 * - grad_normals_a[f][1] + - ((-ax + bx) - cz * ((-bz + az) * cx + (-ax + bx) * cz) * inv_norm_2) * - inv_norm * grad_normals_a[f][2]; - grad_verts[i0][1] += grad_v0_y; - - const float grad_v0_z = - ((-ay + by) * cx + (-bx + ax) * cy) / 2.0 * inv_norm * grad_areas_a[f] + - ((-ay + by) - cx * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) * - inv_norm * grad_normals_a[f][0] + - ((-bx + ax) - cy * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_2) * - inv_norm * grad_normals_a[f][1] + - -cz * ((-ay + by) * cx + (-bx + ax) * cy) * inv_norm_3 * - grad_normals_a[f][2]; - grad_verts[i0][2] += grad_v0_z; - - // grad v1 coming from grad areas and grad normals - const float grad_v1_x = - (by * cz - bz * cy) / 2.0 * inv_norm * grad_areas_a[f] + - -cx * (by * cz - bz * cy) * inv_norm_3 * grad_normals_a[f][0] + - (-bz - cy * (by * cz - bz * cy) * inv_norm_2) * inv_norm * - grad_normals_a[f][1] + - (by - cz * (by * cz - bz * cy) * inv_norm_2) * inv_norm * - grad_normals_a[f][2]; - grad_verts[i1][0] += grad_v1_x; - - const float grad_v1_y = - (bz * cx - bx * cz) / 2.0 * inv_norm * grad_areas_a[f] + - (bz - cx * (bz * cx - bx * cz) * inv_norm_2) * inv_norm * - grad_normals_a[f][0] + - -cy * (bz * cx - bx * cz) * inv_norm_3 * grad_normals_a[f][1] + - (-bx - cz * (bz * cx - bx * cz) * inv_norm_2) * inv_norm * - grad_normals_a[f][2]; - grad_verts[i1][1] += grad_v1_y; - - const float grad_v1_z = - (bx * cy - by * cx) / 2.0 * inv_norm * grad_areas_a[f] + - (-by - cx * (bx * cy - by * cx) * inv_norm_2) * inv_norm * - grad_normals_a[f][0] + - (bx - cx * (bx * cy - by * cx) * inv_norm_2) * inv_norm * - grad_normals_a[f][1] + - -cz * (bx * cy - by * cx) * inv_norm_3 * grad_normals_a[f][2]; - grad_verts[i1][2] += grad_v1_z; - - // grad v2 coming from grad areas - const float grad_v2_x = - (az * cy - ay * cz) / 2.0 * inv_norm * grad_areas_a[f] + - -cx * (az * cy - ay * cz) * inv_norm_3 * grad_normals_a[f][0] + - (az - cy * (az * cy - ay * cz) * inv_norm_2) * inv_norm * - grad_normals_a[f][1] + - (-ay - cz * (az * cy - ay * cz) * inv_norm_2) * inv_norm * - grad_normals_a[f][2]; - grad_verts[i2][0] += grad_v2_x; - - const float grad_v2_y = - (ax * cz - az * cx) / 2.0 * inv_norm * grad_areas_a[f] + - (-az - cx * (ax * cz - az * cx) * inv_norm_2) * inv_norm * - grad_normals_a[f][0] + - -cy * (ax * cz - az * cx) * inv_norm_3 * grad_normals_a[f][1] + - (ax - cz * (ax * cz - az * cx) * inv_norm_2) * inv_norm * - grad_normals_a[f][2]; - grad_verts[i2][1] += grad_v2_y; - - const float grad_v2_z = - (ay * cx - ax * cy) / 2.0 * inv_norm * grad_areas_a[f] + - (ay - cx * (ay * cx - ax * cy) * inv_norm_2) * inv_norm * - grad_normals_a[f][0] + - (-ax - cy * (ay * cx - ax * cy) * 
inv_norm_2) * inv_norm * - grad_normals_a[f][1] + - -cz * (ay * cx - ax * cy) * inv_norm_3 * grad_normals_a[f][2]; - grad_verts[i2][2] += grad_v2_z; - } - return grad_verts; -} diff --git a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.cu b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.cu deleted file mode 100644 index 1ec1a6f27a2476375f3d140c3c8fb440fb92c04f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.cu +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -// TODO(T47953967) to make this cuda kernel support all datatypes. -__global__ void GatherScatterCudaKernel( - const float* __restrict__ input, - const int64_t* __restrict__ edges, - float* __restrict__ output, - bool directed, - bool backward, - const size_t V, - const size_t D, - const size_t E) { - const int tid = threadIdx.x; - - // Reverse the vertex order if backward. - const int v0_idx = backward ? 1 : 0; - const int v1_idx = backward ? 0 : 1; - - // Edges are split evenly across the blocks. - for (int e = blockIdx.x; e < E; e += gridDim.x) { - // Get indices of vertices which form the edge. - const int64_t v0 = edges[2 * e + v0_idx]; - const int64_t v1 = edges[2 * e + v1_idx]; - - // Split vertex features evenly across threads. - // This implementation will be quite wasteful when D<128 since there will be - // a lot of threads doing nothing. - for (int d = tid; d < D; d += blockDim.x) { - const float val = input[v1 * D + d]; - float* address = output + v0 * D + d; - atomicAdd(address, val); - if (!directed) { - const float val = input[v0 * D + d]; - float* address = output + v1 * D + d; - atomicAdd(address, val); - } - } - __syncthreads(); - } -} - -at::Tensor GatherScatterCuda( - const at::Tensor& input, - const at::Tensor& edges, - bool directed, - bool backward) { - // Check inputs are on the same device - at::TensorArg input_t{input, "input", 1}, edges_t{edges, "edges", 2}; - at::CheckedFrom c = "GatherScatterCuda"; - at::checkAllSameGPU(c, {input_t, edges_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(input.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const auto num_vertices = input.size(0); - const auto input_feature_dim = input.size(1); - const auto num_edges = edges.size(0); - - auto output = at::zeros({num_vertices, input_feature_dim}, input.options()); - const size_t threads = 128; - const size_t max_blocks = 1920; - const size_t blocks = num_edges < max_blocks ? num_edges : max_blocks; - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - GatherScatterCudaKernel<<>>( - input.contiguous().data_ptr(), - edges.contiguous().data_ptr(), - output.data_ptr(), - directed, - backward, - num_vertices, - input_feature_dim, - num_edges); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} diff --git a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.h b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.h deleted file mode 100644 index 9ab9574f2df2861a9d57162e0c5f0ccc746ce206..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include "utils/pytorch3d_cutils.h" - -// Fused gather scatter operation for aggregating features of neighbor nodes -// in a graph. This gather scatter operation is specific to graphs as edge -// indices are used as input. -// -// Args: -// input: float32 Tensor of shape (V, D) where V is the number of vertices -// and D is the feature dimension. -// edges: int64 Tensor of shape (E, 2) giving the indices of the vertices that -// make up the edge. E is the number of edges. -// directed: Bool indicating if edges in the graph are directed. For a -// directed graph v0 -> v1 the updated feature for v0 depends on v1. -// backward: Bool indicating if the operation is the backward pass. -// -// Returns: -// output: float32 Tensor of same shape as input. - -at::Tensor GatherScatterCuda( - const at::Tensor& input, - const at::Tensor& edges, - bool directed, - bool backward); - -at::Tensor GatherScatterCpu( - const at::Tensor& input, - const at::Tensor& edges, - bool directed, - bool backward); - -// Exposed implementation. -at::Tensor GatherScatter( - const at::Tensor& input, - const at::Tensor& edges, - bool directed, - bool backward) { - if (input.is_cuda() && edges.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(input); - CHECK_CUDA(edges); - return GatherScatterCuda(input, edges, directed, backward); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return GatherScatterCpu(input, edges, directed, backward); -} diff --git a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter_cpu.cpp b/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter_cpu.cpp deleted file mode 100644 index 8511e125519cf50f6b538da1adc33b39e4b16171..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/gather_scatter/gather_scatter_cpu.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include - -at::Tensor GatherScatterCpu( - const at::Tensor& input, - const at::Tensor& edges, - bool directed, - bool backward) { - const auto num_vertices = input.size(0); - const auto input_feature_dim = input.size(1); - const auto num_edges = edges.size(0); - - auto output = at::zeros({num_vertices, input_feature_dim}, input.options()); - - auto input_a = input.accessor(); - auto edges_a = edges.accessor(); - auto output_a = output.accessor(); - const int v0_idx = backward ? 1 : 0; - const int v1_idx = backward ? 0 : 1; - - for (int e = 0; e < num_edges; ++e) { - // Get indices of vertices which form the edge. - const int64_t v0 = edges_a[e][v0_idx]; - const int64_t v1 = edges_a[e][v1_idx]; - - for (int d = 0; d < input_feature_dim; ++d) { - output_a[v0][d] += input_a[v1][d]; - if (!directed) { - output_a[v1][d] += input_a[v0][d]; - } - } - } - return output; -} diff --git a/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu b/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu deleted file mode 100644 index 6bd2a80d972f2f4f7d76c0cf7d97d534ab3c55fe..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
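Functionally, the gather-scatter op just sums the features of each vertex's neighbors as given by the edge list; the backward flag only swaps which endpoint gathers from which, and directed=false makes the update symmetric, exactly as in the CPU loop above. A tiny standalone C++ sketch on a three-vertex path graph (helper names are mine, not the library's):

```cpp
#include <cstdio>
#include <utility>
#include <vector>

// input:  V x D row-major features, edges: E pairs (v0, v1).
// For each edge, v0 accumulates v1's features; if undirected, v1 also
// accumulates v0's. This mirrors the deleted CPU implementation.
std::vector<float> gather_scatter(const std::vector<float>& input,
                                  const std::vector<std::pair<int, int>>& edges,
                                  int V, int D, bool directed) {
  std::vector<float> output(static_cast<size_t>(V) * D, 0.0f);
  for (const auto& e : edges) {
    for (int d = 0; d < D; ++d) {
      output[e.first * D + d] += input[e.second * D + d];
      if (!directed) {
        output[e.second * D + d] += input[e.first * D + d];
      }
    }
  }
  return output;
}

int main() {
  // 3 vertices, 1-D features [1, 2, 3], path edges 0-1 and 1-2, undirected.
  const std::vector<float> x = {1.0f, 2.0f, 3.0f};
  const auto y = gather_scatter(x, {{0, 1}, {1, 2}}, 3, 1, /*directed=*/false);
  std::printf("%.0f %.0f %.0f\n", y[0], y[1], y[2]);  // expect 2 4 2
}
```

The CUDA version parallelizes the outer loop over edges (one block per edge) and the inner loop over feature channels, using atomicAdd because several edges can write to the same vertex row.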
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -template -__global__ void InterpFaceAttrsForwardKernel( - const int64_t* __restrict__ pix_to_face, // (P,) - const scalar_t* __restrict__ barycentric_coords, // (P, 3) - const scalar_t* __restrict__ face_attrs, // (F, 3, D) - scalar_t* pix_attrs, // (P, D) - const size_t P, - const size_t F, - const size_t D) { - const int tid = threadIdx.x + blockIdx.x * blockDim.x; - const int num_threads = blockDim.x * gridDim.x; - for (int pd = tid; pd < P * D; pd += num_threads) { - const int p = pd / D; - const int d = pd % D; - const int64_t f = pix_to_face[p]; - if (f < 0) { - continue; - } - scalar_t pix_attr = 0.0; - for (int i = 0; i < 3; ++i) { - scalar_t weight = barycentric_coords[p * 3 + i]; - scalar_t vert_attr = face_attrs[f * 3 * D + i * D + d]; - pix_attr += weight * vert_attr; - } - pix_attrs[p * D + d] = pix_attr; - } -} - -at::Tensor InterpFaceAttrsForwardCuda( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs) { - // Make sure all inputs are on the same device - at::TensorArg pix_to_face_t{pix_to_face, "pix_to_face", 1}, - barycentric_coords_t{barycentric_coords, "barycentric_coords", 2}, - face_attrs_t{face_attrs, "face_attributes", 3}; - at::CheckedFrom c = "InterpFaceAttrsForwardCuda"; - at::checkAllSameGPU(c, {pix_to_face_t, barycentric_coords_t, face_attrs_t}); - at::checkAllSameType(c, {barycentric_coords_t, face_attrs_t}); - - // Set the device for the kernel launch based on the input - at::cuda::CUDAGuard device_guard(pix_to_face.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const auto P = pix_to_face.size(0); - const auto F = face_attrs.size(0); - const auto D = face_attrs.size(2); - - TORCH_CHECK( - barycentric_coords.size(0) == P && barycentric_coords.size(1) == 3, - "barycentric_coords must have size (P, 3)"); - TORCH_CHECK(face_attrs.size(1) == 3, "face_attrs must have size (F, 3, D)"); - - auto pix_attrs = at::zeros({P, D}, face_attrs.options()); - const int threads = 1024; - const int blocks = 512; - AT_DISPATCH_FLOATING_TYPES( - face_attrs.scalar_type(), "interp_face_attrs_cuda", ([&] { - InterpFaceAttrsForwardKernel<<>>( - pix_to_face.contiguous().data_ptr(), - barycentric_coords.contiguous().data_ptr(), - face_attrs.contiguous().data_ptr(), - pix_attrs.contiguous().data_ptr(), - P, - F, - D); - })); - AT_CUDA_CHECK(cudaGetLastError()); - return pix_attrs; -} - -template -__global__ void InterpFaceAttrsBackwardKernel( - const int64_t* __restrict__ pix_to_face, // (P,) - const scalar_t* __restrict__ barycentric_coords, // (P, 3) - const scalar_t* __restrict__ face_attrs, // (F, 3, D) - const scalar_t* __restrict__ grad_pix_attrs, // (P, D) - scalar_t* __restrict__ grad_barycentric_coords, // (P, 3) - scalar_t* __restrict__ grad_face_attrs, // (F, 3, D) - const size_t P, - const size_t F, - const size_t D) { - const int tid = threadIdx.x + blockIdx.x * blockDim.x; - const int num_threads = blockDim.x * gridDim.x; - for (int pd = tid; pd < P * D; pd += num_threads) { - const int p = pd / D; - const int d = pd % D; - const int64_t f = pix_to_face[p]; - if (f < 0) { - continue; - } - scalar_t upstream_grad = grad_pix_attrs[p * D + d]; - for (int i = 0; i < 3; ++i) { - scalar_t weight = barycentric_coords[p * 3 + i]; - scalar_t vert_attr = face_attrs[f * 3 * D + i * D + d]; - scalar_t grad_bary_down = 
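The forward kernel here is a per-pixel weighted sum: for a pixel covered by face f, each attribute channel is the barycentric-weighted combination of that face's three per-vertex values, and pixels with pix_to_face < 0 are simply left at zero. A self-contained C++ sketch of that inner loop for a single pixel (illustrative names only):

```cpp
#include <array>
#include <cstdio>
#include <vector>

// face_attrs: D values per vertex for the 3 vertices of one face.
// bary: barycentric weights (w0, w1, w2) of the pixel inside that face.
std::vector<float> interp_pixel(const std::array<std::vector<float>, 3>& face_attrs,
                                const std::array<float, 3>& bary) {
  const size_t D = face_attrs[0].size();
  std::vector<float> out(D, 0.0f);
  for (size_t d = 0; d < D; ++d) {
    for (int i = 0; i < 3; ++i) {
      out[d] += bary[i] * face_attrs[i][d];  // weight * vertex attribute
    }
  }
  return out;
}

int main() {
  // RGB colors at the three vertices; the pixel sits at the face centroid.
  const std::array<std::vector<float>, 3> colors = {
      std::vector<float>{1.0f, 0.0f, 0.0f},
      std::vector<float>{0.0f, 1.0f, 0.0f},
      std::vector<float>{0.0f, 0.0f, 1.0f}};
  const auto rgb = interp_pixel(colors, {1.0f / 3, 1.0f / 3, 1.0f / 3});
  std::printf("%.3f %.3f %.3f\n", rgb[0], rgb[1], rgb[2]);  // ~0.333 each
}
```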
vert_attr * upstream_grad; - scalar_t grad_face_down = weight * upstream_grad; - atomicAdd(grad_barycentric_coords + p * 3 + i, grad_bary_down); - atomicAdd(grad_face_attrs + f * 3 * D + i * D + d, grad_face_down); - } - } -} - -std::tuple InterpFaceAttrsBackwardCuda( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs, - const at::Tensor& grad_pix_attrs) { - // Make sure all inputs are on the same device - at::TensorArg pix_to_face_t{pix_to_face, "pix_to_face", 1}, - barycentric_coords_t{barycentric_coords, "barycentric_coords", 2}, - face_attrs_t{face_attrs, "face_attributes", 3}, - grad_pix_attrs_t{grad_pix_attrs, "pix_attrs", 4}; - at::CheckedFrom c = "InterpFaceAttrsBackwarduda"; - at::checkAllSameGPU( - c, {pix_to_face_t, barycentric_coords_t, face_attrs_t, grad_pix_attrs_t}); - at::checkAllSameType( - c, {barycentric_coords_t, face_attrs_t, grad_pix_attrs_t}); - - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("InterpFaceAttrsBackwardCuda"); - - // Set the device for the kernel launch based on the input - at::cuda::CUDAGuard device_guard(pix_to_face.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const auto P = pix_to_face.size(0); - const auto F = face_attrs.size(0); - const auto D = face_attrs.size(2); - - TORCH_CHECK( - barycentric_coords.size(0) == P && barycentric_coords.size(1) == 3, - "barycentric_coords must have size (P, 3)"); - TORCH_CHECK(face_attrs.size(1) == 3, "face_attrs must have size (F, 3, D)"); - TORCH_CHECK( - grad_pix_attrs.size(0) == P && grad_pix_attrs.size(1) == D, - "grad_pix_attrs must have size (P, D)"); - - auto grad_barycentric_coords = at::zeros_like(barycentric_coords); - auto grad_face_attrs = at::zeros_like(face_attrs); - const int threads = 1024; - const int blocks = 512; - // Only allow float for now. - // TODO: Add support for double once we fix atomicAdd - // clang-format off - InterpFaceAttrsBackwardKernel<<>>( - pix_to_face.contiguous().data_ptr(), - barycentric_coords.contiguous().data_ptr(), - face_attrs.contiguous().data_ptr(), - grad_pix_attrs.contiguous().data_ptr(), - grad_barycentric_coords.contiguous().data_ptr(), - grad_face_attrs.contiguous().data_ptr(), - P, F, D); - AT_CUDA_CHECK(cudaGetLastError()); - // clang-format on - return std::make_tuple(grad_barycentric_coords, grad_face_attrs); -} diff --git a/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h b/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h deleted file mode 100644 index 5ba144621777eed15759f95f196466734bcaf077..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Interpolates per-face attributes (forward pass) -// -// Inputs: -// pix_to_face: LongTensor of shape (P,) giving a face index for each pixel. -// Each element should be < F, the total number of faces. -// Face indices < 0 indicate that the pixel is not covered by a face. -// barycentric_coords: FloatTensor of shape (P, 3) giving barycentric coords. -// face_attrs: FloatTensor of shape (F, 3, D) giving a D-dimensional -// value for each vertex of each face. 
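Because the forward pass is the bilinear form out[p, d] = sum_i w_i * a_i[d], the backward kernel above is just the product rule: the gradient reaching a barycentric weight is the matching vertex attribute times the upstream gradient, and the gradient reaching the attribute is the weight times the upstream gradient, accumulated with atomicAdd because many pixels can map to the same face. A minimal scalar sketch of that accumulation (names made up for illustration):

```cpp
#include <array>
#include <cstdio>

int main() {
  // One pixel, one channel: out = w0*a0 + w1*a1 + w2*a2.
  const std::array<float, 3> w = {0.2f, 0.3f, 0.5f};
  const std::array<float, 3> a = {1.0f, 2.0f, 3.0f};
  const float upstream = 0.5f;  // d(loss)/d(out) for this pixel/channel

  std::array<float, 3> grad_w{};  // d(loss)/d(w_i) = a_i * upstream
  std::array<float, 3> grad_a{};  // d(loss)/d(a_i) = w_i * upstream
  for (int i = 0; i < 3; ++i) {
    grad_w[i] += a[i] * upstream;  // the CUDA kernel does this with atomicAdd
    grad_a[i] += w[i] * upstream;
  }
  std::printf("grad_w = %.2f %.2f %.2f, grad_a = %.2f %.2f %.2f\n",
              grad_w[0], grad_w[1], grad_w[2], grad_a[0], grad_a[1], grad_a[2]);
}
```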
-// -// Returns: -// pix_attributes: FloatTensor of shape (P, D) giving an interpolated value -// for each pixel. - -// CPU implementation -at::Tensor InterpFaceAttrsForwardCpu( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs) { - AT_ERROR("Not Implemented"); - return pix_to_face; -} - -#ifdef WITH_CUDA -// Cuda implementation. -at::Tensor InterpFaceAttrsForwardCuda( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs); -#endif - -// General implementation -at::Tensor InterpFaceAttrsForward( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs) { - if (pix_to_face.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(face_attrs); - CHECK_CUDA(barycentric_coords); - return InterpFaceAttrsForwardCuda( - pix_to_face, barycentric_coords, face_attrs); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return InterpFaceAttrsForwardCpu(pix_to_face, barycentric_coords, face_attrs); -} - -// Interpolates per-face attributes (backward pass) -// -// Inputs: -// pix_to_face: LongTensor of shape (P,) giving a face index for each pixel. -// Each element should be < F, the total number of faces. -// Face indices < 0 indicate that the pixel is not covered by a face. -// barycentric_coords: FloatTensor of shape (P, 3) giving barycentric coords. -// face_attrs: FloatTensor of shape (F, 3, D) giving a D-dimensional -// value for each vertex of each face. -// grad_pix_attrs: Upstream gradients of shape (P, D) -// -// Returns a tuple of: -// grad_barycentric_coords: FloatTensor of shape (P, 3) -// grad_face_attrs: FloatTensor of shape (F, 3, D) - -std::tuple InterpFaceAttrsBackwardCpu( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs, - const at::Tensor& grad_pix_attrs) { - AT_ERROR("Not Implemented"); - return std::make_tuple(pix_to_face, pix_to_face); -} - -std::tuple InterpFaceAttrsBackwardCuda( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs, - const at::Tensor& grad_pix_attrs); - -std::tuple InterpFaceAttrsBackward( - const at::Tensor& pix_to_face, - const at::Tensor& barycentric_coords, - const at::Tensor& face_attrs, - const at::Tensor& grad_pix_attrs) { - if (pix_to_face.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(face_attrs); - CHECK_CUDA(barycentric_coords); - CHECK_CUDA(grad_pix_attrs); - return InterpFaceAttrsBackwardCuda( - pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return InterpFaceAttrsBackwardCpu( - pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs); -} diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.cu b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.cu deleted file mode 100644 index a315550f639ba9353016d8012db453f6d952a5b0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.cu +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include "iou_box3d/iou_utils.cuh" - -// Parallelize over N*M computations which can each be done -// independently -__global__ void IoUBox3DKernel( - const at::PackedTensorAccessor64 boxes1, - const at::PackedTensorAccessor64 boxes2, - at::PackedTensorAccessor64 vols, - at::PackedTensorAccessor64 ious) { - const size_t N = boxes1.size(0); - const size_t M = boxes2.size(0); - - const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; - const size_t stride = gridDim.x * blockDim.x; - - FaceVerts box1_tris[NUM_TRIS]; - FaceVerts box2_tris[NUM_TRIS]; - FaceVerts box1_planes[NUM_PLANES]; - FaceVerts box2_planes[NUM_PLANES]; - - for (size_t i = tid; i < N * M; i += stride) { - const size_t n = i / M; // box1 index - const size_t m = i % M; // box2 index - - // Convert to array of structs of face vertices i.e. effectively (F, 3, 3) - // FaceVerts is a data type defined in iou_utils.cuh - GetBoxTris(boxes1[n], box1_tris); - GetBoxTris(boxes2[m], box2_tris); - - // Calculate the position of the center of the box which is used in - // several calculations. This requires a tensor as input. - const float3 box1_center = BoxCenter(boxes1[n]); - const float3 box2_center = BoxCenter(boxes2[m]); - - // Convert to an array of face vertices - GetBoxPlanes(boxes1[n], box1_planes); - GetBoxPlanes(boxes2[m], box2_planes); - - // Get Box Volumes - const float box1_vol = BoxVolume(box1_tris, box1_center, NUM_TRIS); - const float box2_vol = BoxVolume(box2_tris, box2_center, NUM_TRIS); - - // Tris in Box1 intersection with Planes in Box2 - // Initialize box1 intersecting faces. MAX_TRIS is the - // max faces possible in the intersecting shape. - // TODO: determine if the value of MAX_TRIS is sufficient or - // if we should store the max tris for each NxM computation - // and throw an error if any exceeds the max. - FaceVerts box1_intersect[MAX_TRIS]; - for (int j = 0; j < NUM_TRIS; ++j) { - // Initialize the faces from the box - box1_intersect[j] = box1_tris[j]; - } - // Get the count of the actual number of faces in the intersecting shape - int box1_count = BoxIntersections(box2_planes, box2_center, box1_intersect); - - // Tris in Box2 intersection with Planes in Box1 - FaceVerts box2_intersect[MAX_TRIS]; - for (int j = 0; j < NUM_TRIS; ++j) { - box2_intersect[j] = box2_tris[j]; - } - const int box2_count = - BoxIntersections(box1_planes, box1_center, box2_intersect); - - // If there are overlapping regions in Box2, remove any coplanar faces - if (box2_count > 0) { - // Identify if any triangles in Box2 are coplanar with Box1 - Keep tri2_keep[MAX_TRIS]; - for (int j = 0; j < MAX_TRIS; ++j) { - // Initialize the valid faces to be true - tri2_keep[j].keep = j < box2_count ? true : false; - } - for (int b1 = 0; b1 < box1_count; ++b1) { - for (int b2 = 0; b2 < box2_count; ++b2) { - const bool is_coplanar = - IsCoplanarTriTri(box1_intersect[b1], box2_intersect[b2]); - const float area = FaceArea(box1_intersect[b1]); - if ((is_coplanar) && (area > aEpsilon)) { - tri2_keep[b2].keep = false; - } - } - } - - // Keep only the non coplanar triangles in Box2 - add them to the - // Box1 triangles. - for (int b2 = 0; b2 < box2_count; ++b2) { - if (tri2_keep[b2].keep) { - box1_intersect[box1_count] = box2_intersect[b2]; - // box1_count will determine the total faces in the - // intersecting shape - box1_count++; - } - } - } - - // Initialize the vol and iou to 0.0 in case there are no triangles - // in the intersecting shape. 
- float vol = 0.0; - float iou = 0.0; - - // If there are triangles in the intersecting shape - if (box1_count > 0) { - // The intersecting shape is a polyhedron made up of the - // triangular faces that are all now in box1_intersect. - // Calculate the polyhedron center - const float3 poly_center = PolyhedronCenter(box1_intersect, box1_count); - // Compute intersecting polyhedron volume - vol = BoxVolume(box1_intersect, poly_center, box1_count); - // Compute IoU - iou = vol / (box1_vol + box2_vol - vol); - } - - // Write the volume and IoU to global memory - vols[n][m] = vol; - ious[n][m] = iou; - } -} - -std::tuple IoUBox3DCuda( - const at::Tensor& boxes1, // (N, 8, 3) - const at::Tensor& boxes2) { // (M, 8, 3) - // Check inputs are on the same device - at::TensorArg boxes1_t{boxes1, "boxes1", 1}, boxes2_t{boxes2, "boxes2", 2}; - at::CheckedFrom c = "IoUBox3DCuda"; - at::checkAllSameGPU(c, {boxes1_t, boxes2_t}); - at::checkAllSameType(c, {boxes1_t, boxes2_t}); - - // Set the device for the kernel launch based on the device of boxes1 - at::cuda::CUDAGuard device_guard(boxes1.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - TORCH_CHECK(boxes2.size(2) == boxes1.size(2), "Boxes must have shape (8, 3)"); - - TORCH_CHECK( - (boxes2.size(1) == 8) && (boxes1.size(1) == 8), - "Boxes must have shape (8, 3)"); - - const int64_t N = boxes1.size(0); - const int64_t M = boxes2.size(0); - - auto vols = at::zeros({N, M}, boxes1.options()); - auto ious = at::zeros({N, M}, boxes1.options()); - - if (vols.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(vols, ious); - } - - const size_t blocks = 512; - const size_t threads = 256; - - IoUBox3DKernel<<>>( - boxes1.packed_accessor64(), - boxes2.packed_accessor64(), - vols.packed_accessor64(), - ious.packed_accessor64()); - - AT_CUDA_CHECK(cudaGetLastError()); - - return std::make_tuple(vols, ious); -} diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.h b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.h deleted file mode 100644 index 84f752b0718ab1bd495315358e0fa976d6fe8b22..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
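Once the intersection volume is known, the IoU written to global memory is the usual inclusion-exclusion ratio vol / (vol1 + vol2 - vol), with both outputs defaulting to zero when the boxes do not intersect. A small sketch that makes the guard explicit (this is not library code, just the arithmetic):

```cpp
#include <cstdio>

// IoU of two boxes given their volumes and the volume of their intersection.
// Matches the kernel: zero when there is no intersecting shape.
float box3d_iou(float vol1, float vol2, float inter_vol) {
  if (inter_vol <= 0.0f) {
    return 0.0f;
  }
  return inter_vol / (vol1 + vol2 - inter_vol);
}

int main() {
  // Two unit cubes overlapping in half of their volume.
  std::printf("%.3f\n", box3d_iou(1.0f, 1.0f, 0.5f));  // 0.5 / 1.5 = 0.333
}
```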
- */ - -#pragma once -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Calculate the intersection volume and IoU metric for two batches of boxes -// -// Args: -// boxes1: tensor of shape (N, 8, 3) of the coordinates of the 1st boxes -// boxes2: tensor of shape (M, 8, 3) of the coordinates of the 2nd boxes -// Returns: -// vol: (N, M) tensor of the volume of the intersecting convex shapes -// iou: (N, M) tensor of the intersection over union which is -// defined as: `iou = vol / (vol1 + vol2 - vol)` - -// CPU implementation -std::tuple IoUBox3DCpu( - const at::Tensor& boxes1, - const at::Tensor& boxes2); - -// CUDA implementation -std::tuple IoUBox3DCuda( - const at::Tensor& boxes1, - const at::Tensor& boxes2); - -// Implementation which is exposed -inline std::tuple IoUBox3D( - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - if (boxes1.is_cuda() || boxes2.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(boxes1); - CHECK_CUDA(boxes2); - return IoUBox3DCuda(boxes1.contiguous(), boxes2.contiguous()); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return IoUBox3DCpu(boxes1.contiguous(), boxes2.contiguous()); -} diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp deleted file mode 100644 index 3bc66de4e2db720984ea0917517a60e5ce601c7d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include "iou_box3d/iou_utils.h" - -std::tuple IoUBox3DCpu( - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - const int N = boxes1.size(0); - const int M = boxes2.size(0); - auto float_opts = boxes1.options().dtype(torch::kFloat32); - torch::Tensor vols = torch::zeros({N, M}, float_opts); - torch::Tensor ious = torch::zeros({N, M}, float_opts); - - // Create tensor accessors - auto boxes1_a = boxes1.accessor(); - auto boxes2_a = boxes2.accessor(); - auto vols_a = vols.accessor(); - auto ious_a = ious.accessor(); - - // Iterate through the N boxes in boxes1 - for (int n = 0; n < N; ++n) { - const auto& box1 = boxes1_a[n]; - // Convert to vector of face vertices i.e. effectively (F, 3, 3) - // face_verts is a data type defined in iou_utils.h - const face_verts box1_tris = GetBoxTris(box1); - - // Calculate the position of the center of the box which is used in - // several calculations. This requires a tensor as input. - const vec3 box1_center = BoxCenter(boxes1[n]); - - // Convert to vector of face vertices i.e. effectively (P, 4, 3) - const face_verts box1_planes = GetBoxPlanes(box1); - - // Get Box Volumes - const float box1_vol = BoxVolume(box1_tris, box1_center); - - // Iterate through the M boxes in boxes2 - for (int m = 0; m < M; ++m) { - // Repeat above steps for box2 - // TODO: check if caching these value helps performance. - const auto& box2 = boxes2_a[m]; - const face_verts box2_tris = GetBoxTris(box2); - const vec3 box2_center = BoxCenter(boxes2[m]); - const face_verts box2_planes = GetBoxPlanes(box2); - const float box2_vol = BoxVolume(box2_tris, box2_center); - - // Every triangle in one box will be compared to each plane in the other - // box. There are 3 possible outcomes: - // 1. 
If the triangle is fully inside, then it will - // remain as is. - // 2. If the triagnle it is fully outside, it will be removed. - // 3. If the triangle intersects with the (infinite) plane, it - // will be broken into subtriangles such that each subtriangle is full - // inside the plane and part of the intersecting tetrahedron. - - // Tris in Box1 -> Planes in Box2 - face_verts box1_intersect = - BoxIntersections(box1_tris, box2_planes, box2_center); - // Tris in Box2 -> Planes in Box1 - face_verts box2_intersect = - BoxIntersections(box2_tris, box1_planes, box1_center); - - // If there are overlapping regions in Box2, remove any coplanar faces - if (box2_intersect.size() > 0) { - // Identify if any triangles in Box2 are coplanar with Box1 - std::vector tri2_keep(box2_intersect.size()); - std::fill(tri2_keep.begin(), tri2_keep.end(), 1); - for (int b1 = 0; b1 < box1_intersect.size(); ++b1) { - for (int b2 = 0; b2 < box2_intersect.size(); ++b2) { - const bool is_coplanar = - IsCoplanarTriTri(box1_intersect[b1], box2_intersect[b2]); - const float area = FaceArea(box1_intersect[b1]); - if ((is_coplanar) && (area > aEpsilon)) { - tri2_keep[b2] = 0; - } - } - } - - // Keep only the non coplanar triangles in Box2 - add them to the - // Box1 triangles. - for (int b2 = 0; b2 < box2_intersect.size(); ++b2) { - if (tri2_keep[b2] == 1) { - box1_intersect.push_back((box2_intersect[b2])); - } - } - } - - // Initialize the vol and iou to 0.0 in case there are no triangles - // in the intersecting shape. - float vol = 0.0; - float iou = 0.0; - - // If there are triangles in the intersecting shape - if (box1_intersect.size() > 0) { - // The intersecting shape is a polyhedron made up of the - // triangular faces that are all now in box1_intersect. - // Calculate the polyhedron center - const vec3 polyhedron_center = PolyhedronCenter(box1_intersect); - // Compute intersecting polyhedron volume - vol = BoxVolume(box1_intersect, polyhedron_center); - // Compute IoU - iou = vol / (box1_vol + box2_vol - vol); - } - // Save out volume and IoU - vols_a[n][m] = vol; - ious_a[n][m] = iou; - } - } - return std::make_tuple(vols, ious); -} diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.cuh b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.cuh deleted file mode 100644 index 5ad5b165dfdc0b76df4d4d2d23bdff7b19e482ec..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.cuh +++ /dev/null @@ -1,737 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include "utils/float_math.cuh" - -// dEpsilon: Used in dot products and is used to assess whether two unit vectors -// are orthogonal (or coplanar). It's an epsilon on cos(ΞΈ). -// With dEpsilon = 0.001, two unit vectors are considered co-planar -// if their ΞΈ = 2.5 deg. -__constant__ const float dEpsilon = 1e-3; -// aEpsilon: Used once in main function to check for small face areas -__constant__ const float aEpsilon = 1e-4; -// kEpsilon: Used only for norm(u) = u/max(||u||, kEpsilon) -__constant__ const float kEpsilon = 1e-8; - -/* -_PLANES and _TRIS define the 4- and 3-connectivity -of the 8 box corners. 
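The comment on dEpsilon above is easy to verify numerically: a tolerance of 1e-3 on the cosine corresponds to an angle of roughly 2.5 degrees, which is the sense in which two unit vectors are declared parallel or coplanar. A quick standalone check of that claim:

```cpp
#include <cmath>
#include <cstdio>

int main() {
  // dEpsilon is a tolerance on cos(theta): two unit vectors are treated as
  // parallel/coplanar when |dot| > 1 - dEpsilon. Check what angle that is.
  const double dEpsilon = 1e-3;
  const double pi = std::acos(-1.0);
  const double theta_deg = std::acos(1.0 - dEpsilon) * 180.0 / pi;
  std::printf("threshold angle ~= %.2f degrees\n", theta_deg);  // ~2.56
}
```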
-_PLANES gives the quad faces of the 3D box -_TRIS gives the triangle faces of the 3D box -*/ -const int NUM_PLANES = 6; -const int NUM_TRIS = 12; -// This is required for iniitalizing the faces -// in the intersecting shape -const int MAX_TRIS = 100; - -// Create data types for representing the -// verts for each face and the indices. -// We will use struct arrays for representing -// the data for each box and intersecting -// triangles -struct FaceVerts { - float3 v0; - float3 v1; - float3 v2; - float3 v3; // Can be empty for triangles -}; - -struct FaceVertsIdx { - int v0; - int v1; - int v2; - int v3; // Can be empty for triangles -}; - -// This is used when deciding which faces to -// keep that are not coplanar -struct Keep { - bool keep; -}; - -__device__ FaceVertsIdx _PLANES[] = { - {0, 1, 2, 3}, - {3, 2, 6, 7}, - {0, 1, 5, 4}, - {0, 3, 7, 4}, - {1, 5, 6, 2}, - {4, 5, 6, 7}, -}; -__device__ FaceVertsIdx _TRIS[] = { - {0, 1, 2}, - {0, 3, 2}, - {4, 5, 6}, - {4, 6, 7}, - {1, 5, 6}, - {1, 6, 2}, - {0, 4, 7}, - {0, 7, 3}, - {3, 2, 6}, - {3, 6, 7}, - {0, 1, 5}, - {0, 4, 5}, -}; - -// Args -// box: (8, 3) tensor accessor for the box vertices -// box_tris: Array of structs of type FaceVerts, -// effectively (F, 3, 3) where the coordinates of the -// verts for each face will be saved to. -// -// Returns: None (output saved to box_tris) -// -template -__device__ inline void GetBoxTris(const Box& box, BoxTris& box_tris) { - for (int t = 0; t < NUM_TRIS; ++t) { - const float3 v0 = make_float3( - box[_TRIS[t].v0][0], box[_TRIS[t].v0][1], box[_TRIS[t].v0][2]); - const float3 v1 = make_float3( - box[_TRIS[t].v1][0], box[_TRIS[t].v1][1], box[_TRIS[t].v1][2]); - const float3 v2 = make_float3( - box[_TRIS[t].v2][0], box[_TRIS[t].v2][1], box[_TRIS[t].v2][2]); - box_tris[t] = {v0, v1, v2}; - } -} - -// Args -// box: (8, 3) tensor accessor for the box vertices -// box_planes: Array of structs of type FaceVerts, effectively (P, 4, 3) -// where the coordinates of the verts for the four corners of each plane -// will be saved to -// -// Returns: None (output saved to box_planes) -// -template -__device__ inline void GetBoxPlanes( - const Box& box, - FaceVertsBoxPlanes& box_planes) { - for (int t = 0; t < NUM_PLANES; ++t) { - const float3 v0 = make_float3( - box[_PLANES[t].v0][0], box[_PLANES[t].v0][1], box[_PLANES[t].v0][2]); - const float3 v1 = make_float3( - box[_PLANES[t].v1][0], box[_PLANES[t].v1][1], box[_PLANES[t].v1][2]); - const float3 v2 = make_float3( - box[_PLANES[t].v2][0], box[_PLANES[t].v2][1], box[_PLANES[t].v2][2]); - const float3 v3 = make_float3( - box[_PLANES[t].v3][0], box[_PLANES[t].v3][1], box[_PLANES[t].v3][2]); - box_planes[t] = {v0, v1, v2, v3}; - } -} - -// The geometric center of a list of vertices. -// -// Args -// vertices: A list of float3 vertices {v0, ..., vN}. -// -// Returns -// float3: Geometric center of the vertices. -// -__device__ inline float3 FaceCenter( - std::initializer_list vertices) { - auto sumVertices = float3{}; - for (const auto& vertex : vertices) { - sumVertices = sumVertices + vertex; - } - return sumVertices / vertices.size(); -} - -// The normal of a plane spanned by vectors e0 and e1 -// -// Args -// e0, e1: float3 vectors defining a plane -// -// Returns -// float3: normal of the plane -// -__device__ inline float3 GetNormal(const float3 e0, const float3 e1) { - float3 n = cross(e0, e1); - n = n / std::fmaxf(norm(n), kEpsilon); - return n; -} - -// The normal of a face with vertices (v0, v1, v2) or (v0, ..., v3). 
-// We find the "best" edges connecting the face center to the vertices, -// such that the cross product between the edges is maximized. -// -// Args -// vertices: a list of float3 coordinates of the vertices. -// -// Returns -// float3: center of the plane -// -__device__ inline float3 FaceNormal( - std::initializer_list vertices) { - const auto faceCenter = FaceCenter(vertices); - auto normal = float3(); - auto maxDist = -1; - for (auto v1 = vertices.begin(); v1 != vertices.end() - 1; ++v1) { - for (auto v2 = v1 + 1; v2 != vertices.end(); ++v2) { - const auto v1ToCenter = *v1 - faceCenter; - const auto v2ToCenter = *v2 - faceCenter; - const auto dist = norm(cross(v1ToCenter, v2ToCenter)); - if (dist > maxDist) { - normal = GetNormal(v1ToCenter, v2ToCenter); - maxDist = dist; - } - } - } - return normal; -} - -// The area of the face defined by vertices (v0, v1, v2) -// Define e0 to be the edge connecting (v1, v0) -// Define e1 to be the edge connecting (v2, v0) -// Area is the norm of the cross product of e0, e1 divided by 2.0 -// -// Args -// tri: FaceVerts of float3 coordinates of the vertices of the face -// -// Returns -// float: area for the face -// -__device__ inline float FaceArea(const FaceVerts& tri) { - // Get verts for face 1 - const float3 n = cross(tri.v1 - tri.v0, tri.v2 - tri.v0); - return norm(n) / 2.0; -} - -// The normal of a box plane defined by the verts in `plane` such that it -// points toward the centroid of the box given by `center`. -// -// Args -// plane: float3 coordinates of the vertices of the plane -// center: float3 coordinates of the center of the box from -// which the plane originated -// -// Returns -// float3: normal for the plane such that it points towards -// the center of the box -// -template -__device__ inline float3 PlaneNormalDirection( - const FaceVertsPlane& plane, - const float3& center) { - // The plane's center - const float3 plane_center = - FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3}); - - // The plane's normal - float3 n = FaceNormal({plane.v0, plane.v1, plane.v2, plane.v3}); - - // We project the center on the plane defined by (v0, v1, v2, v3) - // We can write center = plane_center + a * e0 + b * e1 + c * n - // We know that = 0 and = 0 and - // is the dot product between a and b. - // This means we can solve for c as: - // c =
- <center - plane_center - a * e0 - b * e1, n> - //   = <center - plane_center, n>
- const float c = dot((center - plane_center), n); - - // If c is negative, then we revert the direction of n such that n - // points "inside" - if (c < 0.0f) { - n = -1.0f * n; - } - - return n; -} - -// Calculate the volume of the box by summing the volume of -// each of the tetrahedrons formed with a triangle face and -// the box centroid. -// -// Args -// box_tris: vector of float3 coordinates of the vertices of each -// of the triangles in the box -// box_center: float3 coordinates of the center of the box -// -// Returns -// float: volume of the box -// -template -__device__ inline float BoxVolume( - const BoxTris& box_tris, - const float3& box_center, - const int num_tris) { - float box_vol = 0.0; - // Iterate through each triange, calculate the area of the - // tetrahedron formed with the box_center and sum them - for (int t = 0; t < num_tris; ++t) { - // Subtract the center: - float3 v0 = box_tris[t].v0; - float3 v1 = box_tris[t].v1; - float3 v2 = box_tris[t].v2; - - v0 = v0 - box_center; - v1 = v1 - box_center; - v2 = v2 - box_center; - - // Compute the area - const float area = dot(v0, cross(v1, v2)); - const float vol = abs(area) / 6.0; - box_vol = box_vol + vol; - } - return box_vol; -} - -// Compute the box center as the mean of the verts -// -// Args -// box_verts: (8, 3) tensor of the corner vertices of the box -// -// Returns -// float3: coordinates of the center of the box -// -template -__device__ inline float3 BoxCenter(const Box box_verts) { - float x = 0.0; - float y = 0.0; - float z = 0.0; - const int num_verts = box_verts.size(0); // Should be 8 - // Sum all x, y, z, and take the mean - for (int t = 0; t < num_verts; ++t) { - x = x + box_verts[t][0]; - y = y + box_verts[t][1]; - z = z + box_verts[t][2]; - } - // Take the mean of all the vertex positions - x = x / num_verts; - y = y / num_verts; - z = z / num_verts; - const float3 center = make_float3(x, y, z); - return center; -} - -// Compute the polyhedron center as the mean of the face centers -// of the triangle faces -// -// Args -// tris: vector of float3 coordinates of the -// vertices of each of the triangles in the polyhedron -// -// Returns -// float3: coordinates of the center of the polyhedron -// -template -__device__ inline float3 PolyhedronCenter( - const Tris& tris, - const int num_tris) { - float x = 0.0; - float y = 0.0; - float z = 0.0; - - // Find the center point of each face - for (int t = 0; t < num_tris; ++t) { - const float3 v0 = tris[t].v0; - const float3 v1 = tris[t].v1; - const float3 v2 = tris[t].v2; - const float x_face = (v0.x + v1.x + v2.x) / 3.0; - const float y_face = (v0.y + v1.y + v2.y) / 3.0; - const float z_face = (v0.z + v1.z + v2.z) / 3.0; - x = x + x_face; - y = y + y_face; - z = z + z_face; - } - - // Take the mean of the centers of all faces - x = x / num_tris; - y = y / num_tris; - z = z / num_tris; - - const float3 center = make_float3(x, y, z); - return center; -} - -// Compute a boolean indicator for whether a point -// is inside a plane, where inside refers to whether -// or not the point has a component in the -// normal direction of the plane. 
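BoxVolume above relies on the standard fact that the tetrahedron spanned by a triangular face (v0, v1, v2) and an apex has volume |(v0 - apex) . ((v1 - apex) x (v2 - apex))| / 6; summing this over every triangular face, with the centroid as the apex, gives the volume of the convex solid. A small standalone check of the per-tetrahedron formula (helper names are mine):

```cpp
#include <array>
#include <cmath>
#include <cstdio>

using Vec3 = std::array<double, 3>;

static Vec3 sub(const Vec3& a, const Vec3& b) {
  return {a[0] - b[0], a[1] - b[1], a[2] - b[2]};
}
static Vec3 cross(const Vec3& a, const Vec3& b) {
  return {a[1] * b[2] - a[2] * b[1],
          a[2] * b[0] - a[0] * b[2],
          a[0] * b[1] - a[1] * b[0]};
}
static double dot(const Vec3& a, const Vec3& b) {
  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}

// Volume of the tetrahedron with triangular face (v0, v1, v2) and an apex.
double tetra_volume(const Vec3& v0, const Vec3& v1, const Vec3& v2,
                    const Vec3& apex) {
  return std::abs(dot(sub(v0, apex), cross(sub(v1, apex), sub(v2, apex)))) / 6.0;
}

int main() {
  // Unit right tetrahedron: volume should be 1/6.
  std::printf("%.4f\n",
              tetra_volume({1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 0, 0}));
}
```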
-// -// Args -// plane: vector of float3 coordinates of the -// vertices of each of the triangles in the box -// normal: float3 of the direction of the plane normal -// point: float3 of the position of the point of interest -// -// Returns -// bool: whether or not the point is inside the plane -// -__device__ inline bool -IsInside(const FaceVerts& plane, const float3& normal, const float3& point) { - // The center of the plane - const float3 plane_ctr = FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3}); - - // Every point p can be written as p = plane_ctr + a e0 + b e1 + c n - // Solving for c: - // c = (point - plane_ctr - a * e0 - b * e1).dot(n) - // We know that = 0 and = 0 - // So the calculation can be simplified as: - const float c = dot((point - plane_ctr), normal); - const bool inside = c >= 0.0f; - return inside; -} - -// Find the point of intersection between a plane -// and a line given by the end points (p0, p1) -// -// Args -// plane: vector of float3 coordinates of the -// vertices of each of the triangles in the box -// normal: float3 of the direction of the plane normal -// p0, p1: float3 of the start and end point of the line -// -// Returns -// float3: position of the intersection point -// -__device__ inline float3 PlaneEdgeIntersection( - const FaceVerts& plane, - const float3& normal, - const float3& p0, - const float3& p1) { - // The center of the plane - const float3 plane_ctr = FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3}); - - // The point of intersection can be parametrized - // p = p0 + a (p1 - p0) where a in [0, 1] - // We want to find a such that p is on plane - //
<p - plane_ctr, n>
= 0 - - float3 direc = p1 - p0; - direc = direc / fmaxf(norm(direc), kEpsilon); - - float3 p = (p1 + p0) / 2.0f; - - if (abs(dot(direc, normal)) >= dEpsilon) { - const float top = -1.0f * dot(p0 - plane_ctr, normal); - const float bot = dot(p1 - p0, normal); - const float a = top / bot; - p = p0 + a * (p1 - p0); - } - - return p; -} - -// Compute the most distant points between two sets of vertices -// -// Args -// verts1, verts2: list of float3 defining the list of vertices -// -// Returns -// v1m, v2m: float3 vectors of the most distant points -// in verts1 and verts2 respectively -// -__device__ inline std::tuple ArgMaxVerts( - std::initializer_list verts1, - std::initializer_list verts2) { - auto v1m = float3(); - auto v2m = float3(); - float maxdist = -1.0f; - - for (const auto& v1 : verts1) { - for (const auto& v2 : verts2) { - if (norm(v1 - v2) > maxdist) { - v1m = v1; - v2m = v2; - maxdist = norm(v1 - v2); - } - } - } - return std::make_tuple(v1m, v2m); -} - -// Compute a boolean indicator for whether or not two faces -// are coplanar -// -// Args -// tri1, tri2: FaceVerts struct of the vertex coordinates of -// the triangle face -// -// Returns -// bool: whether or not the two faces are coplanar -// -__device__ inline bool IsCoplanarTriTri( - const FaceVerts& tri1, - const FaceVerts& tri2) { - const float3 tri1_ctr = FaceCenter({tri1.v0, tri1.v1, tri1.v2}); - const float3 tri1_n = FaceNormal({tri1.v0, tri1.v1, tri1.v2}); - - const float3 tri2_ctr = FaceCenter({tri2.v0, tri2.v1, tri2.v2}); - const float3 tri2_n = FaceNormal({tri2.v0, tri2.v1, tri2.v2}); - - // Check if parallel - const bool check1 = abs(dot(tri1_n, tri2_n)) > 1 - dEpsilon; - - // Compute most distant points - const auto v1mAndv2m = - ArgMaxVerts({tri1.v0, tri1.v1, tri1.v2}, {tri2.v0, tri2.v1, tri2.v2}); - const auto v1m = std::get<0>(v1mAndv2m); - const auto v2m = std::get<1>(v1mAndv2m); - - float3 n12m = v1m - v2m; - n12m = n12m / fmaxf(norm(n12m), kEpsilon); - - const bool check2 = (abs(dot(n12m, tri1_n)) < dEpsilon) || - (abs(dot(n12m, tri2_n)) < dEpsilon); - - return (check1 && check2); -} - -// Compute a boolean indicator for whether or not a triangular and a planar -// face are coplanar -// -// Args -// tri, plane: FaceVerts struct of the vertex coordinates of -// the triangle and planar face -// normal: the normal direction of the plane pointing "inside" -// -// Returns -// bool: whether or not the two faces are coplanar -// -__device__ inline bool IsCoplanarTriPlane( - const FaceVerts& tri, - const FaceVerts& plane, - const float3& normal) { - const float3 tri_ctr = FaceCenter({tri.v0, tri.v1, tri.v2}); - const float3 nt = FaceNormal({tri.v0, tri.v1, tri.v2}); - - // check if parallel - const bool check1 = abs(dot(nt, normal)) > 1 - dEpsilon; - - // Compute most distant points - const auto v1mAndv2m = ArgMaxVerts( - {tri.v0, tri.v1, tri.v2}, {plane.v0, plane.v1, plane.v2, plane.v3}); - const auto v1m = std::get<0>(v1mAndv2m); - const auto v2m = std::get<1>(v1mAndv2m); - - float3 n12m = v1m - v2m; - n12m = n12m / fmaxf(norm(n12m), kEpsilon); - - const bool check2 = abs(dot(n12m, normal)) < dEpsilon; - - return (check1 && check2); -} - -// Triangle is clipped into a quadrilateral -// based on the intersection points with the plane. -// Then the quadrilateral is divided into two triangles. 
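PlaneEdgeIntersection above solves <p0 + a (p1 - p0) - plane_ctr, n> = 0 for a, giving a = <plane_ctr - p0, n> / <p1 - p0, n>, and falls back to the segment midpoint when the segment is nearly parallel to the plane. The same computation against the plane z = 0, as a standalone sketch with names of my choosing:

```cpp
#include <array>
#include <cmath>
#include <cstdio>

using Vec3 = std::array<double, 3>;

static double dot(const Vec3& a, const Vec3& b) {
  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}

// Intersection of segment (p0, p1) with the plane through plane_ctr having
// unit normal n; returns the midpoint for near-parallel segments, mirroring
// the behavior of the deleted helper.
Vec3 plane_edge_intersection(const Vec3& plane_ctr, const Vec3& n,
                             const Vec3& p0, const Vec3& p1) {
  const Vec3 d = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
  const double denom = dot(d, n);
  if (std::abs(denom) < 1e-3) {  // analogous to the dEpsilon check above
    return {(p0[0] + p1[0]) / 2, (p0[1] + p1[1]) / 2, (p0[2] + p1[2]) / 2};
  }
  const Vec3 to_ctr = {plane_ctr[0] - p0[0], plane_ctr[1] - p0[1],
                       plane_ctr[2] - p0[2]};
  const double a = dot(to_ctr, n) / denom;
  return {p0[0] + a * d[0], p0[1] + a * d[1], p0[2] + a * d[2]};
}

int main() {
  // Segment from (0,0,-1) to (1,1,1) crosses the plane z = 0 at (0.5, 0.5, 0).
  const Vec3 p = plane_edge_intersection({0, 0, 0}, {0, 0, 1},
                                         {0, 0, -1}, {1, 1, 1});
  std::printf("(%.3f, %.3f, %.3f)\n", p[0], p[1], p[2]);
}
```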
-// -// Args -// plane: vector of float3 coordinates of the -// vertices of each of the triangles in the box -// normal: float3 of the direction of the plane normal -// vout: float3 of the point in the triangle which is outside -// the plane -// vin1, vin2: float3 of the points in the triangle which are -// inside the plane -// face_verts_out: Array of structs of type FaceVerts, -// with the coordinates of the new triangle faces -// formed after clipping. -// All triangles are now "inside" the plane. -// -// Returns: -// count: (int) number of new faces after clipping the triangle -// i.e. the valid faces which have been saved -// to face_verts_out -// -template -__device__ inline int ClipTriByPlaneOneOut( - const FaceVerts& plane, - const float3& normal, - const float3& vout, - const float3& vin1, - const float3& vin2, - FaceVertsBox& face_verts_out) { - // point of intersection between plane and (vin1, vout) - const float3 pint1 = PlaneEdgeIntersection(plane, normal, vin1, vout); - // point of intersection between plane and (vin2, vout) - const float3 pint2 = PlaneEdgeIntersection(plane, normal, vin2, vout); - - face_verts_out[0] = {vin1, pint1, pint2}; - face_verts_out[1] = {vin1, pint2, vin2}; - - return 2; -} - -// Triangle is clipped into a smaller triangle based -// on the intersection points with the plane. -// -// Args -// plane: vector of float3 coordinates of the -// vertices of each of the triangles in the box -// normal: float3 of the direction of the plane normal -// vout1, vout2: float3 of the points in the triangle which are -// outside the plane -// vin: float3 of the point in the triangle which is inside -// the plane -// face_verts_out: Array of structs of type FaceVerts, -// with the coordinates of the new triangle faces -// formed after clipping. -// All triangles are now "inside" the plane. -// -// Returns: -// count: (int) number of new faces after clipping the triangle -// i.e. the valid faces which have been saved -// to face_verts_out -// -template -__device__ inline int ClipTriByPlaneTwoOut( - const FaceVerts& plane, - const float3& normal, - const float3& vout1, - const float3& vout2, - const float3& vin, - FaceVertsBox& face_verts_out) { - // point of intersection between plane and (vin, vout1) - const float3 pint1 = PlaneEdgeIntersection(plane, normal, vin, vout1); - // point of intersection between plane and (vin, vout2) - const float3 pint2 = PlaneEdgeIntersection(plane, normal, vin, vout2); - - face_verts_out[0] = {vin, pint1, pint2}; - - return 1; -} - -// Clip the triangle faces so that they lie within the -// plane, creating new triangle faces where necessary. -// -// Args -// plane: Array of structs of type FaceVerts with the coordinates -// of the vertices of each of the triangles in the box -// tri: Array of structs of type FaceVerts with the vertex -// coordinates of the triangle faces -// normal: float3 of the direction of the plane normal -// face_verts_out: Array of structs of type FaceVerts, -// with the coordinates of the new triangle faces -// formed after clipping. -// All triangles are now "inside" the plane. -// -// Returns: -// count: (int) number of new faces after clipping the triangle -// i.e. 
the valid faces which have been saved -// to face_verts_out -// -template -__device__ inline int ClipTriByPlane( - const FaceVerts& plane, - const FaceVerts& tri, - const float3& normal, - FaceVertsBox& face_verts_out) { - // Get Triangle vertices - const float3 v0 = tri.v0; - const float3 v1 = tri.v1; - const float3 v2 = tri.v2; - - // Check each of the triangle vertices to see if it is inside the plane - const bool isin0 = IsInside(plane, normal, v0); - const bool isin1 = IsInside(plane, normal, v1); - const bool isin2 = IsInside(plane, normal, v2); - - // Check coplanar - const bool iscoplanar = IsCoplanarTriPlane(tri, plane, normal); - if (iscoplanar) { - // Return input vertices - face_verts_out[0] = {v0, v1, v2}; - return 1; - } - - // All in - if (isin0 && isin1 && isin2) { - // Return input vertices - face_verts_out[0] = {v0, v1, v2}; - return 1; - } - - // All out - if (!isin0 && !isin1 && !isin2) { - return 0; - } - - // One vert out - if (isin0 && isin1 && !isin2) { - return ClipTriByPlaneOneOut(plane, normal, v2, v0, v1, face_verts_out); - } - if (isin0 && !isin1 && isin2) { - return ClipTriByPlaneOneOut(plane, normal, v1, v0, v2, face_verts_out); - } - if (!isin0 && isin1 && isin2) { - return ClipTriByPlaneOneOut(plane, normal, v0, v1, v2, face_verts_out); - } - - // Two verts out - if (isin0 && !isin1 && !isin2) { - return ClipTriByPlaneTwoOut(plane, normal, v1, v2, v0, face_verts_out); - } - if (!isin0 && !isin1 && isin2) { - return ClipTriByPlaneTwoOut(plane, normal, v0, v1, v2, face_verts_out); - } - if (!isin0 && isin1 && !isin2) { - return ClipTriByPlaneTwoOut(plane, normal, v0, v2, v1, face_verts_out); - } - - // Else return empty (should not be reached) - return 0; -} - -// Get the triangles from each box which are part of the -// intersecting polyhedron by computing the intersection -// points with each of the planes. -// -// Args -// planes: Array of structs of type FaceVerts with the coordinates -// of the vertices of each of the triangles in the box -// center: float3 coordinates of the center of the box from which -// the planes originate -// face_verts_out: Array of structs of type FaceVerts, -// where the coordinates of the new triangle faces -// formed after clipping will be saved to. -// All triangles are now "inside" the plane. -// -// Returns: -// count: (int) number of faces in the intersecting shape -// i.e. 
the valid faces which have been saved -// to face_verts_out -// -template -__device__ inline int BoxIntersections( - const FaceVertsPlane& planes, - const float3& center, - FaceVertsBox& face_verts_out) { - // Initialize num tris to 12 - int num_tris = NUM_TRIS; - for (int p = 0; p < NUM_PLANES; ++p) { - // Get plane normal direction - const float3 n2 = PlaneNormalDirection(planes[p], center); - // Create intermediate vector to store the updated tris - FaceVerts tri_verts_updated[MAX_TRIS]; - int offset = 0; - - // Iterate through triangles in face_verts_out - // for the valid tris given by num_tris - for (int t = 0; t < num_tris; ++t) { - // Clip tri by plane, can max be split into 2 triangles - FaceVerts tri_updated[2]; - const int count = - ClipTriByPlane(planes[p], face_verts_out[t], n2, tri_updated); - // Add to the tri_verts_updated output if not empty - for (int v = 0; v < count; ++v) { - tri_verts_updated[offset] = tri_updated[v]; - offset++; - } - } - // Update the face_verts_out tris - num_tris = offset; - for (int j = 0; j < num_tris; ++j) { - face_verts_out[j] = tri_verts_updated[j]; - } - } - return num_tris; -} diff --git a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.h b/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.h deleted file mode 100644 index 283822a112daa9bec5e2e2fe083fa983210273ca..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/iou_box3d/iou_utils.h +++ /dev/null @@ -1,733 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "utils/vec3.h" - -// dEpsilon: Used in dot products and is used to assess whether two unit vectors -// are orthogonal (or coplanar). It's an epsilon on cos(ΞΈ). -// With dEpsilon = 0.001, two unit vectors are considered co-planar -// if their ΞΈ = 2.5 deg. -const auto dEpsilon = 1e-3; -// aEpsilon: Used once in main function to check for small face areas -const auto aEpsilon = 1e-4; -// kEpsilon: Used only for norm(u) = u/max(||u||, kEpsilon) -const auto kEpsilon = 1e-8; - -/* -_PLANES and _TRIS define the 4- and 3-connectivity -of the 8 box corners. -_PLANES gives the quad faces of the 3D box -_TRIS gives the triangle faces of the 3D box -*/ -const int NUM_PLANES = 6; -const int NUM_TRIS = 12; -const int _PLANES[6][4] = { - {0, 1, 2, 3}, - {3, 2, 6, 7}, - {0, 1, 5, 4}, - {0, 3, 7, 4}, - {1, 5, 6, 2}, - {4, 5, 6, 7}, -}; -const int _TRIS[12][3] = { - {0, 1, 2}, - {0, 3, 2}, - {4, 5, 6}, - {4, 6, 7}, - {1, 5, 6}, - {1, 6, 2}, - {0, 4, 7}, - {0, 7, 3}, - {3, 2, 6}, - {3, 6, 7}, - {0, 1, 5}, - {0, 4, 5}, -}; - -// Create a new data type for representing the -// verts for each face which can be triangle or plane. -// This helps make the code more readable. 
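
As a quick sanity check on the dEpsilon threshold defined above (it is an epsilon on cos(theta)): acos(1 - 1e-3) is about 2.56 degrees, consistent with the roughly 2.5 degree figure in the comment. A standalone check:

#include <cmath>
#include <cstdio>

int main() {
  // Two unit vectors are treated as parallel when |cos(theta)| > 1 - dEpsilon,
  // i.e. when the angle between them is below roughly 2.5 degrees.
  const double dEpsilon = 1e-3;
  const double pi = std::acos(-1.0);
  std::printf("threshold angle: %.2f degrees\n", std::acos(1.0 - dEpsilon) * 180.0 / pi);
  return 0;
}
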
-using face_verts = std::vector>>; - -// Args -// box: (8, 3) tensor accessor for the box vertices -// plane_idx: index of the plane in the box -// vert_idx: index of the vertex in the plane -// -// Returns -// vec3 (x, y, x) vertex coordinates -// -template -inline vec3 -ExtractVertsPlane(const Box& box, const int plane_idx, const int vert_idx) { - return vec3( - box[_PLANES[plane_idx][vert_idx]][0], - box[_PLANES[plane_idx][vert_idx]][1], - box[_PLANES[plane_idx][vert_idx]][2]); -} - -// Args -// box: (8, 3) tensor accessor for the box vertices -// tri_idx: index of the triangle face in the box -// vert_idx: index of the vertex in the triangle -// -// Returns -// vec3 (x, y, x) vertex coordinates -// -template -inline vec3 -ExtractVertsTri(const Box& box, const int tri_idx, const int vert_idx) { - return vec3( - box[_TRIS[tri_idx][vert_idx]][0], - box[_TRIS[tri_idx][vert_idx]][1], - box[_TRIS[tri_idx][vert_idx]][2]); -} - -// Args -// box: (8, 3) tensor accessor for the box vertices -// -// Returns -// std::vector>> effectively (F, 3, 3) -// coordinates of the verts for each face -// -template -inline face_verts GetBoxTris(const Box& box) { - face_verts box_tris; - for (int t = 0; t < NUM_TRIS; ++t) { - vec3 v0 = ExtractVertsTri(box, t, 0); - vec3 v1 = ExtractVertsTri(box, t, 1); - vec3 v2 = ExtractVertsTri(box, t, 2); - box_tris.push_back({v0, v1, v2}); - } - return box_tris; -} - -// Args -// box: (8, 3) tensor accessor for the box vertices -// -// Returns -// std::vector>> effectively (P, 3, 3) -// coordinates of the 4 verts for each plane -// -template -inline face_verts GetBoxPlanes(const Box& box) { - face_verts box_planes; - for (int t = 0; t < NUM_PLANES; ++t) { - vec3 v0 = ExtractVertsPlane(box, t, 0); - vec3 v1 = ExtractVertsPlane(box, t, 1); - vec3 v2 = ExtractVertsPlane(box, t, 2); - vec3 v3 = ExtractVertsPlane(box, t, 3); - box_planes.push_back({v0, v1, v2, v3}); - } - return box_planes; -} - -// The normal of a plane spanned by vectors e0 and e1 -// -// Args -// e0, e1: vec3 vectors defining a plane -// -// Returns -// vec3: normal of the plane -// -inline vec3 GetNormal(const vec3 e0, const vec3 e1) { - vec3 n = cross(e0, e1); - n = n / std::fmaxf(norm(n), kEpsilon); - return n; -} - -// The center of a triangle tri -// -// Args -// tri: vec3 coordinates of the vertices of the triangle -// -// Returns -// vec3: center of the triangle -// -inline vec3 TriCenter(const std::vector>& tri) { - // Vertices of the triangle - const vec3 v0 = tri[0]; - const vec3 v1 = tri[1]; - const vec3 v2 = tri[2]; - - return (v0 + v1 + v2) / 3.0f; -} - -// The normal of the triangle defined by vertices (v0, v1, v2) -// We find the "best" edges connecting the face center to the vertices, -// such that the cross product between the edges is maximized. 
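
GetNormal above is a normalized cross product of two edge vectors; for a triangle lying in the z = 0 plane, two in-plane edges give the +z axis. A tiny standalone check (plain arrays, no dependency on this header):

#include <cmath>
#include <cstdio>

int main() {
  const float e0[3] = {1, 0, 0}, e1[3] = {0, 1, 0};
  const float n[3] = {e0[1] * e1[2] - e0[2] * e1[1],
                      e0[2] * e1[0] - e0[0] * e1[2],
                      e0[0] * e1[1] - e0[1] * e1[0]};
  const float len = std::sqrt(n[0] * n[0] + n[1] * n[1] + n[2] * n[2]);
  std::printf("normal = (%g, %g, %g)\n", n[0] / len, n[1] / len, n[2] / len);  // (0, 0, 1)
  return 0;
}
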
-// -// Args -// tri: vec3 coordinates of the vertices of the face -// -// Returns -// vec3: normal for the face -// -inline vec3 TriNormal(const std::vector>& tri) { - // Get center of triangle - const vec3 ctr = TriCenter(tri); - - // find the "best" normal as cross product of edges from center - float max_dist = -1.0f; - vec3 n = {0.0f, 0.0f, 0.0f}; - for (int i = 0; i < 2; ++i) { - for (int j = i + 1; j < 3; ++j) { - const float dist = norm(cross(tri[i] - ctr, tri[j] - ctr)); - if (dist > max_dist) { - n = GetNormal(tri[i] - ctr, tri[j] - ctr); - } - } - } - return n; -} - -// The center of a plane -// -// Args -// plane: vec3 coordinates of the vertices of the plane -// -// Returns -// vec3: center of the plane -// -inline vec3 PlaneCenter(const std::vector>& plane) { - // Vertices of the plane - const vec3 v0 = plane[0]; - const vec3 v1 = plane[1]; - const vec3 v2 = plane[2]; - const vec3 v3 = plane[3]; - - return (v0 + v1 + v2 + v3) / 4.0f; -} - -// The normal of a planar face with vertices (v0, v1, v2, v3) -// We find the "best" edges connecting the face center to the vertices, -// such that the cross product between the edges is maximized. -// -// Args -// plane: vec3 coordinates of the vertices of the planar face -// -// Returns -// vec3: normal of the planar face -// -inline vec3 PlaneNormal(const std::vector>& plane) { - // Get center of planar face - vec3 ctr = PlaneCenter(plane); - - // find the "best" normal as cross product of edges from center - float max_dist = -1.0f; - vec3 n = {0.0f, 0.0f, 0.0f}; - for (int i = 0; i < 3; ++i) { - for (int j = i + 1; j < 4; ++j) { - const float dist = norm(cross(plane[i] - ctr, plane[j] - ctr)); - if (dist > max_dist) { - n = GetNormal(plane[i] - ctr, plane[j] - ctr); - } - } - } - return n; -} - -// The area of the face defined by vertices (v0, v1, v2) -// Define e0 to be the edge connecting (v1, v0) -// Define e1 to be the edge connecting (v2, v0) -// Area is the norm of the cross product of e0, e1 divided by 2.0 -// -// Args -// tri: vec3 coordinates of the vertices of the face -// -// Returns -// float: area for the face -// -inline float FaceArea(const std::vector>& tri) { - // Get verts for face - const vec3 v0 = tri[0]; - const vec3 v1 = tri[1]; - const vec3 v2 = tri[2]; - const vec3 n = cross(v1 - v0, v2 - v0); - return norm(n) / 2.0; -} - -// The normal of a box plane defined by the verts in `plane` such that it -// points toward the centroid of the box given by `center`. -// -// Args -// plane: vec3 coordinates of the vertices of the plane -// center: vec3 coordinates of the center of the box from -// which the plane originated -// -// Returns -// vec3: normal for the plane such that it points towards -// the center of the box -// -inline vec3 PlaneNormalDirection( - const std::vector>& plane, - const vec3& center) { - // The plane's center & normal - const vec3 plane_center = PlaneCenter(plane); - vec3 n = PlaneNormal(plane); - - // We project the center on the plane defined by (v0, v1, v2, v3) - // We can write center = plane_center + a * e0 + b * e1 + c * n - // We know that = 0 and = 0 and - // is the dot product between a and b. - // This means we can solve for c as: - // c =

 <center - plane_center - a * e0 - b * e1, n>
 -  //     = <center - plane_center, n>
- const float c = dot((center - plane_center), n); - - // If c is negative, then we revert the direction of n such that n - // points "inside" - if (c < 0.0f) { - n = -1.0f * n; - } - - return n; -} - -// Calculate the volume of the box by summing the volume of -// each of the tetrahedrons formed with a triangle face and -// the box centroid. -// -// Args -// box_tris: vector of vec3 coordinates of the vertices of each -// of the triangles in the box -// box_center: vec3 coordinates of the center of the box -// -// Returns -// float: volume of the box -// -inline float BoxVolume( - const face_verts& box_tris, - const vec3& box_center) { - float box_vol = 0.0; - // Iterate through each triange, calculate the area of the - // tetrahedron formed with the box_center and sum them - for (int t = 0; t < box_tris.size(); ++t) { - // Subtract the center: - const vec3 v0 = box_tris[t][0] - box_center; - const vec3 v1 = box_tris[t][1] - box_center; - const vec3 v2 = box_tris[t][2] - box_center; - - // Compute the area - const float area = dot(v0, cross(v1, v2)); - const float vol = std::abs(area) / 6.0; - box_vol = box_vol + vol; - } - return box_vol; -} - -// Compute the box center as the mean of the verts -// -// Args -// box_verts: (8, 3) tensor of the corner vertices of the box -// -// Returns -// vec3: coordinates of the center of the box -// -inline vec3 BoxCenter(const at::Tensor& box_verts) { - const auto& box_center_t = at::mean(box_verts, 0); - const vec3 box_center( - box_center_t[0].item(), - box_center_t[1].item(), - box_center_t[2].item()); - return box_center; -} - -// Compute the polyhedron center as the mean of the face centers -// of the triangle faces -// -// Args -// tris: vector of vec3 coordinates of the -// vertices of each of the triangles in the polyhedron -// -// Returns -// vec3: coordinates of the center of the polyhedron -// -inline vec3 PolyhedronCenter(const face_verts& tris) { - float x = 0.0; - float y = 0.0; - float z = 0.0; - const int num_tris = tris.size(); - - // Find the center point of each face - for (int t = 0; t < num_tris; ++t) { - const vec3 v0 = tris[t][0]; - const vec3 v1 = tris[t][1]; - const vec3 v2 = tris[t][2]; - const float x_face = (v0.x + v1.x + v2.x) / 3.0; - const float y_face = (v0.y + v1.y + v2.y) / 3.0; - const float z_face = (v0.z + v1.z + v2.z) / 3.0; - x = x + x_face; - y = y + y_face; - z = z + z_face; - } - - // Take the mean of the centers of all faces - x = x / num_tris; - y = y / num_tris; - z = z / num_tris; - - const vec3 center(x, y, z); - return center; -} - -// Compute a boolean indicator for whether a point -// is inside a plane, where inside refers to whether -// or not the point has a component in the -// normal direction of the plane. 
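
BoxVolume above sums one tetrahedron per surface triangle: after subtracting the box center, each triangle contributes |dot(v0, cross(v1, v2))| / 6. For a unit cube every one of the 12 triangles contributes 1/12, so the total is 1. A self-contained check reusing the _TRIS connectivity; the corner coordinates below are an assumed ordering chosen to be consistent with the _PLANES table, purely for illustration:

#include <cmath>
#include <cstdio>

struct V { double x, y, z; };
static V sub(V a, V b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V cross(V a, V b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static double dot(V a, V b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

int main() {
  const V corners[8] = {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0},
                        {0, 0, 1}, {1, 0, 1}, {1, 1, 1}, {0, 1, 1}};
  const int tris[12][3] = {{0, 1, 2}, {0, 3, 2}, {4, 5, 6}, {4, 6, 7},
                           {1, 5, 6}, {1, 6, 2}, {0, 4, 7}, {0, 7, 3},
                           {3, 2, 6}, {3, 6, 7}, {0, 1, 5}, {0, 4, 5}};
  const V ctr = {0.5, 0.5, 0.5};
  double vol = 0.0;
  for (const auto& t : tris) {
    const V v0 = sub(corners[t[0]], ctr);
    const V v1 = sub(corners[t[1]], ctr);
    const V v2 = sub(corners[t[2]], ctr);
    vol += std::fabs(dot(v0, cross(v1, v2))) / 6.0;  // tetrahedron with the box center
  }
  std::printf("volume = %.3f\n", vol);  // 1.000
  return 0;
}
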
-// -// Args -// plane: vector of vec3 coordinates of the -// vertices of each of the triangles in the box -// normal: vec3 of the direction of the plane normal -// point: vec3 of the position of the point of interest -// -// Returns -// bool: whether or not the point is inside the plane -// -inline bool IsInside( - const std::vector>& plane, - const vec3& normal, - const vec3& point) { - // The center of the plane - const vec3 plane_ctr = PlaneCenter(plane); - - // Every point p can be written as p = plane_ctr + a e0 + b e1 + c n - // Solving for c: - // c = (point - plane_ctr - a * e0 - b * e1).dot(n) - // We know that = 0 and = 0 - // So the calculation can be simplified as: - const float c = dot((point - plane_ctr), normal); - const bool inside = c >= 0.0f; - return inside; -} - -// Find the point of intersection between a plane -// and a line given by the end points (p0, p1) -// -// Args -// plane: vector of vec3 coordinates of the -// vertices of each of the triangles in the box -// normal: vec3 of the direction of the plane normal -// p0, p1: vec3 of the start and end point of the line -// -// Returns -// vec3: position of the intersection point -// -inline vec3 PlaneEdgeIntersection( - const std::vector>& plane, - const vec3& normal, - const vec3& p0, - const vec3& p1) { - // The center of the plane - const vec3 plane_ctr = PlaneCenter(plane); - - // The point of intersection can be parametrized - // p = p0 + a (p1 - p0) where a in [0, 1] - // We want to find a such that p is on plane - //
 <p - plane_ctr, normal>
= 0 - - vec3 direc = p1 - p0; - direc = direc / std::fmaxf(norm(direc), kEpsilon); - - vec3 p = (p1 + p0) / 2.0f; - - if (std::abs(dot(direc, normal)) >= dEpsilon) { - const float top = -1.0f * dot(p0 - plane_ctr, normal); - const float bot = dot(p1 - p0, normal); - const float a = top / bot; - p = p0 + a * (p1 - p0); - } - return p; -} - -// Compute the most distant points between two sets of vertices -// -// Args -// verts1, verts2: vec3 defining the list of vertices -// -// Returns -// v1m, v2m: vec3 vectors of the most distant points -// in verts1 and verts2 respectively -// -inline std::tuple, vec3> ArgMaxVerts( - const std::vector>& verts1, - const std::vector>& verts2) { - vec3 v1m = {0.0f, 0.0f, 0.0f}; - vec3 v2m = {0.0f, 0.0f, 0.0f}; - float maxdist = -1.0f; - - for (const auto& v1 : verts1) { - for (const auto& v2 : verts2) { - if (norm(v1 - v2) > maxdist) { - v1m = v1; - v2m = v2; - maxdist = norm(v1 - v2); - } - } - } - return std::make_tuple(v1m, v2m); -} - -// Compute a boolean indicator for whether or not two faces -// are coplanar -// -// Args -// tri1, tri2: std:vector of the vertex coordinates of -// triangle faces -// -// Returns -// bool: whether or not the two faces are coplanar -// -inline bool IsCoplanarTriTri( - const std::vector>& tri1, - const std::vector>& tri2) { - // Get normal for tri 1 - const vec3 n1 = TriNormal(tri1); - - // Get normal for tri 2 - const vec3 n2 = TriNormal(tri2); - - // Check if parallel - const bool check1 = std::abs(dot(n1, n2)) > 1 - dEpsilon; - - // Compute most distant points - auto argvs = ArgMaxVerts(tri1, tri2); - const auto v1m = std::get<0>(argvs); - const auto v2m = std::get<1>(argvs); - - vec3 n12m = v1m - v2m; - n12m = n12m / std::fmaxf(norm(n12m), kEpsilon); - - const bool check2 = (std::abs(dot(n12m, n1)) < dEpsilon) || - (std::abs(dot(n12m, n2)) < dEpsilon); - - return (check1 && check2); -} - -// Compute a boolean indicator for whether or not a triangular and a planar -// face are coplanar -// -// Args -// tri, plane: std:vector of the vertex coordinates of -// triangular face and planar face -// normal: the normal direction of the plane pointing "inside" -// -// Returns -// bool: whether or not the two faces are coplanar -// -inline bool IsCoplanarTriPlane( - const std::vector>& tri, - const std::vector>& plane, - const vec3& normal) { - // Get normal for tri - const vec3 nt = TriNormal(tri); - - // check if parallel - const bool check1 = std::abs(dot(nt, normal)) > 1 - dEpsilon; - - // Compute most distant points - auto argvs = ArgMaxVerts(tri, plane); - const auto v1m = std::get<0>(argvs); - const auto v2m = std::get<1>(argvs); - - vec3 n12m = v1m - v2m; - n12m = n12m / std::fmaxf(norm(n12m), kEpsilon); - - const bool check2 = std::abs(dot(n12m, normal)) < dEpsilon; - - return (check1 && check2); -} - -// Triangle is clipped into a quadrilateral -// based on the intersection points with the plane. -// Then the quadrilateral is divided into two triangles. 
-// -// Args -// plane: vector of vec3 coordinates of the -// vertices of each of the triangles in the box -// normal: vec3 of the direction of the plane normal -// vout: vec3 of the point in the triangle which is outside -// the plane -// vin1, vin2: vec3 of the points in the triangle which are -// inside the plane -// -// Returns -// std::vector>: vector of vertex coordinates -// of the new triangle faces -// -inline face_verts ClipTriByPlaneOneOut( - const std::vector>& plane, - const vec3& normal, - const vec3& vout, - const vec3& vin1, - const vec3& vin2) { - // point of intersection between plane and (vin1, vout) - const vec3 pint1 = PlaneEdgeIntersection(plane, normal, vin1, vout); - // point of intersection between plane and (vin2, vout) - const vec3 pint2 = PlaneEdgeIntersection(plane, normal, vin2, vout); - const face_verts face_verts = {{vin1, pint1, pint2}, {vin1, pint2, vin2}}; - return face_verts; -} - -// Triangle is clipped into a smaller triangle based -// on the intersection points with the plane. -// -// Args -// plane: vector of vec3 coordinates of the -// vertices of each of the triangles in the box -// normal: vec3 of the direction of the plane normal -// vout1, vout2: vec3 of the points in the triangle which are -// outside the plane -// vin: vec3 of the point in the triangle which is inside -// the plane -// Returns -// std::vector>: vector of vertex coordinates -// of the new triangle face -// -inline face_verts ClipTriByPlaneTwoOut( - const std::vector>& plane, - const vec3& normal, - const vec3& vout1, - const vec3& vout2, - const vec3& vin) { - // point of intersection between plane and (vin, vout1) - const vec3 pint1 = PlaneEdgeIntersection(plane, normal, vin, vout1); - // point of intersection between plane and (vin, vout2) - const vec3 pint2 = PlaneEdgeIntersection(plane, normal, vin, vout2); - const face_verts face_verts = {{vin, pint1, pint2}}; - return face_verts; -} - -// Clip the triangle faces so that they lie within the -// plane, creating new triangle faces where necessary. -// -// Args -// plane: vector of vec3 coordinates of the -// vertices of each of the triangles in the box -// tri: std:vector of the vertex coordinates of the -// triangle faces -// normal: vec3 of the direction of the plane normal -// -// Returns -// std::vector>: vector of vertex coordinates -// of the new triangle faces formed after clipping. -// All triangles are now "inside" the plane. 
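
ClipTriByPlane below (like its CUDA counterpart earlier in this diff) reduces to counting how many triangle vertices are inside the half-space: 3 in keeps the triangle, 2 in yields a quadrilateral split into two triangles, 1 in yields a single smaller triangle, 0 in drops it. A trivial standalone sketch of that case analysis (NumClippedTris is a hypothetical name used only for illustration):

#include <cstdio>

static int NumClippedTris(bool in0, bool in1, bool in2) {
  switch (int(in0) + int(in1) + int(in2)) {
    case 3: return 1;   // fully inside: keep as-is
    case 2: return 2;   // one vertex out: clipped to a quad, split into two triangles
    case 1: return 1;   // two vertices out: clipped to a smaller triangle
    default: return 0;  // fully outside: dropped
  }
}

int main() {
  std::printf("%d %d %d %d\n",
              NumClippedTris(true, true, true),     // 1
              NumClippedTris(true, true, false),    // 2
              NumClippedTris(true, false, false),   // 1
              NumClippedTris(false, false, false)); // 0
  return 0;
}
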
-// -inline face_verts ClipTriByPlane( - const std::vector>& plane, - const std::vector>& tri, - const vec3& normal) { - // Get Triangle vertices - const vec3 v0 = tri[0]; - const vec3 v1 = tri[1]; - const vec3 v2 = tri[2]; - - // Check coplanar - const bool iscoplanar = IsCoplanarTriPlane(tri, plane, normal); - if (iscoplanar) { - // Return input vertices - face_verts tris = {{v0, v1, v2}}; - return tris; - } - - // Check each of the triangle vertices to see if it is inside the plane - const bool isin0 = IsInside(plane, normal, v0); - const bool isin1 = IsInside(plane, normal, v1); - const bool isin2 = IsInside(plane, normal, v2); - - // All in - if (isin0 && isin1 && isin2) { - // Return input vertices - face_verts tris = {{v0, v1, v2}}; - return tris; - } - - face_verts empty_tris = {}; - // All out - if (!isin0 && !isin1 && !isin2) { - return empty_tris; - } - - // One vert out - if (isin0 && isin1 && !isin2) { - return ClipTriByPlaneOneOut(plane, normal, v2, v0, v1); - } - if (isin0 && !isin1 && isin2) { - return ClipTriByPlaneOneOut(plane, normal, v1, v0, v2); - } - if (!isin0 && isin1 && isin2) { - return ClipTriByPlaneOneOut(plane, normal, v0, v1, v2); - } - - // Two verts out - if (isin0 && !isin1 && !isin2) { - return ClipTriByPlaneTwoOut(plane, normal, v1, v2, v0); - } - if (!isin0 && !isin1 && isin2) { - return ClipTriByPlaneTwoOut(plane, normal, v0, v1, v2); - } - if (!isin0 && isin1 && !isin2) { - return ClipTriByPlaneTwoOut(plane, normal, v0, v2, v1); - } - - // Else return empty (should not be reached) - return empty_tris; -} - -// Get the triangles from each box which are part of the -// intersecting polyhedron by computing the intersection -// points with each of the planes. -// -// Args -// tris: vertex coordinates of all the triangle faces -// in the box -// planes: vertex coordinates of all the planes in the box -// center: vec3 coordinates of the center of the box from which -// the planes originate -// -// Returns -// std::vector>> vector of vertex coordinates -// of the new triangle faces formed after clipping. -// All triangles are now "inside" the planes. -// -inline face_verts BoxIntersections( - const face_verts& tris, - const face_verts& planes, - const vec3& center) { - // Create a new vector to avoid modifying in place - face_verts out_tris = tris; - for (int p = 0; p < NUM_PLANES; ++p) { - // Get plane normal direction - const vec3 n2 = PlaneNormalDirection(planes[p], center); - // Iterate through triangles in tris - // Create intermediate vector to store the updated tris - face_verts tri_verts_updated; - for (int t = 0; t < out_tris.size(); ++t) { - // Clip tri by plane - const face_verts tri_updated = ClipTriByPlane(planes[p], out_tris[t], n2); - // Add to the tri_verts_updated output if not empty - for (int v = 0; v < tri_updated.size(); ++v) { - tri_verts_updated.push_back(tri_updated[v]); - } - } - // Update the tris - out_tris = tri_verts_updated; - } - return out_tris; -} diff --git a/pytorch3d/pytorch3d/csrc/knn/knn.cu b/pytorch3d/pytorch3d/csrc/knn/knn.cu deleted file mode 100644 index 93a3060b2294af481e1df2a829910a199d47f533..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/knn/knn.cu +++ /dev/null @@ -1,587 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "utils/dispatch.cuh" -#include "utils/mink.cuh" - -// A chunk of work is blocksize-many points of P1. -// The number of potential chunks to do is N*(1+(P1-1)/blocksize) -// call (1+(P1-1)/blocksize) chunks_per_cloud -// These chunks are divided among the gridSize-many blocks. -// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc . -// In chunk i, we work on cloud i/chunks_per_cloud on points starting from -// blocksize*(i%chunks_per_cloud). - -template -__global__ void KNearestNeighborKernelV0( - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const size_t N, - const size_t P1, - const size_t P2, - const size_t D, - const size_t K, - const size_t norm) { - // Store both dists and indices for knn in global memory. - const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x); - const int64_t chunks_to_do = N * chunks_per_cloud; - for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t n = chunk / chunks_per_cloud; - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t p1 = start_point + threadIdx.x; - if (p1 >= lengths1[n]) - continue; - int offset = n * P1 * K + p1 * K; - int64_t length2 = lengths2[n]; - MinK mink(dists + offset, idxs + offset, K); - for (int p2 = 0; p2 < length2; ++p2) { - // Find the distance between points1[n, p1] and points[n, p2] - scalar_t dist = 0; - for (int d = 0; d < D; ++d) { - scalar_t coord1 = points1[n * P1 * D + p1 * D + d]; - scalar_t coord2 = points2[n * P2 * D + p2 * D + d]; - scalar_t diff = coord1 - coord2; - scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff); - dist += norm_diff; - } - mink.add(dist, p2); - } - } -} - -template -__global__ void KNearestNeighborKernelV1( - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const size_t N, - const size_t P1, - const size_t P2, - const size_t K, - const size_t norm) { - // Same idea as the previous version, but hoist D into a template argument - // so we can cache the current point in a thread-local array. We still store - // the current best K dists and indices in global memory, so this should work - // for very large K and fairly large D. - scalar_t cur_point[D]; - const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x); - const int64_t chunks_to_do = N * chunks_per_cloud; - for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t n = chunk / chunks_per_cloud; - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t p1 = start_point + threadIdx.x; - if (p1 >= lengths1[n]) - continue; - for (int d = 0; d < D; ++d) { - cur_point[d] = points1[n * P1 * D + p1 * D + d]; - } - int offset = n * P1 * K + p1 * K; - int64_t length2 = lengths2[n]; - MinK mink(dists + offset, idxs + offset, K); - for (int p2 = 0; p2 < length2; ++p2) { - // Find the distance between cur_point and points[n, p2] - scalar_t dist = 0; - for (int d = 0; d < D; ++d) { - scalar_t diff = cur_point[d] - points2[n * P2 * D + p2 * D + d]; - scalar_t norm_diff = (norm == 2) ? 
(diff * diff) : abs(diff); - dist += norm_diff; - } - mink.add(dist, p2); - } - } -} - -// This is a shim functor to allow us to dispatch using DispatchKernel1D -template -struct KNearestNeighborV1Functor { - static void run( - size_t blocks, - size_t threads, - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const size_t N, - const size_t P1, - const size_t P2, - const size_t K, - const size_t norm) { - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - KNearestNeighborKernelV1<<>>( - points1, points2, lengths1, lengths2, dists, idxs, N, P1, P2, K, norm); - } -}; - -template -__global__ void KNearestNeighborKernelV2( - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const int64_t N, - const int64_t P1, - const int64_t P2, - const size_t norm) { - // Same general implementation as V2, but also hoist K into a template arg. - scalar_t cur_point[D]; - scalar_t min_dists[K]; - int min_idxs[K]; - const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x); - const int64_t chunks_to_do = N * chunks_per_cloud; - for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t n = chunk / chunks_per_cloud; - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t p1 = start_point + threadIdx.x; - if (p1 >= lengths1[n]) - continue; - for (int d = 0; d < D; ++d) { - cur_point[d] = points1[n * P1 * D + p1 * D + d]; - } - int64_t length2 = lengths2[n]; - MinK mink(min_dists, min_idxs, K); - for (int p2 = 0; p2 < length2; ++p2) { - scalar_t dist = 0; - for (int d = 0; d < D; ++d) { - int offset = n * P2 * D + p2 * D + d; - scalar_t diff = cur_point[d] - points2[offset]; - scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff); - dist += norm_diff; - } - mink.add(dist, p2); - } - for (int k = 0; k < mink.size(); ++k) { - idxs[n * P1 * K + p1 * K + k] = min_idxs[k]; - dists[n * P1 * K + p1 * K + k] = min_dists[k]; - } - } -} - -// This is a shim so we can dispatch using DispatchKernel2D -template -struct KNearestNeighborKernelV2Functor { - static void run( - size_t blocks, - size_t threads, - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const int64_t N, - const int64_t P1, - const int64_t P2, - const size_t norm) { - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - KNearestNeighborKernelV2<<>>( - points1, points2, lengths1, lengths2, dists, idxs, N, P1, P2, norm); - } -}; - -template -__global__ void KNearestNeighborKernelV3( - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const size_t N, - const size_t P1, - const size_t P2, - const size_t norm) { - // Same idea as V2, but use register indexing for thread-local arrays. - // Enabling sorting for this version leads to huge slowdowns; I suspect - // that it forces min_dists into local memory rather than registers. - // As a result this version is always unsorted. 
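
All of these kernels use the chunked grid-stride pattern described at the top of this file: a linear chunk id is split into a cloud index and a starting point within that cloud. A host-side sketch with hypothetical sizes (N = 2 clouds, P1 = 1000 points, block size 256):

#include <cstdio>

int main() {
  const long long N = 2, P1 = 1000, block = 256;
  const long long chunks_per_cloud = 1 + (P1 - 1) / block;  // 4
  const long long chunks_to_do = N * chunks_per_cloud;      // 8
  for (long long chunk = 0; chunk < chunks_to_do; ++chunk) {
    const long long n = chunk / chunks_per_cloud;                 // which cloud
    const long long start = block * (chunk % chunks_per_cloud);   // first point handled
    std::printf("chunk %lld -> cloud %lld, points [%lld, %lld)\n",
                chunk, n, start, start + block);
  }
  return 0;
}
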
- scalar_t cur_point[D]; - scalar_t min_dists[K]; - int min_idxs[K]; - const int64_t chunks_per_cloud = (1 + (P1 - 1) / blockDim.x); - const int64_t chunks_to_do = N * chunks_per_cloud; - for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t n = chunk / chunks_per_cloud; - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t p1 = start_point + threadIdx.x; - if (p1 >= lengths1[n]) - continue; - for (int d = 0; d < D; ++d) { - cur_point[d] = points1[n * P1 * D + p1 * D + d]; - } - int64_t length2 = lengths2[n]; - RegisterMinK mink(min_dists, min_idxs); - for (int p2 = 0; p2 < length2; ++p2) { - scalar_t dist = 0; - for (int d = 0; d < D; ++d) { - int offset = n * P2 * D + p2 * D + d; - scalar_t diff = cur_point[d] - points2[offset]; - scalar_t norm_diff = (norm == 2) ? (diff * diff) : abs(diff); - dist += norm_diff; - } - mink.add(dist, p2); - } - for (int k = 0; k < mink.size(); ++k) { - idxs[n * P1 * K + p1 * K + k] = min_idxs[k]; - dists[n * P1 * K + p1 * K + k] = min_dists[k]; - } - } -} - -// This is a shim so we can dispatch using DispatchKernel2D -template -struct KNearestNeighborKernelV3Functor { - static void run( - size_t blocks, - size_t threads, - const scalar_t* __restrict__ points1, - const scalar_t* __restrict__ points2, - const int64_t* __restrict__ lengths1, - const int64_t* __restrict__ lengths2, - scalar_t* __restrict__ dists, - int64_t* __restrict__ idxs, - const size_t N, - const size_t P1, - const size_t P2, - const size_t norm) { - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - KNearestNeighborKernelV3<<>>( - points1, points2, lengths1, lengths2, dists, idxs, N, P1, P2, norm); - } -}; - -constexpr int V1_MIN_D = 1; -constexpr int V1_MAX_D = 32; - -constexpr int V2_MIN_D = 1; -constexpr int V2_MAX_D = 8; -constexpr int V2_MIN_K = 1; -constexpr int V2_MAX_K = 32; - -constexpr int V3_MIN_D = 1; -constexpr int V3_MAX_D = 8; -constexpr int V3_MIN_K = 1; -constexpr int V3_MAX_K = 4; - -bool InBounds(const int64_t min, const int64_t x, const int64_t max) { - return min <= x && x <= max; -} - -bool KnnCheckVersion(int version, const int64_t D, const int64_t K) { - if (version == 0) { - return true; - } else if (version == 1) { - return InBounds(V1_MIN_D, D, V1_MAX_D); - } else if (version == 2) { - return InBounds(V2_MIN_D, D, V2_MAX_D) && InBounds(V2_MIN_K, K, V2_MAX_K); - } else if (version == 3) { - return InBounds(V3_MIN_D, D, V3_MAX_D) && InBounds(V3_MIN_K, K, V3_MAX_K); - } - return false; -} - -int ChooseVersion(const int64_t D, const int64_t K) { - for (int version = 3; version >= 1; version--) { - if (KnnCheckVersion(version, D, K)) { - return version; - } - } - return 0; -} - -std::tuple KNearestNeighborIdxCuda( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int norm, - const int K, - int version) { - // Check inputs are on the same device - at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2}, - lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4}; - at::CheckedFrom c = "KNearestNeighborIdxCuda"; - at::checkAllSameGPU(c, {p1_t, p2_t, lengths1_t, lengths2_t}); - at::checkAllSameType(c, {p1_t, p2_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(p1.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const auto N = p1.size(0); - const auto P1 = p1.size(1); - const auto P2 = p2.size(1); - const auto D = p2.size(2); - const 
int64_t K_64 = K; - - TORCH_CHECK((norm == 1) || (norm == 2), "Norm must be 1 or 2."); - - TORCH_CHECK(p2.size(2) == D, "Point sets must have the same last dimension"); - auto long_dtype = lengths1.options().dtype(at::kLong); - auto idxs = at::zeros({N, P1, K}, long_dtype); - auto dists = at::zeros({N, P1, K}, p1.options()); - - if (idxs.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(idxs, dists); - } - - if (version < 0) { - version = ChooseVersion(D, K); - } else if (!KnnCheckVersion(version, D, K)) { - int new_version = ChooseVersion(D, K); - std::cout << "WARNING: Requested KNN version " << version - << " is not compatible with D = " << D << "; K = " << K - << ". Falling back to version = " << new_version << std::endl; - version = new_version; - } - - // At this point we should have a valid version no matter what data the user - // gave us. But we can check once more to be sure; however this time - // assert fail since failing at this point means we have a bug in our version - // selection or checking code. - AT_ASSERTM(KnnCheckVersion(version, D, K), "Invalid version"); - - const size_t threads = 256; - const size_t blocks = 256; - if (version == 0) { - AT_DISPATCH_FLOATING_TYPES( - p1.scalar_type(), "knn_kernel_cuda", ([&] { - KNearestNeighborKernelV0<<>>( - p1.contiguous().data_ptr(), - p2.contiguous().data_ptr(), - lengths1.contiguous().data_ptr(), - lengths2.contiguous().data_ptr(), - dists.data_ptr(), - idxs.data_ptr(), - N, - P1, - P2, - D, - K, - norm); - })); - } else if (version == 1) { - AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] { - DispatchKernel1D< - KNearestNeighborV1Functor, - scalar_t, - V1_MIN_D, - V1_MAX_D>( - D, - blocks, - threads, - p1.contiguous().data_ptr(), - p2.contiguous().data_ptr(), - lengths1.contiguous().data_ptr(), - lengths2.contiguous().data_ptr(), - dists.data_ptr(), - idxs.data_ptr(), - N, - P1, - P2, - K, - norm); - })); - } else if (version == 2) { - AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] { - DispatchKernel2D< - KNearestNeighborKernelV2Functor, - scalar_t, - V2_MIN_D, - V2_MAX_D, - V2_MIN_K, - V2_MAX_K>( - D, - K_64, - blocks, - threads, - p1.contiguous().data_ptr(), - p2.contiguous().data_ptr(), - lengths1.contiguous().data_ptr(), - lengths2.contiguous().data_ptr(), - dists.data_ptr(), - idxs.data_ptr(), - N, - P1, - P2, - norm); - })); - } else if (version == 3) { - AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] { - DispatchKernel2D< - KNearestNeighborKernelV3Functor, - scalar_t, - V3_MIN_D, - V3_MAX_D, - V3_MIN_K, - V3_MAX_K>( - D, - K_64, - blocks, - threads, - p1.contiguous().data_ptr(), - p2.contiguous().data_ptr(), - lengths1.contiguous().data_ptr(), - lengths2.contiguous().data_ptr(), - dists.data_ptr(), - idxs.data_ptr(), - N, - P1, - P2, - norm); - })); - } - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(idxs, dists); -} - -// ------------------------------------------------------------- // -// Backward Operators // -// ------------------------------------------------------------- // - -// TODO(gkioxari) support all data types once AtomicAdd supports doubles. -// Currently, support is for floats only. 
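
For the L2 norm the backward kernel that follows accumulates 2 * grad_dist * (p1 - p2) per coordinate into grad_p1 (and its negative into grad_p2), while for L1 it uses the sign of (p1 - p2). A quick numerical sanity check of the L2 factor against a central finite difference:

#include <cstdio>

// Squared-L2 contribution of a single coordinate.
static double sq(double a, double b) { return (a - b) * (a - b); }

int main() {
  const double a = 0.7, b = -0.2, h = 1e-6;
  const double analytic = 2.0 * (a - b);                       // the factor used in the kernel
  const double numeric = (sq(a + h, b) - sq(a - h, b)) / (2.0 * h);
  std::printf("analytic %.6f  numeric %.6f\n", analytic, numeric);
  return 0;
}
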
-__global__ void KNearestNeighborBackwardKernel( - const float* __restrict__ p1, // (N, P1, D) - const float* __restrict__ p2, // (N, P2, D) - const int64_t* __restrict__ lengths1, // (N,) - const int64_t* __restrict__ lengths2, // (N,) - const int64_t* __restrict__ idxs, // (N, P1, K) - const float* __restrict__ grad_dists, // (N, P1, K) - float* __restrict__ grad_p1, // (N, P1, D) - float* __restrict__ grad_p2, // (N, P2, D) - const size_t N, - const size_t P1, - const size_t P2, - const size_t K, - const size_t D, - const size_t norm) { - const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; - const size_t stride = gridDim.x * blockDim.x; - - for (size_t i = tid; i < N * P1 * K * D; i += stride) { - const size_t n = i / (P1 * K * D); // batch index - size_t rem = i % (P1 * K * D); - const size_t p1_idx = rem / (K * D); // index of point in p1 - rem = rem % (K * D); - const size_t k = rem / D; // k-th nearest neighbor - const size_t d = rem % D; // d-th dimension in the feature vector - - const size_t num1 = lengths1[n]; // number of valid points in p1 in batch - const size_t num2 = lengths2[n]; // number of valid points in p2 in batch - if ((p1_idx < num1) && (k < num2)) { - const float grad_dist = grad_dists[n * P1 * K + p1_idx * K + k]; - // index of point in p2 corresponding to the k-th nearest neighbor - const int64_t p2_idx = idxs[n * P1 * K + p1_idx * K + k]; - // If the index is the pad value of -1 then ignore it - if (p2_idx == -1) { - continue; - } - float diff = 0.0; - if (norm == 1) { - float sign = - (p1[n * P1 * D + p1_idx * D + d] > p2[n * P2 * D + p2_idx * D + d]) - ? 1.0 - : -1.0; - diff = grad_dist * sign; - } else { // norm is 2 - diff = 2.0 * grad_dist * - (p1[n * P1 * D + p1_idx * D + d] - p2[n * P2 * D + p2_idx * D + d]); - } - atomicAdd(grad_p1 + n * P1 * D + p1_idx * D + d, diff); - atomicAdd(grad_p2 + n * P2 * D + p2_idx * D + d, -1.0f * diff); - } - } -} - -std::tuple KNearestNeighborBackwardCuda( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const at::Tensor& idxs, - int norm, - const at::Tensor& grad_dists) { - // Check inputs are on the same device - at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2}, - lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4}, - idxs_t{idxs, "idxs", 5}, grad_dists_t{grad_dists, "grad_dists", 6}; - at::CheckedFrom c = "KNearestNeighborBackwardCuda"; - at::checkAllSameGPU( - c, {p1_t, p2_t, lengths1_t, lengths2_t, idxs_t, grad_dists_t}); - at::checkAllSameType(c, {p1_t, p2_t, grad_dists_t}); - - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("KNearestNeighborBackwardCuda"); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(p1.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const auto N = p1.size(0); - const auto P1 = p1.size(1); - const auto P2 = p2.size(1); - const auto D = p2.size(2); - const auto K = idxs.size(2); - - TORCH_CHECK(p1.size(2) == D, "Point sets must have the same last dimension"); - TORCH_CHECK(idxs.size(0) == N, "KNN idxs must have the same batch dimension"); - TORCH_CHECK( - idxs.size(1) == P1, "KNN idxs must have the same point dimension as p1"); - TORCH_CHECK(grad_dists.size(0) == N); - TORCH_CHECK(grad_dists.size(1) == P1); - TORCH_CHECK(grad_dists.size(2) == K); - - auto grad_p1 = at::zeros({N, P1, D}, p1.options()); - auto grad_p2 = at::zeros({N, P2, D}, p2.options()); - - if (grad_p1.numel() 
== 0 || grad_p2.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_p1, grad_p2); - } - - const int blocks = 64; - const int threads = 512; - - KNearestNeighborBackwardKernel<<>>( - p1.contiguous().data_ptr(), - p2.contiguous().data_ptr(), - lengths1.contiguous().data_ptr(), - lengths2.contiguous().data_ptr(), - idxs.contiguous().data_ptr(), - grad_dists.contiguous().data_ptr(), - grad_p1.data_ptr(), - grad_p2.data_ptr(), - N, - P1, - P2, - K, - D, - norm); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_p1, grad_p2); -} diff --git a/pytorch3d/pytorch3d/csrc/knn/knn.h b/pytorch3d/pytorch3d/csrc/knn/knn.h deleted file mode 100644 index 7fc8d48870758ccdeb6f36b74ba9afd916b2b8d0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/knn/knn.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Compute indices of K nearest neighbors in pointcloud p2 to points -// in pointcloud p1. -// -// Args: -// p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each -// containing P1 points of dimension D. -// p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each -// containing P2 points of dimension D. -// lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. -// lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. -// norm: int specifying the norm for the distance (1 for L1, 2 for L2) -// K: int giving the number of nearest points to return. -// version: Integer telling which implementation to use. -// -// Returns: -// p1_neighbor_idx: LongTensor of shape (N, P1, K), where -// p1_neighbor_idx[n, i, k] = j means that the kth nearest -// neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. -// It is padded with zeros so that it can be used easily in a later -// gather() operation. -// -// p1_neighbor_dists: FloatTensor of shape (N, P1, K) containing the squared -// distance from each point p1[n, p, :] to its K neighbors -// p2[n, p1_neighbor_idx[n, p, k], :]. - -// CPU implementation. -std::tuple KNearestNeighborIdxCpu( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int norm, - const int K); - -// CUDA implementation -std::tuple KNearestNeighborIdxCuda( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int norm, - const int K, - const int version); - -// Implementation which is exposed. -std::tuple KNearestNeighborIdx( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int norm, - const int K, - const int version) { - if (p1.is_cuda() || p2.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(p1); - CHECK_CUDA(p2); - return KNearestNeighborIdxCuda( - p1, p2, lengths1, lengths2, norm, K, version); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K); -} - -// Compute gradients with respect to p1 and p2 -// -// Args: -// p1: FloatTensor of shape (N, P1, D) giving a batch of pointclouds each -// containing P1 points of dimension D. 
-// p2: FloatTensor of shape (N, P2, D) giving a batch of pointclouds each -// containing P2 points of dimension D. -// lengths1: LongTensor, shape (N,), giving actual length of each P1 cloud. -// lengths2: LongTensor, shape (N,), giving actual length of each P2 cloud. -// p1_neighbor_idx: LongTensor of shape (N, P1, K), where -// p1_neighbor_idx[n, i, k] = j means that the kth nearest -// neighbor to p1[n, i] in the cloud p2[n] is p2[n, j]. -// It is padded with zeros so that it can be used easily in a later -// gather() operation. This is computed from the forward pass. -// norm: int specifying the norm for the distance (1 for L1, 2 for L2) -// grad_dists: FLoatTensor of shape (N, P1, K) which contains the input -// gradients. -// -// Returns: -// grad_p1: FloatTensor of shape (N, P1, D) containing the output gradients -// wrt p1. -// grad_p2: FloatTensor of shape (N, P2, D) containing the output gradients -// wrt p2. - -// CPU implementation. -std::tuple KNearestNeighborBackwardCpu( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const at::Tensor& idxs, - const int norm, - const at::Tensor& grad_dists); - -// CUDA implementation -std::tuple KNearestNeighborBackwardCuda( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const at::Tensor& idxs, - const int norm, - const at::Tensor& grad_dists); - -// Implementation which is exposed. -std::tuple KNearestNeighborBackward( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const at::Tensor& idxs, - const int norm, - const at::Tensor& grad_dists) { - if (p1.is_cuda() || p2.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(p1); - CHECK_CUDA(p2); - return KNearestNeighborBackwardCuda( - p1, p2, lengths1, lengths2, idxs, norm, grad_dists); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return KNearestNeighborBackwardCpu( - p1, p2, lengths1, lengths2, idxs, norm, grad_dists); -} - -// Utility to check whether a KNN version can be used. -// -// Args: -// version: Integer in the range 0 <= version <= 3 indicating one of our -// KNN implementations. -// D: Number of dimensions for the input and query point clouds -// K: Number of neighbors to be found -// -// Returns: -// Whether the indicated KNN version can be used. -bool KnnCheckVersion(int version, const int64_t D, const int64_t K); diff --git a/pytorch3d/pytorch3d/csrc/knn/knn_cpu.cpp b/pytorch3d/pytorch3d/csrc/knn/knn_cpu.cpp deleted file mode 100644 index 9e3153a6669721240c36084a3a7a563dee250a42..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/knn/knn_cpu.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include - -std::tuple KNearestNeighborIdxCpu( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const int norm, - const int K) { - const int N = p1.size(0); - const int P1 = p1.size(1); - const int D = p1.size(2); - - auto long_opts = lengths1.options().dtype(torch::kInt64); - torch::Tensor idxs = torch::full({N, P1, K}, 0, long_opts); - torch::Tensor dists = torch::full({N, P1, K}, 0, p1.options()); - - auto p1_a = p1.accessor(); - auto p2_a = p2.accessor(); - auto lengths1_a = lengths1.accessor(); - auto lengths2_a = lengths2.accessor(); - auto idxs_a = idxs.accessor(); - auto dists_a = dists.accessor(); - - for (int n = 0; n < N; ++n) { - const int64_t length1 = lengths1_a[n]; - const int64_t length2 = lengths2_a[n]; - for (int64_t i1 = 0; i1 < length1; ++i1) { - // Use a priority queue to store (distance, index) tuples. - std::priority_queue> q; - for (int64_t i2 = 0; i2 < length2; ++i2) { - float dist = 0; - for (int d = 0; d < D; ++d) { - float diff = p1_a[n][i1][d] - p2_a[n][i2][d]; - if (norm == 1) { - dist += abs(diff); - } else { // norm is 2 (default) - dist += diff * diff; - } - } - int size = static_cast(q.size()); - if (size < K || dist < std::get<0>(q.top())) { - q.emplace(dist, i2); - if (size >= K) { - q.pop(); - } - } - } - while (!q.empty()) { - auto t = q.top(); - q.pop(); - const int k = q.size(); - dists_a[n][i1][k] = std::get<0>(t); - idxs_a[n][i1][k] = std::get<1>(t); - } - } - } - return std::make_tuple(idxs, dists); -} - -// ------------------------------------------------------------- // -// Backward Operators // -// ------------------------------------------------------------- // - -std::tuple KNearestNeighborBackwardCpu( - const at::Tensor& p1, - const at::Tensor& p2, - const at::Tensor& lengths1, - const at::Tensor& lengths2, - const at::Tensor& idxs, - const int norm, - const at::Tensor& grad_dists) { - const int N = p1.size(0); - const int P1 = p1.size(1); - const int D = p1.size(2); - const int P2 = p2.size(1); - const int K = idxs.size(2); - - torch::Tensor grad_p1 = torch::full({N, P1, D}, 0, p1.options()); - torch::Tensor grad_p2 = torch::full({N, P2, D}, 0, p2.options()); - - auto p1_a = p1.accessor(); - auto p2_a = p2.accessor(); - auto lengths1_a = lengths1.accessor(); - auto lengths2_a = lengths2.accessor(); - auto idxs_a = idxs.accessor(); - auto grad_dists_a = grad_dists.accessor(); - auto grad_p1_a = grad_p1.accessor(); - auto grad_p2_a = grad_p2.accessor(); - - for (int n = 0; n < N; ++n) { - const int64_t length1 = lengths1_a[n]; - int64_t length2 = lengths2_a[n]; - length2 = (length2 < K) ? length2 : K; - for (int64_t i1 = 0; i1 < length1; ++i1) { - for (int64_t k = 0; k < length2; ++k) { - const int64_t i2 = idxs_a[n][i1][k]; - // If the index is the pad value of -1 then ignore it - if (i2 == -1) { - continue; - } - for (int64_t d = 0; d < D; ++d) { - float diff = 0.0; - if (norm == 1) { - float sign = (p1_a[n][i1][d] > p2_a[n][i2][d]) ? 
1.0 : -1.0; - diff = grad_dists_a[n][i1][k] * sign; - } else { // norm is 2 (default) - diff = 2.0f * grad_dists_a[n][i1][k] * - (p1_a[n][i1][d] - p2_a[n][i2][d]); - } - grad_p1_a[n][i1][d] += diff; - grad_p2_a[n][i2][d] += -1.0f * diff; - } - } - } - } - return std::make_tuple(grad_p1, grad_p2); -} diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.cu b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.cu deleted file mode 100644 index 44d50934762939c1be67948621a1c27bf1d3d732..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.cu +++ /dev/null @@ -1,535 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include "marching_cubes/tables.h" - -/* -Parallelized marching cubes for pytorch extension -referenced and adapted from CUDA-Samples: -(https://github.com/NVIDIA/cuda-samples/tree/master/Samples/5_Domain_Specific/marchingCubes) -We divide the algorithm into two forward-passes: -(1) The first forward-pass executes "ClassifyVoxelKernel" to -evaluate volume scalar field for each cube and pre-compute -two arrays -- number of vertices per cube (d_voxelVerts) and -occupied or not per cube (d_voxelOccupied). - -Some prepration steps: -With d_voxelOccupied, an exclusive scan is performed to compute -the number of activeVoxels, which can be used to accelerate -computation. With d_voxelVerts, another exclusive scan -is performed to compute the accumulated sum of vertices in the 3d -grid and totalVerts. - -(2) The second forward-pass calls "GenerateFacesKernel" to -generate interpolated vertex positions and face indices by "marching -through" each cube in the grid. - -*/ - -// EPS: Used to indicate if two float values are close -__constant__ const float EPSILON = 1e-5; - -// Linearly interpolate the position where an isosurface cuts an edge -// between two vertices, based on their scalar values -// -// Args: -// isolevel: float value used as threshold -// p1: position of point1 -// p2: position of point2 -// valp1: field value for p1 -// valp2: field value for p2 -// -// Returns: -// point: interpolated verte -// -__device__ float3 -vertexInterp(float isolevel, float3 p1, float3 p2, float valp1, float valp2) { - float ratio; - float3 p; - - if (abs(isolevel - valp1) < EPSILON) { - return p1; - } else if (abs(isolevel - valp2) < EPSILON) { - return p2; - } else if (abs(valp1 - valp2) < EPSILON) { - return p1; - } - - ratio = (isolevel - valp1) / (valp2 - valp1); - - p.x = p1.x * (1 - ratio) + p2.x * ratio; - p.y = p1.y * (1 - ratio) + p2.y * ratio; - p.z = p1.z * (1 - ratio) + p2.z * ratio; - - return p; -} - -// Determine if the triangle is degenerate -// A triangle is degenerate when at least two of the vertices -// share the same position. 
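
vertexInterp above places the new vertex at the linear zero crossing of (value - isolevel) along the edge: ratio = (isolevel - valp1) / (valp2 - valp1), then p = p1 * (1 - ratio) + p2 * ratio. For endpoint values 0.2 and 0.8 with isolevel 0.5 the ratio is 0.5, i.e. the midpoint of the edge. A trivial standalone check:

#include <cstdio>

int main() {
  const float isolevel = 0.5f, valp1 = 0.2f, valp2 = 0.8f;
  const float ratio = (isolevel - valp1) / (valp2 - valp1);
  // With p1 = 0 and p2 = 1 along the edge, the interpolated vertex lands at 0.5.
  std::printf("ratio = %.2f, x = %.2f\n", ratio, 0.0f * (1 - ratio) + 1.0f * ratio);
  return 0;
}
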
-// -// Args: -// p1: position of vertex p1 -// p2: position of vertex p2 -// p3: position of vertex p3 -// -// Returns: -// boolean indicator if the triangle is degenerate -__device__ bool isDegenerate(float3 p1, float3 p2, float3 p3) { - if ((abs(p1.x - p2.x) < EPSILON && abs(p1.y - p2.y) < EPSILON && - abs(p1.z - p2.z) < EPSILON) || - (abs(p2.x - p3.x) < EPSILON && abs(p2.y - p3.y) < EPSILON && - abs(p2.z - p3.z) < EPSILON) || - (abs(p3.x - p1.x) < EPSILON && abs(p3.y - p1.y) < EPSILON && - abs(p3.z - p1.z) < EPSILON)) { - return true; - } else { - return false; - } -} - -// Convert from local vertex id to global vertex id, given position -// of the cube where the vertex resides. The function ensures vertices -// shared from adjacent cubes are mapped to the same global id. - -// Args: -// v: local vertex id -// x: x position of the cube where the vertex belongs -// y: y position of the cube where the vertex belongs -// z: z position of the cube where the vertex belongs -// W: width of x dimension -// H: height of y dimension - -// Returns: -// global vertex id represented by its x/y/z offsets -__device__ uint localToGlobal(int v, int x, int y, int z, int W, int H) { - const int dx = v & 1; - const int dy = v >> 1 & 1; - const int dz = v >> 2 & 1; - return (x + dx) + (y + dy) * W + (z + dz) * W * H; -} - -// Hash_combine a pair of global vertex id to a single integer. -// -// Args: -// v1_id: global id of vertex 1 -// v2_id: global id of vertex 2 -// W: width of the 3d grid -// H: height of the 3d grid -// Z: depth of the 3d grid -// -// Returns: -// hashing for a pair of vertex ids -// -__device__ int64_t hashVpair(uint v1_id, uint v2_id, int W, int H, int D) { - return (int64_t)v1_id * (W + W * H + W * H * D) + (int64_t)v2_id; -} - -// precompute number of vertices and occupancy -// for each voxel in the grid. 
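The localToGlobal and hashVpair helpers above are what make vertex deduplication work: an edge shared by two neighbouring cubes must receive the same identifier from either cube. A small host-side sketch of that property, using only the two functions copied from above; the grid size and the chosen edge are illustrative:

#include <cassert>
#include <cstdint>

using uint = unsigned int;

uint localToGlobal(int v, int x, int y, int z, int W, int H) {
  const int dx = v & 1;
  const int dy = v >> 1 & 1;
  const int dz = v >> 2 & 1;
  return (x + dx) + (y + dy) * W + (z + dz) * W * H;
}

int64_t hashVpair(uint v1_id, uint v2_id, int W, int H, int D) {
  return (int64_t)v1_id * (W + W * H + W * H * D) + (int64_t)v2_id;
}

int main() {
  const int W = 5, H = 5, D = 5; // number of grid corners per axis
  // Corner (1, 0, 0) is local vertex 1 of the cube at (0, 0, 0) and local
  // vertex 0 of the cube at (1, 0, 0); both map to the same global id.
  assert(localToGlobal(1, 0, 0, 0, W, H) == localToGlobal(0, 1, 0, 0, W, H));
  // The edge from corner (1, 0, 0) to corner (1, 1, 0) lies on the face shared
  // by those two cubes, so both cubes hash it to the same edge id.
  int64_t from_cube0 = hashVpair(
      localToGlobal(1, 0, 0, 0, W, H), localToGlobal(3, 0, 0, 0, W, H), W, H, D);
  int64_t from_cube1 = hashVpair(
      localToGlobal(0, 1, 0, 0, W, H), localToGlobal(2, 1, 0, 0, W, H), W, H, D);
  assert(from_cube0 == from_cube1);
  return 0;
}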
-// -// Args: -// voxelVerts: pointer to device array to store number -// of verts per voxel -// voxelOccupied: pointer to device array to store -// occupancy state per voxel -// vol: torch tensor stored with 3D scalar field -// isolevel: threshold to determine isosurface intersection -// -__global__ void ClassifyVoxelKernel( - at::PackedTensorAccessor32 voxelVerts, - at::PackedTensorAccessor32 voxelOccupied, - const at::PackedTensorAccessor32 vol, - // const at::PackedTensorAccessor - // numVertsTable, - float isolevel) { - const int indexTable[8]{0, 1, 4, 5, 3, 2, 7, 6}; - const uint D = vol.size(0) - 1; - const uint H = vol.size(1) - 1; - const uint W = vol.size(2) - 1; - - // 1-d grid - uint id = blockIdx.x * blockDim.x + threadIdx.x; - uint num_threads = gridDim.x * blockDim.x; - - // Table mapping from cubeindex to number of vertices in the configuration - const unsigned char numVertsTable[256] = { - 0, 3, 3, 6, 3, 6, 6, 9, 3, 6, 6, 9, 6, 9, 9, 6, 3, 6, - 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 3, 6, 6, 9, - 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 6, 9, 12, - 12, 9, 9, 12, 12, 9, 12, 15, 15, 6, 3, 6, 6, 9, 6, 9, 9, 12, - 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, - 12, 15, 12, 15, 15, 12, 6, 9, 9, 12, 9, 12, 6, 9, 9, 12, 12, 15, - 12, 15, 9, 6, 9, 12, 12, 9, 12, 15, 9, 6, 12, 15, 15, 12, 15, 6, - 12, 3, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, - 6, 9, 9, 12, 9, 12, 12, 15, 9, 6, 12, 9, 12, 9, 15, 6, 6, 9, - 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 12, 15, 15, 12, 9, 12, 12, 9, - 12, 15, 15, 12, 12, 9, 15, 6, 15, 12, 6, 3, 6, 9, 9, 12, 9, 12, - 12, 15, 9, 12, 12, 15, 6, 9, 9, 6, 9, 12, 12, 15, 12, 15, 15, 6, - 12, 9, 15, 12, 9, 6, 12, 3, 9, 12, 12, 15, 12, 15, 9, 12, 12, 15, - 15, 6, 9, 12, 6, 3, 6, 9, 9, 6, 9, 12, 6, 3, 9, 6, 12, 3, - 6, 3, 3, 0, - }; - - for (uint tid = id; tid < D * H * W; tid += num_threads) { - // compute global location of the voxel - const int gx = tid % W; - const int gy = tid / W % H; - const int gz = tid / (W * H); - - int cubeindex = 0; - for (int i = 0; i < 8; i++) { - const int dx = i & 1; - const int dy = i >> 1 & 1; - const int dz = i >> 2 & 1; - - const int x = gx + dx; - const int y = gy + dy; - const int z = gz + dz; - - if (vol[z][y][x] < isolevel) { - cubeindex |= 1 << indexTable[i]; - } - } - // collect number of vertices for each voxel - unsigned char numVerts = numVertsTable[cubeindex]; - voxelVerts[tid] = numVerts; - voxelOccupied[tid] = (numVerts > 0); - } -} - -// extract compact voxel array for acceleration -// -// Args: -// compactedVoxelArray: tensor of shape (activeVoxels,) which maps -// from accumulated non-empty voxel index to original 3d grid index -// voxelOccupied: tensor of shape (numVoxels,) which stores -// the occupancy state per voxel -// voxelOccupiedScan: tensor of shape (numVoxels,) which -// stores the accumulated occupied voxel counts -// numVoxels: number of total voxels in the grid -// -__global__ void CompactVoxelsKernel( - at::PackedTensorAccessor32 - compactedVoxelArray, - const at::PackedTensorAccessor32 - voxelOccupied, - const at::PackedTensorAccessor32 - voxelOccupiedScan, - uint numVoxels) { - uint id = blockIdx.x * blockDim.x + threadIdx.x; - uint num_threads = gridDim.x * blockDim.x; - for (uint tid = id; tid < numVoxels; tid += num_threads) { - if (voxelOccupied[tid]) { - compactedVoxelArray[voxelOccupiedScan[tid]] = tid; - } - } -} - -// generate triangles for each voxel using marching cubes -// -// Args: -// verts: torch tensor of shape (V, 3) to store interpolated 
mesh vertices -// faces: torch tensor of shape (F, 3) to store indices for mesh faces -// ids: torch tensor of shape (V) to store id of each vertex -// compactedVoxelArray: tensor of shape (activeVoxels,) which stores -// non-empty voxel index. -// numVertsScanned: tensor of shape (numVoxels,) which stores accumulated -// vertices count in the voxel -// activeVoxels: number of active voxels used for acceleration -// vol: torch tensor stored with 3D scalar field -// isolevel: threshold to determine isosurface intersection -// -__global__ void GenerateFacesKernel( - at::PackedTensorAccessor32 verts, - at::PackedTensorAccessor faces, - at::PackedTensorAccessor ids, - at::PackedTensorAccessor32 - compactedVoxelArray, - at::PackedTensorAccessor32 - numVertsScanned, - const uint activeVoxels, - const at::PackedTensorAccessor32 vol, - const at::PackedTensorAccessor32 faceTable, - // const at::PackedTensorAccessor32 - // numVertsTable, - const float isolevel) { - uint id = blockIdx.x * blockDim.x + threadIdx.x; - uint num_threads = gridDim.x * blockDim.x; - const int faces_size = faces.size(0); - // Table mapping each edge to the corresponding cube vertices offsets - const int edgeToVertsTable[12][2] = { - {0, 1}, - {1, 5}, - {4, 5}, - {0, 4}, - {2, 3}, - {3, 7}, - {6, 7}, - {2, 6}, - {0, 2}, - {1, 3}, - {5, 7}, - {4, 6}, - }; - - // Table mapping from cubeindex to number of vertices in the configuration - const unsigned char numVertsTable[256] = { - 0, 3, 3, 6, 3, 6, 6, 9, 3, 6, 6, 9, 6, 9, 9, 6, 3, 6, - 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 3, 6, 6, 9, - 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 6, 9, 12, - 12, 9, 9, 12, 12, 9, 12, 15, 15, 6, 3, 6, 6, 9, 6, 9, 9, 12, - 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, - 12, 15, 12, 15, 15, 12, 6, 9, 9, 12, 9, 12, 6, 9, 9, 12, 12, 15, - 12, 15, 9, 6, 9, 12, 12, 9, 12, 15, 9, 6, 12, 15, 15, 12, 15, 6, - 12, 3, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, - 6, 9, 9, 12, 9, 12, 12, 15, 9, 6, 12, 9, 12, 9, 15, 6, 6, 9, - 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 12, 15, 15, 12, 9, 12, 12, 9, - 12, 15, 15, 12, 12, 9, 15, 6, 15, 12, 6, 3, 6, 9, 9, 12, 9, 12, - 12, 15, 9, 12, 12, 15, 6, 9, 9, 6, 9, 12, 12, 15, 12, 15, 15, 6, - 12, 9, 15, 12, 9, 6, 12, 3, 9, 12, 12, 15, 12, 15, 9, 12, 12, 15, - 15, 6, 9, 12, 6, 3, 6, 9, 9, 6, 9, 12, 6, 3, 9, 6, 12, 3, - 6, 3, 3, 0, - }; - - for (uint tid = id; tid < activeVoxels; tid += num_threads) { - uint voxel = compactedVoxelArray[tid]; // maps from accumulated id to - // original 3d voxel id - // mapping from offsets to vi index - int indexTable[8]{0, 1, 4, 5, 3, 2, 7, 6}; - // field value for each vertex - float val[8]; - // position for each vertex - float3 p[8]; - // 3d address - const uint D = vol.size(0) - 1; - const uint H = vol.size(1) - 1; - const uint W = vol.size(2) - 1; - - const int gx = voxel % W; - const int gy = voxel / W % H; - const int gz = voxel / (W * H); - - // recalculate cubeindex; - uint cubeindex = 0; - for (int i = 0; i < 8; i++) { - const int dx = i & 1; - const int dy = i >> 1 & 1; - const int dz = i >> 2 & 1; - - const int x = gx + dx; - const int y = gy + dy; - const int z = gz + dz; - - if (vol[z][y][x] < isolevel) { - cubeindex |= 1 << indexTable[i]; - } - val[indexTable[i]] = vol[z][y][x]; // maps from vi to volume - p[indexTable[i]] = make_float3(x, y, z); // maps from vi to position - } - - // Interpolate vertices where the surface intersects the cube - float3 vertlist[12]; - vertlist[0] = vertexInterp(isolevel, p[0], p[1], val[0], val[1]); - 
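// Note: vertlist holds one candidate vertex per cube edge, using the same
// 0-11 edge numbering as edgeToVertsTable above and faceTable below. All
// twelve entries are filled unconditionally; the emission loop further down
// only reads the entries whose edge ids appear in faceTable[cubeindex],
// three per output triangle.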
vertlist[1] = vertexInterp(isolevel, p[1], p[2], val[1], val[2]); - vertlist[2] = vertexInterp(isolevel, p[3], p[2], val[3], val[2]); - vertlist[3] = vertexInterp(isolevel, p[0], p[3], val[0], val[3]); - - vertlist[4] = vertexInterp(isolevel, p[4], p[5], val[4], val[5]); - vertlist[5] = vertexInterp(isolevel, p[5], p[6], val[5], val[6]); - vertlist[6] = vertexInterp(isolevel, p[7], p[6], val[7], val[6]); - vertlist[7] = vertexInterp(isolevel, p[4], p[7], val[4], val[7]); - - vertlist[8] = vertexInterp(isolevel, p[0], p[4], val[0], val[4]); - vertlist[9] = vertexInterp(isolevel, p[1], p[5], val[1], val[5]); - vertlist[10] = vertexInterp(isolevel, p[2], p[6], val[2], val[6]); - vertlist[11] = vertexInterp(isolevel, p[3], p[7], val[3], val[7]); - - // output triangle faces - uint numVerts = numVertsTable[cubeindex]; - - for (int i = 0; i < numVerts; i++) { - int index = numVertsScanned[voxel] + i; - unsigned char edge = faceTable[cubeindex][i]; - - uint v1 = edgeToVertsTable[edge][0]; - uint v2 = edgeToVertsTable[edge][1]; - uint v1_id = localToGlobal(v1, gx, gy, gz, W + 1, H + 1); - uint v2_id = localToGlobal(v2, gx, gy, gz, W + 1, H + 1); - int64_t edge_id = hashVpair(v1_id, v2_id, W + 1, H + 1, D + 1); - - verts[index][0] = vertlist[edge].x; - verts[index][1] = vertlist[edge].y; - verts[index][2] = vertlist[edge].z; - - if (index < faces_size) { - faces[index][0] = index * 3 + 0; - faces[index][1] = index * 3 + 1; - faces[index][2] = index * 3 + 2; - } - - ids[index] = edge_id; - } - } // end for grid-strided kernel -} - -// Entrance for marching cubes cuda extension. Marching Cubes is an algorithm to -// create triangle meshes from an implicit function (one of the form f(x, y, z) -// = 0). It works by iteratively checking a grid of cubes superimposed over a -// region of the function. The number of faces and positions of the vertices in -// each cube are determined by the the isolevel as well as the volume values -// from the eight vertices of the cube. -// -// We implement this algorithm with two forward passes where the first pass -// checks the occupancy and collects number of vertices for each cube. The -// second pass will skip empty voxels and generate vertices as well as faces for -// each cube through table lookup. The vertex positions, faces and identifiers -// for each vertex will be returned. -// -// -// Args: -// vol: torch tensor of shape (D, H, W) for volume scalar field -// isolevel: threshold to determine isosurface intesection -// -// Returns: -// tuple of : which stores vertex positions, face -// indices and integer identifiers for each vertex. -// verts: (N_verts, 3) FloatTensor for vertex positions -// faces: (N_faces, 3) LongTensor of face indices -// ids: (N_verts,) LongTensor used to identify each vertex. Vertices from -// adjacent edges can share the same 3d position. To reduce memory -// redudancy, we tag each vertex with a unique id for deduplication. In -// contrast to deduping on vertices, this has the benefit to avoid -// floating point precision issues. 
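Putting the two passes together, the exposed entry point takes a dense scalar volume and an isolevel and returns the mesh. A minimal usage sketch, assuming the extension is built with CUDA support and that the marching_cubes.h header shown later in this diff is on the include path; the sphere volume is illustrative:

#include <tuple>
#include <torch/torch.h>
#include "marching_cubes/marching_cubes.h"

std::tuple<at::Tensor, at::Tensor, at::Tensor> sphereMesh() {
  const int R = 32;
  // Signed distance to a sphere of radius 10 voxels on an (R, R, R) grid:
  // negative inside, positive outside.
  at::Tensor c = at::arange(R, at::kFloat) - (R - 1) / 2.0f;
  at::Tensor z = c.view({R, 1, 1});
  at::Tensor y = c.view({1, R, 1});
  at::Tensor x = c.view({1, 1, R});
  at::Tensor vol = (z * z + y * y + x * x).sqrt() - 10.0f;
  // Extract the zero level set; verts is (V, 3), faces is (F, 3), and ids
  // tags duplicated vertices so they can be merged afterwards.
  return MarchingCubes(vol.cuda().contiguous(), /*isolevel=*/0.0f);
}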
-// -std::tuple MarchingCubesCuda( - const at::Tensor& vol, - const float isolevel) { - // Set the device for the kernel launch based on the device of vol - at::cuda::CUDAGuard device_guard(vol.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - // transfer _FACE_TABLE data to device - at::Tensor face_table_tensor = at::zeros( - {256, 16}, at::TensorOptions().dtype(at::kInt).device(at::kCPU)); - auto face_table_a = face_table_tensor.accessor(); - for (int i = 0; i < 256; i++) { - for (int j = 0; j < 16; j++) { - face_table_a[i][j] = _FACE_TABLE[i][j]; - } - } - at::Tensor faceTable = face_table_tensor.to(vol.device()); - - // get numVoxels - int threads = 128; - const uint D = vol.size(0); - const uint H = vol.size(1); - const uint W = vol.size(2); - const int numVoxels = (D - 1) * (H - 1) * (W - 1); - dim3 grid((numVoxels + threads - 1) / threads, 1, 1); - if (grid.x > 65535) { - grid.x = 65535; - } - - using at::indexing::None; - using at::indexing::Slice; - - auto d_voxelVerts = - at::zeros({numVoxels + 1}, at::TensorOptions().dtype(at::kInt)) - .to(vol.device()); - auto d_voxelVerts_ = d_voxelVerts.index({Slice(1, None)}); - auto d_voxelOccupied = - at::zeros({numVoxels + 1}, at::TensorOptions().dtype(at::kInt)) - .to(vol.device()); - auto d_voxelOccupied_ = d_voxelOccupied.index({Slice(1, None)}); - - // Execute "ClassifyVoxelKernel" kernel to precompute - // two arrays - d_voxelOccupied and d_voxelVertices to global memory, - // which stores the occupancy state and number of voxel vertices per voxel. - ClassifyVoxelKernel<<>>( - d_voxelVerts_.packed_accessor32(), - d_voxelOccupied_.packed_accessor32(), - vol.packed_accessor32(), - isolevel); - AT_CUDA_CHECK(cudaGetLastError()); - cudaDeviceSynchronize(); - - // Scan "d_voxelOccupied" array to generate accumulated voxel occupancy - // count for voxels in the grid and compute the number of active voxels. - // If the number of active voxels is 0, return zero tensor for verts and - // faces. - - auto d_voxelOccupiedScan = at::cumsum(d_voxelOccupied, 0); - auto d_voxelOccupiedScan_ = d_voxelOccupiedScan.index({Slice(1, None)}); - - // number of active voxels - int64_t activeVoxels = d_voxelOccupiedScan[numVoxels].cpu().item(); - - const int device_id = vol.device().index(); - auto opt = at::TensorOptions().dtype(at::kInt).device(at::kCUDA, device_id); - auto opt_long = - at::TensorOptions().dtype(at::kLong).device(at::kCUDA, device_id); - - if (activeVoxels == 0) { - int ntris = 0; - at::Tensor verts = at::zeros({ntris * 3, 3}, vol.options()); - at::Tensor faces = at::zeros({ntris, 3}, opt_long); - at::Tensor ids = at::zeros({ntris}, opt_long); - return std::make_tuple(verts, faces, ids); - } - - // Execute "CompactVoxelsKernel" kernel to compress voxels for accleration. - // This allows us to run triangle generation on only the occupied voxels. - auto d_compVoxelArray = at::zeros({activeVoxels}, opt); - CompactVoxelsKernel<<>>( - d_compVoxelArray.packed_accessor32(), - d_voxelOccupied.packed_accessor32(), - d_voxelOccupiedScan_ - .packed_accessor32(), - numVoxels); - AT_CUDA_CHECK(cudaGetLastError()); - cudaDeviceSynchronize(); - - // Scan d_voxelVerts array to generate offsets of vertices for each voxel - auto d_voxelVertsScan = at::cumsum(d_voxelVerts, 0); - auto d_voxelVertsScan_ = d_voxelVertsScan.index({Slice(1, None)}); - - // total number of vertices - int64_t totalVerts = d_voxelVertsScan[numVoxels].cpu().item(); - - // Execute "GenerateFacesKernel" kernel - // This runs only on the occupied voxels. 
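The size-(numVoxels + 1) tensors and the Slice(1, None) views above turn at::cumsum into an exclusive scan: entry i of each scanned array counts how many occupied voxels (respectively emitted vertices) precede voxel i. A CPU-only sketch of the same bookkeeping on made-up counts:

#include <cstdio>
#include <vector>

int main() {
  // Per-voxel vertex counts and occupancy, as ClassifyVoxelKernel produces.
  std::vector<int> voxelVerts = {0, 3, 0, 6, 3};
  std::vector<int> voxelOccupied = {0, 1, 0, 1, 1};
  const int numVoxels = static_cast<int>(voxelVerts.size());

  // Exclusive scans: scan[i] = sum of the first i entries.
  std::vector<int> vertsScan(numVoxels + 1, 0), occScan(numVoxels + 1, 0);
  for (int i = 0; i < numVoxels; ++i) {
    vertsScan[i + 1] = vertsScan[i] + voxelVerts[i];
    occScan[i + 1] = occScan[i] + voxelOccupied[i];
  }
  const int activeVoxels = occScan[numVoxels]; // 3
  const int totalVerts = vertsScan[numVoxels]; // 12

  // CompactVoxelsKernel equivalent: compacted index -> original voxel index.
  std::vector<int> compacted(activeVoxels);
  for (int i = 0; i < numVoxels; ++i) {
    if (voxelOccupied[i]) {
      compacted[occScan[i]] = i;
    }
  }
  // GenerateFacesKernel writes the vertices of voxel v starting at offset
  // vertsScan[v]; e.g. voxel 3 starts writing at offset 3 here.
  std::printf("%d %d %d %d\n", activeVoxels, totalVerts, compacted[1], vertsScan[3]);
  return 0;
}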
- // It looks up the field values and generates the triangle data. - at::Tensor verts = at::zeros({totalVerts, 3}, vol.options()); - at::Tensor faces = at::zeros({totalVerts / 3, 3}, opt_long); - - at::Tensor ids = at::zeros({totalVerts}, opt_long); - - dim3 grid2((activeVoxels + threads - 1) / threads, 1, 1); - if (grid2.x > 65535) { - grid2.x = 65535; - } - - GenerateFacesKernel<<>>( - verts.packed_accessor32(), - faces.packed_accessor(), - ids.packed_accessor(), - d_compVoxelArray.packed_accessor32(), - d_voxelVertsScan_.packed_accessor32(), - activeVoxels, - vol.packed_accessor32(), - faceTable.packed_accessor32(), - isolevel); - AT_CUDA_CHECK(cudaGetLastError()); - - return std::make_tuple(verts, faces, ids); -} diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.h b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.h deleted file mode 100644 index 51c660b18076014fd8717ef57eb93af328785f56..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Run Marching Cubes algorithm over a batch of volume scalar fields -// with a pre-defined threshold and return a mesh composed of vertices -// and faces for the mesh. -// -// Args: -// vol: FloatTensor of shape (D, H, W) giving a volume -// scalar grids. -// isolevel: isosurface value to use as the threshoold to determine whether -// the points are within a volume. -// -// Returns: -// vertices: (N_verts, 3) FloatTensor of vertices -// faces: (N_faces, 3) LongTensor of faces -// ids: (N_verts,) LongTensor used to identify each vertex and deduplication -// to avoid floating point precision issues. -// For Cuda, will be used to dedupe redundant vertices. -// For cpp implementation, this tensor is just a placeholder. - -// CPU implementation -std::tuple MarchingCubesCpu( - const at::Tensor& vol, - const float isolevel); - -// CUDA implementation -std::tuple MarchingCubesCuda( - const at::Tensor& vol, - const float isolevel); - -// Implementation which is exposed -inline std::tuple MarchingCubes( - const at::Tensor& vol, - const float isolevel) { - if (vol.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(vol); - const int D = vol.size(0); - const int H = vol.size(1); - const int W = vol.size(2); - if (D > 1024 || H > 1024 || W > 1024) { - AT_ERROR("Maximum volume size allowed 1K x 1K x 1K"); - } - return MarchingCubesCuda(vol.contiguous(), isolevel); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return MarchingCubesCpu(vol.contiguous(), isolevel); -} diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_cpu.cpp b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_cpu.cpp deleted file mode 100644 index fa128e714228fd4c4b699ee13071b752dd8fdf4c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_cpu.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include "marching_cubes/marching_cubes_utils.h" -#include "marching_cubes/tables.h" - -// Cpu implementation for Marching Cubes -// Args: -// vol: a Tensor of size (D, H, W) corresponding to a 3D scalar field -// isolevel: the isosurface value to use as the threshold to determine -// whether points are within a volume. -// -// Returns: -// vertices: a float tensor of shape (N_verts, 3) for positions of the mesh -// faces: a long tensor of shape (N_faces, 3) for indices of the face -// ids: a long tensor of shape (N_verts) as placeholder -// -std::tuple MarchingCubesCpu( - const at::Tensor& vol, - const float isolevel) { - // volume shapes - const int D = vol.size(0); - const int H = vol.size(1); - const int W = vol.size(2); - - // Create tensor accessors - auto vol_a = vol.accessor(); - // edge_id_to_v maps from an edge id to a vertex position - std::unordered_map edge_id_to_v; - // uniq_edge_id: used to remove redundant edge ids - std::unordered_map uniq_edge_id; - std::vector faces; // store face indices - std::vector verts; // store vertex positions - // enumerate each cell in the 3d grid - for (int z = 0; z < D - 1; z++) { - for (int y = 0; y < H - 1; y++) { - for (int x = 0; x < W - 1; x++) { - Cube cube(x, y, z, vol_a, isolevel); - // Cube is entirely in/out of the surface - if (_FACE_TABLE[cube.cubeindex][0] == 255) { - continue; - } - // store all boundary vertices that intersect with the edges - std::array interp_points; - // triangle vertex IDs and positions - std::vector tri; - std::vector ps; - - // Interpolate the vertices where the surface intersects with the cube - for (int j = 0; _FACE_TABLE[cube.cubeindex][j] != 255; j++) { - const int e = _FACE_TABLE[cube.cubeindex][j]; - interp_points[e] = cube.VertexInterp(isolevel, e, vol_a); - - int64_t edge = cube.HashVpair(e, W, H, D); - tri.push_back(edge); - ps.push_back(interp_points[e]); - - // Check if the triangle face is degenerate. 
A triangle face - // is degenerate if any of the two verices share the same 3D position - if ((j + 1) % 3 == 0 && ps[0] != ps[1] && ps[1] != ps[2] && - ps[2] != ps[0]) { - for (int k = 0; k < 3; k++) { - int64_t v = tri.at(k); - edge_id_to_v[v] = ps.at(k); - if (!uniq_edge_id.count(v)) { - uniq_edge_id[v] = verts.size(); - verts.push_back(edge_id_to_v[v]); - } - faces.push_back(uniq_edge_id[v]); - } - tri.clear(); - ps.clear(); - } // endif - } // endfor edge enumeration - } // endfor x - } // endfor y - } // endfor z - // Collect returning tensor - const int n_vertices = verts.size(); - const int64_t n_faces = (int64_t)faces.size() / 3; - auto vert_tensor = torch::zeros({n_vertices, 3}, torch::kFloat); - auto id_tensor = torch::zeros({n_vertices}, torch::kInt64); // placeholder - auto face_tensor = torch::zeros({n_faces, 3}, torch::kInt64); - - auto vert_a = vert_tensor.accessor(); - for (int i = 0; i < n_vertices; i++) { - vert_a[i][0] = verts.at(i).x; - vert_a[i][1] = verts.at(i).y; - vert_a[i][2] = verts.at(i).z; - } - - auto face_a = face_tensor.accessor(); - for (int64_t i = 0; i < n_faces; i++) { - face_a[i][0] = faces.at(i * 3 + 0); - face_a[i][1] = faces.at(i * 3 + 1); - face_a[i][2] = faces.at(i * 3 + 2); - } - - return std::make_tuple(vert_tensor, face_tensor, id_tensor); -} diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_utils.h b/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_utils.h deleted file mode 100644 index 486e0339eda613f2886bb3165a0fde1d0a5d6bf7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/marching_cubes/marching_cubes_utils.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#pragma once -#include -#include -#include -#include -#include "ATen/core/TensorAccessor.h" -#include "marching_cubes/tables.h" - -// EPS: Used to assess whether two float values are close -const float EPS = 1e-5; - -// Data structures for the marching cubes -struct Vertex { - // Constructor used when performing marching cube in each cell - explicit Vertex(float x = 0.0f, float y = 0.0f, float z = 0.0f) - : x(x), y(y), z(z) {} - - // The */+ operator overrides are used for vertex interpolation - Vertex operator*(float s) const { - return Vertex(x * s, y * s, z * s); - } - Vertex operator+(const Vertex& xyz) const { - return Vertex(x + xyz.x, y + xyz.y, z + xyz.z); - } - // The =/!= operator overrides is used for checking degenerate triangles - bool operator==(const Vertex& xyz) const { - return ( - std::abs(x - xyz.x) < EPS && std::abs(y - xyz.y) < EPS && - std::abs(z - xyz.z) < EPS); - } - bool operator!=(const Vertex& xyz) const { - return ( - std::abs(x - xyz.x) >= EPS || std::abs(y - xyz.y) >= EPS || - std::abs(z - xyz.z) >= EPS); - } - // vertex position - float x, y, z; -}; - -struct Cube { - // Edge and vertex convention: - // v4_______e4____________v5 - // /| /| - // / | / | - // e7/ | e5/ | - // /___|______e6_________/ | - // v7| | |v6 |e9 - // | | | | - // | |e8 |e10| - // e11| | | | - // | |_________________|___| - // | / v0 e0 | /v1 - // | / | / - // | /e3 | /e1 - // |/_____________________|/ - // v3 e2 v2 - - Vertex p[8]; - int x, y, z; - int cubeindex = 0; - Cube( - int x, - int y, - int z, - const at::TensorAccessor& vol_a, - const float isolevel) - : x(x), y(y), z(z) { - // vertex position (x, y, z) for v0-v1-v4-v5-v3-v2-v7-v6 - for (int v = 0; v < 8; v++) { - p[v] = Vertex(x + (v & 1), y + (v >> 1 & 1), z + (v >> 2 & 1)); - } - // Calculates cube configuration index given values of the cube vertices - for (int i = 0; i < 8; i++) { - const int idx = _INDEX_TABLE[i]; - Vertex v = p[idx]; - if (vol_a[v.z][v.y][v.x] < isolevel) { - cubeindex |= (1 << i); - } - } - } - - // Linearly interpolate the position where an isosurface cuts an edge - // between two vertices, based on their scalar values - // - // Args: - // isolevel: float value used as threshold - // edge: edge (ID) to interpolate - // cube: current cube vertices - // vol_a: 3D scalar field - // - // Returns: - // point: interpolated vertex - Vertex VertexInterp( - float isolevel, - const int edge, - const at::TensorAccessor& vol_a) { - const int v1 = _EDGE_TO_VERTICES[edge][0]; - const int v2 = _EDGE_TO_VERTICES[edge][1]; - Vertex p1 = p[v1]; - Vertex p2 = p[v2]; - float val1 = vol_a[p1.z][p1.y][p1.x]; - float val2 = vol_a[p2.z][p2.y][p2.x]; - - float ratio = 1.0f; - if (std::abs(isolevel - val1) < EPS) { - return p1; - } else if (std::abs(isolevel - val2) < EPS) { - return p2; - } else if (std::abs(val1 - val2) < EPS) { - return p1; - } - // interpolate vertex p based on two vertices on the edge - ratio = (isolevel - val1) / (val2 - val1); - return p1 * (1 - ratio) + p2 * ratio; - } - - // Hash an edge into a global edge_id. The function binds an - // edge with an integer to address floating point precision issue. 
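// In other words, duplicate vertices are detected by comparing these integer
// edge ids rather than the interpolated float coordinates, which could differ
// by rounding between the two cubes that share the edge.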
- // - // Args: - // v1_id: global id of vertex 1 - // v2_id: global id of vertex 2 - // W: width of the 3d grid - // H: height of the 3d grid - // D: depth of the 3d grid - // - // Returns: - // hashing for a pair of vertex ids - // - int64_t HashVpair(const int edge, int W, int H, int D) { - const int v1 = _EDGE_TO_VERTICES[edge][0]; - const int v2 = _EDGE_TO_VERTICES[edge][1]; - const int v1_id = p[v1].x + p[v1].y * W + p[v1].z * W * H; - const int v2_id = p[v2].x + p[v2].y * W + p[v2].z * W * H; - return (int64_t)v1_id * (W + W * H + W * H * D) + (int64_t)v2_id; - } -}; diff --git a/pytorch3d/pytorch3d/csrc/marching_cubes/tables.h b/pytorch3d/pytorch3d/csrc/marching_cubes/tables.h deleted file mode 100644 index 3aff617c53e5ba963c014cabd7beb1c2cd61a053..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/marching_cubes/tables.h +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -using uint = unsigned int; - -// A table mapping from cubeindex to a list of face configurations. -// Each list contains at most 5 faces, where each face is represented with -// 3 consecutive numbers -// Table adapted from http://paulbourke.net/geometry/polygonise/ -// -#define X 255 -const unsigned char _FACE_TABLE[256][16] = { - {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {0, 8, 3, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {0, 1, 9, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {1, 8, 3, 9, 8, 1, X, X, X, X, X, X, X, X, X, X}, - {1, 2, 10, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {0, 8, 3, 1, 2, 10, X, X, X, X, X, X, X, X, X, X}, - {9, 2, 10, 0, 2, 9, X, X, X, X, X, X, X, X, X, X}, - {2, 8, 3, 2, 10, 8, 10, 9, 8, X, X, X, X, X, X, X}, - {3, 11, 2, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {0, 11, 2, 8, 11, 0, X, X, X, X, X, X, X, X, X, X}, - {1, 9, 0, 2, 3, 11, X, X, X, X, X, X, X, X, X, X}, - {1, 11, 2, 1, 9, 11, 9, 8, 11, X, X, X, X, X, X, X}, - {3, 10, 1, 11, 10, 3, X, X, X, X, X, X, X, X, X, X}, - {0, 10, 1, 0, 8, 10, 8, 11, 10, X, X, X, X, X, X, X}, - {3, 9, 0, 3, 11, 9, 11, 10, 9, X, X, X, X, X, X, X}, - {9, 8, 10, 10, 8, 11, X, X, X, X, X, X, X, X, X, X}, - {4, 7, 8, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {4, 3, 0, 7, 3, 4, X, X, X, X, X, X, X, X, X, X}, - {0, 1, 9, 8, 4, 7, X, X, X, X, X, X, X, X, X, X}, - {4, 1, 9, 4, 7, 1, 7, 3, 1, X, X, X, X, X, X, X}, - {1, 2, 10, 8, 4, 7, X, X, X, X, X, X, X, X, X, X}, - {3, 4, 7, 3, 0, 4, 1, 2, 10, X, X, X, X, X, X, X}, - {9, 2, 10, 9, 0, 2, 8, 4, 7, X, X, X, X, X, X, X}, - {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, X, X, X, X}, - {8, 4, 7, 3, 11, 2, X, X, X, X, X, X, X, X, X, X}, - {11, 4, 7, 11, 2, 4, 2, 0, 4, X, X, X, X, X, X, X}, - {9, 0, 1, 8, 4, 7, 2, 3, 11, X, X, X, X, X, X, X}, - {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, X, X, X, X}, - {3, 10, 1, 3, 11, 10, 7, 8, 4, X, X, X, X, X, X, X}, - {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, X, X, X, X}, - {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, X, X, X, X}, - {4, 7, 11, 4, 11, 9, 9, 11, 10, X, X, X, X, X, X, X}, - {9, 5, 4, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {9, 5, 4, 0, 8, 3, X, X, X, X, X, X, X, X, X, X}, - {0, 5, 4, 1, 5, 0, X, X, X, X, X, X, X, X, X, X}, - {8, 5, 4, 8, 3, 5, 3, 1, 5, X, X, X, X, X, X, X}, - {1, 2, 10, 9, 5, 4, X, X, X, X, X, X, X, X, X, X}, - {3, 0, 8, 1, 2, 10, 4, 9, 5, X, X, X, X, X, X, X}, - {5, 2, 10, 5, 4, 2, 4, 0, 2, X, X, X, X, X, X, X}, - 
{2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, X, X, X, X}, - {9, 5, 4, 2, 3, 11, X, X, X, X, X, X, X, X, X, X}, - {0, 11, 2, 0, 8, 11, 4, 9, 5, X, X, X, X, X, X, X}, - {0, 5, 4, 0, 1, 5, 2, 3, 11, X, X, X, X, X, X, X}, - {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, X, X, X, X}, - {10, 3, 11, 10, 1, 3, 9, 5, 4, X, X, X, X, X, X, X}, - {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, X, X, X, X}, - {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, X, X, X, X}, - {5, 4, 8, 5, 8, 10, 10, 8, 11, X, X, X, X, X, X, X}, - {9, 7, 8, 5, 7, 9, X, X, X, X, X, X, X, X, X, X}, - {9, 3, 0, 9, 5, 3, 5, 7, 3, X, X, X, X, X, X, X}, - {0, 7, 8, 0, 1, 7, 1, 5, 7, X, X, X, X, X, X, X}, - {1, 5, 3, 3, 5, 7, X, X, X, X, X, X, X, X, X, X}, - {9, 7, 8, 9, 5, 7, 10, 1, 2, X, X, X, X, X, X, X}, - {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, X, X, X, X}, - {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, X, X, X, X}, - {2, 10, 5, 2, 5, 3, 3, 5, 7, X, X, X, X, X, X, X}, - {7, 9, 5, 7, 8, 9, 3, 11, 2, X, X, X, X, X, X, X}, - {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, X, X, X, X}, - {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, X, X, X, X}, - {11, 2, 1, 11, 1, 7, 7, 1, 5, X, X, X, X, X, X, X}, - {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, X, X, X, X}, - {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, X}, - {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, X}, - {11, 10, 5, 7, 11, 5, X, X, X, X, X, X, X, X, X, X}, - {10, 6, 5, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {0, 8, 3, 5, 10, 6, X, X, X, X, X, X, X, X, X, X}, - {9, 0, 1, 5, 10, 6, X, X, X, X, X, X, X, X, X, X}, - {1, 8, 3, 1, 9, 8, 5, 10, 6, X, X, X, X, X, X, X}, - {1, 6, 5, 2, 6, 1, X, X, X, X, X, X, X, X, X, X}, - {1, 6, 5, 1, 2, 6, 3, 0, 8, X, X, X, X, X, X, X}, - {9, 6, 5, 9, 0, 6, 0, 2, 6, X, X, X, X, X, X, X}, - {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, X, X, X, X}, - {2, 3, 11, 10, 6, 5, X, X, X, X, X, X, X, X, X, X}, - {11, 0, 8, 11, 2, 0, 10, 6, 5, X, X, X, X, X, X, X}, - {0, 1, 9, 2, 3, 11, 5, 10, 6, X, X, X, X, X, X, X}, - {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, X, X, X, X}, - {6, 3, 11, 6, 5, 3, 5, 1, 3, X, X, X, X, X, X, X}, - {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, X, X, X, X}, - {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, X, X, X, X}, - {6, 5, 9, 6, 9, 11, 11, 9, 8, X, X, X, X, X, X, X}, - {5, 10, 6, 4, 7, 8, X, X, X, X, X, X, X, X, X, X}, - {4, 3, 0, 4, 7, 3, 6, 5, 10, X, X, X, X, X, X, X}, - {1, 9, 0, 5, 10, 6, 8, 4, 7, X, X, X, X, X, X, X}, - {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, X, X, X, X}, - {6, 1, 2, 6, 5, 1, 4, 7, 8, X, X, X, X, X, X, X}, - {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, X, X, X, X}, - {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, X, X, X, X}, - {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, X}, - {3, 11, 2, 7, 8, 4, 10, 6, 5, X, X, X, X, X, X, X}, - {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, X, X, X, X}, - {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, X, X, X, X}, - {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, X}, - {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, X, X, X, X}, - {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, X}, - {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, X}, - {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, X, X, X, X}, - {10, 4, 9, 6, 4, 10, X, X, X, X, X, X, X, X, X, X}, - {4, 10, 6, 4, 9, 10, 0, 8, 3, X, X, X, X, X, X, X}, - {10, 0, 1, 10, 6, 0, 6, 4, 0, X, X, X, X, X, X, X}, - {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, X, X, X, X}, - {1, 4, 9, 1, 2, 4, 2, 6, 4, X, X, X, X, X, X, X}, - {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, X, X, X, X}, - {0, 2, 4, 4, 2, 6, X, X, X, X, X, X, X, X, X, X}, - {8, 3, 2, 8, 2, 4, 4, 2, 6, X, X, X, X, X, X, X}, - {10, 4, 9, 10, 6, 4, 11, 2, 3, X, X, X, X, X, X, X}, - {0, 8, 2, 2, 8, 11, 4, 9, 
10, 4, 10, 6, X, X, X, X}, - {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, X, X, X, X}, - {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, X}, - {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, X, X, X, X}, - {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, X}, - {3, 11, 6, 3, 6, 0, 0, 6, 4, X, X, X, X, X, X, X}, - {6, 4, 8, 11, 6, 8, X, X, X, X, X, X, X, X, X, X}, - {7, 10, 6, 7, 8, 10, 8, 9, 10, X, X, X, X, X, X, X}, - {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, X, X, X, X}, - {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, X, X, X, X}, - {10, 6, 7, 10, 7, 1, 1, 7, 3, X, X, X, X, X, X, X}, - {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, X, X, X, X}, - {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, X}, - {7, 8, 0, 7, 0, 6, 6, 0, 2, X, X, X, X, X, X, X}, - {7, 3, 2, 6, 7, 2, X, X, X, X, X, X, X, X, X, X}, - {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, X, X, X, X}, - {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, X}, - {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, X}, - {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, X, X, X, X}, - {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, X}, - {0, 9, 1, 11, 6, 7, X, X, X, X, X, X, X, X, X, X}, - {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, X, X, X, X}, - {7, 11, 6, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {7, 6, 11, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {3, 0, 8, 11, 7, 6, X, X, X, X, X, X, X, X, X, X}, - {0, 1, 9, 11, 7, 6, X, X, X, X, X, X, X, X, X, X}, - {8, 1, 9, 8, 3, 1, 11, 7, 6, X, X, X, X, X, X, X}, - {10, 1, 2, 6, 11, 7, X, X, X, X, X, X, X, X, X, X}, - {1, 2, 10, 3, 0, 8, 6, 11, 7, X, X, X, X, X, X, X}, - {2, 9, 0, 2, 10, 9, 6, 11, 7, X, X, X, X, X, X, X}, - {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, X, X, X, X}, - {7, 2, 3, 6, 2, 7, X, X, X, X, X, X, X, X, X, X}, - {7, 0, 8, 7, 6, 0, 6, 2, 0, X, X, X, X, X, X, X}, - {2, 7, 6, 2, 3, 7, 0, 1, 9, X, X, X, X, X, X, X}, - {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, X, X, X, X}, - {10, 7, 6, 10, 1, 7, 1, 3, 7, X, X, X, X, X, X, X}, - {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, X, X, X, X}, - {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, X, X, X, X}, - {7, 6, 10, 7, 10, 8, 8, 10, 9, X, X, X, X, X, X, X}, - {6, 8, 4, 11, 8, 6, X, X, X, X, X, X, X, X, X, X}, - {3, 6, 11, 3, 0, 6, 0, 4, 6, X, X, X, X, X, X, X}, - {8, 6, 11, 8, 4, 6, 9, 0, 1, X, X, X, X, X, X, X}, - {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, X, X, X, X}, - {6, 8, 4, 6, 11, 8, 2, 10, 1, X, X, X, X, X, X, X}, - {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, X, X, X, X}, - {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, X, X, X, X}, - {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, X}, - {8, 2, 3, 8, 4, 2, 4, 6, 2, X, X, X, X, X, X, X}, - {0, 4, 2, 4, 6, 2, X, X, X, X, X, X, X, X, X, X}, - {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, X, X, X, X}, - {1, 9, 4, 1, 4, 2, 2, 4, 6, X, X, X, X, X, X, X}, - {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, X, X, X, X}, - {10, 1, 0, 10, 0, 6, 6, 0, 4, X, X, X, X, X, X, X}, - {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, X}, - {10, 9, 4, 6, 10, 4, X, X, X, X, X, X, X, X, X, X}, - {4, 9, 5, 7, 6, 11, X, X, X, X, X, X, X, X, X, X}, - {0, 8, 3, 4, 9, 5, 11, 7, 6, X, X, X, X, X, X, X}, - {5, 0, 1, 5, 4, 0, 7, 6, 11, X, X, X, X, X, X, X}, - {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, X, X, X, X}, - {9, 5, 4, 10, 1, 2, 7, 6, 11, X, X, X, X, X, X, X}, - {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, X, X, X, X}, - {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, X, X, X, X}, - {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, X}, - {7, 2, 3, 7, 6, 2, 5, 4, 9, X, X, X, X, X, X, X}, - {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, X, X, X, X}, - {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, X, X, X, X}, - {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, X}, 
- {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, X, X, X, X}, - {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, X}, - {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, X}, - {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, X, X, X, X}, - {6, 9, 5, 6, 11, 9, 11, 8, 9, X, X, X, X, X, X, X}, - {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, X, X, X, X}, - {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, X, X, X, X}, - {6, 11, 3, 6, 3, 5, 5, 3, 1, X, X, X, X, X, X, X}, - {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, X, X, X, X}, - {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, X}, - {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, X}, - {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, X, X, X, X}, - {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, X, X, X, X}, - {9, 5, 6, 9, 6, 0, 0, 6, 2, X, X, X, X, X, X, X}, - {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, X}, - {1, 5, 6, 2, 1, 6, X, X, X, X, X, X, X, X, X, X}, - {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, X}, - {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, X, X, X, X}, - {0, 3, 8, 5, 6, 10, X, X, X, X, X, X, X, X, X, X}, - {10, 5, 6, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {11, 5, 10, 7, 5, 11, X, X, X, X, X, X, X, X, X, X}, - {11, 5, 10, 11, 7, 5, 8, 3, 0, X, X, X, X, X, X, X}, - {5, 11, 7, 5, 10, 11, 1, 9, 0, X, X, X, X, X, X, X}, - {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, X, X, X, X}, - {11, 1, 2, 11, 7, 1, 7, 5, 1, X, X, X, X, X, X, X}, - {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, X, X, X, X}, - {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, X, X, X, X}, - {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, X}, - {2, 5, 10, 2, 3, 5, 3, 7, 5, X, X, X, X, X, X, X}, - {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, X, X, X, X}, - {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, X, X, X, X}, - {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, X}, - {1, 3, 5, 3, 7, 5, X, X, X, X, X, X, X, X, X, X}, - {0, 8, 7, 0, 7, 1, 1, 7, 5, X, X, X, X, X, X, X}, - {9, 0, 3, 9, 3, 5, 5, 3, 7, X, X, X, X, X, X, X}, - {9, 8, 7, 5, 9, 7, X, X, X, X, X, X, X, X, X, X}, - {5, 8, 4, 5, 10, 8, 10, 11, 8, X, X, X, X, X, X, X}, - {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, X, X, X, X}, - {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, X, X, X, X}, - {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, X}, - {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, X, X, X, X}, - {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, X}, - {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, X}, - {9, 4, 5, 2, 11, 3, X, X, X, X, X, X, X, X, X, X}, - {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, X, X, X, X}, - {5, 10, 2, 5, 2, 4, 4, 2, 0, X, X, X, X, X, X, X}, - {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, X}, - {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, X, X, X, X}, - {8, 4, 5, 8, 5, 3, 3, 5, 1, X, X, X, X, X, X, X}, - {0, 4, 5, 1, 0, 5, X, X, X, X, X, X, X, X, X, X}, - {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, X, X, X, X}, - {9, 4, 5, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {4, 11, 7, 4, 9, 11, 9, 10, 11, X, X, X, X, X, X, X}, - {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, X, X, X, X}, - {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, X, X, X, X}, - {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, X}, - {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, X, X, X, X}, - {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, X}, - {11, 7, 4, 11, 4, 2, 2, 4, 0, X, X, X, X, X, X, X}, - {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, X, X, X, X}, - {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, X, X, X, X}, - {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, X}, - {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, X}, - {1, 10, 2, 8, 7, 4, X, X, X, X, X, X, X, X, X, X}, - {4, 9, 1, 4, 1, 7, 7, 1, 3, X, X, X, X, X, X, X}, - {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, X, X, X, X}, - 
{4, 0, 3, 7, 4, 3, X, X, X, X, X, X, X, X, X, X}, - {4, 8, 7, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {9, 10, 8, 10, 11, 8, X, X, X, X, X, X, X, X, X, X}, - {3, 0, 9, 3, 9, 11, 11, 9, 10, X, X, X, X, X, X, X}, - {0, 1, 10, 0, 10, 8, 8, 10, 11, X, X, X, X, X, X, X}, - {3, 1, 10, 11, 3, 10, X, X, X, X, X, X, X, X, X, X}, - {1, 2, 11, 1, 11, 9, 9, 11, 8, X, X, X, X, X, X, X}, - {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, X, X, X, X}, - {0, 2, 11, 8, 0, 11, X, X, X, X, X, X, X, X, X, X}, - {3, 2, 11, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {2, 3, 8, 2, 8, 10, 10, 8, 9, X, X, X, X, X, X, X}, - {9, 10, 2, 0, 9, 2, X, X, X, X, X, X, X, X, X, X}, - {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, X, X, X, X}, - {1, 10, 2, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {1, 3, 8, 9, 1, 8, X, X, X, X, X, X, X, X, X, X}, - {0, 9, 1, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {0, 3, 8, X, X, X, X, X, X, X, X, X, X, X, X, X}, - {X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X}}; -#undef X - -// Table mapping each edge to the corresponding cube vertices offsets -const uint _EDGE_TO_VERTICES[12][2] = { - {0, 1}, - {1, 5}, - {4, 5}, - {0, 4}, - {2, 3}, - {3, 7}, - {6, 7}, - {2, 6}, - {0, 2}, - {1, 3}, - {5, 7}, - {4, 6}, -}; - -// Table mapping from 0-7 to v0-v7 in cube.vertices -const int _INDEX_TABLE[8] = {0, 1, 5, 4, 2, 3, 7, 6}; diff --git a/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency.h b/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency.h deleted file mode 100644 index 17795ae1eb4b6bceb8a9960bc6d7523eb3c2acb6..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include "utils/pytorch3d_cutils.h" - -// For mesh_normal_consistency, find pairs of vertices opposite the same edge. -// -// Args: -// edge_num: int64 Tensor of shape (E,) giving the number of vertices -// corresponding to each edge. -// -// Returns: -// pairs: int64 Tensor of shape (N,2) - -at::Tensor MeshNormalConsistencyFindVerticesCpu(const at::Tensor& edge_num); - -// Exposed implementation. -at::Tensor MeshNormalConsistencyFindVertices(const at::Tensor& edge_num) { - if (edge_num.is_cuda()) { - AT_ERROR("This function needs a CPU tensor."); - } - return MeshNormalConsistencyFindVerticesCpu(edge_num); -} diff --git a/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency_cpu.cpp b/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency_cpu.cpp deleted file mode 100644 index 1b0d5e6ea0a76cb2aac5e3f6ddb8eca77635da97..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/mesh_normal_consistency/mesh_normal_consistency_cpu.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -at::Tensor MeshNormalConsistencyFindVerticesCpu(const at::Tensor& edge_num) { - // We take a LongTensor of shape (E,) giving the number of things intersecting - // each edge. The things are taken to be numbered in order. - // (In fact, the "things" are opposite vertices to edges, renumbered). 
- // We return a tensor of shape (?, 2) where for every pair of things which - // intersect the same edge there is a row of their numbers in the output. - - // Example possible inputs and outputs (order of output is not specified): - // [1,0,1,1,0] => [[]] - // [3] => [[0,1], [0,2], [1,2]] - // [0,3] => [[0,1], [0,2], [1,2]] - // [1,3] => [[1,2], [1,3], [2,3]] - //[1,0,2,1,0,2] => [[1,2], [4,5]] - - const auto num_edges = edge_num.size(0); - auto edges_a = edge_num.accessor(); - - int64_t vert_idx = 0; - std::vector> pairs; - for (int64_t i_edge = 0; i_edge < num_edges; ++i_edge) { - int64_t e = edges_a[i_edge]; - for (int64_t j = 0; j < e; ++j) { - for (int64_t i = 0; i < j; ++i) { - pairs.emplace_back(vert_idx + i, vert_idx + j); - } - } - vert_idx += e; - } - - // Convert from std::vector by copying over the items to a new empty torch - // tensor. - auto pairs_tensor = at::empty({(int64_t)pairs.size(), 2}, edge_num.options()); - auto pairs_a = pairs_tensor.accessor(); - for (int64_t i_pair = 0; i_pair < pairs.size(); ++i_pair) { - auto accessor = pairs_a[i_pair]; - accessor[0] = pairs[i_pair].first; - accessor[1] = pairs[i_pair].second; - } - - return pairs_tensor; -} diff --git a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu deleted file mode 100644 index 94f22c18431bb8bc4557584acdd5894155a17e37..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -// Kernel for inputs_packed of shape (F, D), where D > 1 -template -__global__ void PackedToPaddedKernel( - const scalar_t* __restrict__ inputs_packed, - const int64_t* __restrict__ first_idxs, - scalar_t* __restrict__ inputs_padded, - const size_t batch_size, - const size_t max_size, - const size_t num_inputs, - const size_t D) { - // Batch elements split evenly across blocks (num blocks = batch_size) and - // values for each element split across threads in the block. Each thread adds - // the values of its respective input elements to the global inputs_padded - // tensor. - const size_t tid = threadIdx.x; - const size_t batch_idx = blockIdx.x; - - const int64_t start = first_idxs[batch_idx]; - const int64_t end = - batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs; - const int num = end - start; - for (size_t f = tid; f < num; f += blockDim.x) { - for (size_t j = 0; j < D; ++j) { - inputs_padded[batch_idx * max_size * D + f * D + j] = - inputs_packed[(start + f) * D + j]; - } - } -} - -// Kernel for inputs of shape (F, 1) -template -__global__ void PackedToPaddedKernelD1( - const scalar_t* __restrict__ inputs_packed, - const int64_t* __restrict__ first_idxs, - scalar_t* __restrict__ inputs_padded, - const size_t batch_size, - const size_t max_size, - const size_t num_inputs) { - // Batch elements split evenly across blocks (num blocks = batch_size) and - // values for each element split across threads in the block. Each thread adds - // the values of its respective input elements to the global inputs_padded - // tensor. 
- const size_t tid = threadIdx.x; - const size_t batch_idx = blockIdx.x; - - const int64_t start = first_idxs[batch_idx]; - const int64_t end = - batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs; - const int num = end - start; - for (size_t f = tid; f < num; f += blockDim.x) { - inputs_padded[batch_idx * max_size + f] = inputs_packed[start + f]; - } -} - -// Kernel for inputs_padded of shape (B, F, D), where D > 1 -template -__global__ void PaddedToPackedKernel( - const scalar_t* __restrict__ inputs_padded, - const int64_t* __restrict__ first_idxs, - scalar_t* __restrict__ inputs_packed, - const size_t batch_size, - const size_t max_size, - const size_t num_inputs, - const size_t D) { - // Batch elements split evenly across blocks (num blocks = batch_size) and - // values for each element split across threads in the block. Each thread adds - // the values of its respective input elements to the global inputs_packed - // tensor. - const size_t tid = threadIdx.x; - const size_t batch_idx = blockIdx.x; - - const int64_t start = first_idxs[batch_idx]; - const int64_t end = - batch_idx + 1 < batch_size ? first_idxs[batch_idx + 1] : num_inputs; - const int num = end - start; - for (size_t f = tid; f < num; f += blockDim.x) { - for (size_t j = 0; j < D; ++j) { - inputs_packed[(start + f) * D + j] = - inputs_padded[batch_idx * max_size * D + f * D + j]; - } - } -} - -// Kernel for inputs_padded of shape (B, F, 1) -template -__global__ void PaddedToPackedKernelD1( - const scalar_t* __restrict__ inputs_padded, - const int64_t* __restrict__ first_idxs, - scalar_t* __restrict__ inputs_packed, - const size_t batch_size, - const size_t max_size, - const size_t num_inputs) { - // Batch elements split evenly across blocks (num blocks = batch_size) and - // values for each element split across threads in the block. Each thread adds - // the values of its respective input elements to the global inputs_packed - // tensor. - const size_t tid = threadIdx.x; - const size_t batch_idx = blockIdx.x; - - const int64_t start = first_idxs[batch_idx]; - const int64_t end = - batch_idx + 1 < batch_size ? 
first_idxs[batch_idx + 1] : num_inputs; - const int num = end - start; - for (size_t f = tid; f < num; f += blockDim.x) { - inputs_packed[start + f] = inputs_padded[batch_idx * max_size + f]; - } -} - -at::Tensor PackedToPaddedCuda( - const at::Tensor inputs_packed, - const at::Tensor first_idxs, - const int64_t max_size) { - // Check inputs are on the same device - at::TensorArg inputs_packed_t{inputs_packed, "inputs_packed", 1}, - first_idxs_t{first_idxs, "first_idxs", 2}; - at::CheckedFrom c = "PackedToPaddedCuda"; - at::checkAllSameGPU(c, {inputs_packed_t, first_idxs_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(inputs_packed.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t num_inputs = inputs_packed.size(0); - const int64_t batch_size = first_idxs.size(0); - - TORCH_CHECK( - inputs_packed.dim() == 2, "inputs_packed must be a 2-dimensional tensor"); - const int64_t D = inputs_packed.size(1); - at::Tensor inputs_padded = - at::zeros({batch_size, max_size, D}, inputs_packed.options()); - - if (inputs_padded.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return inputs_padded; - } - - const int threads = 512; - const int blocks = batch_size; - if (D == 1) { - AT_DISPATCH_FLOATING_TYPES( - inputs_packed.scalar_type(), "packed_to_padded_d1_kernel", ([&] { - PackedToPaddedKernelD1<<>>( - inputs_packed.contiguous().data_ptr(), - first_idxs.contiguous().data_ptr(), - inputs_padded.data_ptr(), - batch_size, - max_size, - num_inputs); - })); - } else { - AT_DISPATCH_FLOATING_TYPES( - inputs_packed.scalar_type(), "packed_to_padded_kernel", ([&] { - PackedToPaddedKernel<<>>( - inputs_packed.contiguous().data_ptr(), - first_idxs.contiguous().data_ptr(), - inputs_padded.data_ptr(), - batch_size, - max_size, - num_inputs, - D); - })); - } - - AT_CUDA_CHECK(cudaGetLastError()); - return inputs_padded; -} - -at::Tensor PaddedToPackedCuda( - const at::Tensor inputs_padded, - const at::Tensor first_idxs, - const int64_t num_inputs) { - // Check inputs are on the same device - at::TensorArg inputs_padded_t{inputs_padded, "inputs_padded", 1}, - first_idxs_t{first_idxs, "first_idxs", 2}; - at::CheckedFrom c = "PaddedToPackedCuda"; - at::checkAllSameGPU(c, {inputs_padded_t, first_idxs_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(inputs_padded.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t batch_size = inputs_padded.size(0); - const int64_t max_size = inputs_padded.size(1); - - TORCH_CHECK(batch_size == first_idxs.size(0), "sizes mismatch"); - TORCH_CHECK( - inputs_padded.dim() == 3, - "inputs_padded must be a 3-dimensional tensor"); - const int64_t D = inputs_padded.size(2); - - at::Tensor inputs_packed = - at::zeros({num_inputs, D}, inputs_padded.options()); - - if (inputs_packed.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return inputs_packed; - } - - const int threads = 512; - const int blocks = batch_size; - - if (D == 1) { - AT_DISPATCH_FLOATING_TYPES( - inputs_padded.scalar_type(), "padded_to_packed_d1_kernel", ([&] { - PaddedToPackedKernelD1<<>>( - inputs_padded.contiguous().data_ptr(), - first_idxs.contiguous().data_ptr(), - inputs_packed.data_ptr(), - batch_size, - max_size, - num_inputs); - })); - } else { - AT_DISPATCH_FLOATING_TYPES( - inputs_padded.scalar_type(), "padded_to_packed_kernel", ([&] { - PaddedToPackedKernel<<>>( - 
inputs_padded.contiguous().data_ptr(), - first_idxs.contiguous().data_ptr(), - inputs_packed.data_ptr(), - batch_size, - max_size, - num_inputs, - D); - })); - } - - AT_CUDA_CHECK(cudaGetLastError()); - return inputs_packed; -} diff --git a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h deleted file mode 100644 index 27bbc3bf03d1e195f44d36662c93d9db85adae68..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include "utils/pytorch3d_cutils.h" - -// PackedToPadded -// Converts a packed tensor into a padded tensor, restoring the batch dimension. -// Refer to pytorch3d/structures/meshes.py for details on packed/padded tensors. -// -// Inputs: -// inputs_packed: FloatTensor of shape (F, D), representing the packed batch -// tensor, e.g. areas for faces in a batch of meshes. -// first_idxs: LongTensor of shape (N,) where N is the number of -// elements in the batch and `first_idxs[i] = f` -// means that the inputs for batch element i begin at -// `inputs[f]`. -// max_size: Max length of an element in the batch. -// Returns: -// inputs_padded: FloatTensor of shape (N, max_size, D) where max_size is max -// of `sizes`. The values for batch element i which start at -// `inputs_packed[first_idxs[i]]` will be copied to -// `inputs_padded[i, :]`, with zeros padding out the extra -// inputs. -// - -// PaddedToPacked -// Converts a padded tensor into a packed tensor. -// Refer to pytorch3d/structures/meshes.py for details on packed/padded tensors. -// -// Inputs: -// inputs_padded: FloatTensor of shape (N, max_size, D), representing the -// padded tensor, e.g. areas for faces in a batch of meshes. -// first_idxs: LongTensor of shape (N,) where N is the number of -// elements in the batch and `first_idxs[i] = f` -// means that the inputs for batch element i begin at -// `inputs_packed[f]`. -// num_inputs: Number of packed entries (= F) -// Returns: -// inputs_packed: FloatTensor of shape (F, D), where -// `inputs_packed[first_idx[i]:] = inputs_padded[i, :]`. -// -// - -// Cpu implementation. -at::Tensor PackedToPaddedCpu( - const at::Tensor inputs_packed, - const at::Tensor first_idxs, - const int64_t max_size); - -// Cpu implementation. -at::Tensor PaddedToPackedCpu( - const at::Tensor inputs_padded, - const at::Tensor first_idxs, - const int64_t num_inputs); - -#ifdef WITH_CUDA -// Cuda implementation. -at::Tensor PackedToPaddedCuda( - const at::Tensor inputs_packed, - const at::Tensor first_idxs, - const int64_t max_size); - -// Cuda implementation. -at::Tensor PaddedToPackedCuda( - const at::Tensor inputs_padded, - const at::Tensor first_idxs, - const int64_t num_inputs); -#endif - -// Implementation which is exposed. 
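A short usage sketch of the packed/padded convention documented above, assuming the PackedToPadded and PaddedToPacked wrappers declared in this header are visible; the tensor contents are illustrative:

#include <torch/torch.h>
#include "packed_to_padded_tensor/packed_to_padded_tensor.h"

void packedPaddedExample() {
  // A batch of two meshes with 3 and 2 faces; one scalar per face.
  torch::Tensor packed = torch::arange(1, 6, torch::kFloat).view({5, 1}); // (F=5, D=1)
  torch::Tensor first_idxs = torch::tensor({0, 3}, torch::kLong);         // (N=2,)

  // padded has shape (2, 3, 1) and, viewed as (2, 3), holds
  //   [[1, 2, 3],
  //    [4, 5, 0]]  -- the shorter element is zero-padded up to max_size.
  torch::Tensor padded = PackedToPadded(packed, first_idxs, /*max_size=*/3);

  // The inverse mapping drops the padding and restores the (5, 1) tensor.
  torch::Tensor repacked = PaddedToPacked(padded, first_idxs, /*num_inputs=*/5);
  TORCH_CHECK(torch::allclose(packed, repacked));
}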
-at::Tensor PackedToPadded( - const at::Tensor inputs_packed, - const at::Tensor first_idxs, - const int64_t max_size) { - if (inputs_packed.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(inputs_packed); - CHECK_CUDA(first_idxs); - return PackedToPaddedCuda(inputs_packed, first_idxs, max_size); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PackedToPaddedCpu(inputs_packed, first_idxs, max_size); -} - -// Implementation which is exposed. -at::Tensor PaddedToPacked( - const at::Tensor inputs_padded, - const at::Tensor first_idxs, - const int64_t num_inputs) { - if (inputs_padded.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(inputs_padded); - CHECK_CUDA(first_idxs); - return PaddedToPackedCuda(inputs_padded, first_idxs, num_inputs); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PaddedToPackedCpu(inputs_padded, first_idxs, num_inputs); -} diff --git a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor_cpu.cpp b/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor_cpu.cpp deleted file mode 100644 index c9176a1afd5e6736f938dc938dfc1d62c1052ddc..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor_cpu.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include - -at::Tensor PackedToPaddedCpu( - const at::Tensor inputs_packed, - const at::Tensor first_idxs, - const int64_t max_size) { - const int64_t num_inputs = inputs_packed.size(0); - const int64_t batch_size = first_idxs.size(0); - - AT_ASSERTM( - inputs_packed.dim() == 2, "inputs_packed must be a 2-dimensional tensor"); - const int64_t D = inputs_packed.size(1); - - torch::Tensor inputs_padded = - torch::zeros({batch_size, max_size, D}, inputs_packed.options()); - - auto inputs_packed_a = inputs_packed.accessor(); - auto first_idxs_a = first_idxs.accessor(); - auto inputs_padded_a = inputs_padded.accessor(); - - for (int b = 0; b < batch_size; ++b) { - const int64_t start = first_idxs_a[b]; - const int64_t end = b + 1 < batch_size ? first_idxs_a[b + 1] : num_inputs; - const int64_t num = end - start; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < D; ++j) { - inputs_padded_a[b][i][j] = inputs_packed_a[start + i][j]; - } - } - } - return inputs_padded; -} - -at::Tensor PaddedToPackedCpu( - const at::Tensor inputs_padded, - const at::Tensor first_idxs, - const int64_t num_inputs) { - const int64_t batch_size = inputs_padded.size(0); - - AT_ASSERTM( - inputs_padded.dim() == 3, "inputs_padded must be a 3-dimensional tensor"); - const int64_t D = inputs_padded.size(2); - - torch::Tensor inputs_packed = - torch::zeros({num_inputs, D}, inputs_padded.options()); - - auto inputs_padded_a = inputs_padded.accessor(); - auto first_idxs_a = first_idxs.accessor(); - auto inputs_packed_a = inputs_packed.accessor(); - - for (int b = 0; b < batch_size; ++b) { - const int64_t start = first_idxs_a[b]; - const int64_t end = b + 1 < batch_size ? 
first_idxs_a[b + 1] : num_inputs; - const int64_t num = end - start; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < D; ++j) { - inputs_packed_a[start + i][j] = inputs_padded_a[b][i][j]; - } - } - } - return inputs_packed; -} diff --git a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp deleted file mode 100644 index e059409c40330005e76f88792bf7037cb72e4000..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include "utils/geometry_utils.h" -#include "utils/vec3.h" - -// - We start with implementations of simple operations on points, edges and -// faces. The hull of H points is a point if H=1, an edge if H=2, a face if H=3. - -template -vec3 ExtractPoint(const at::TensorAccessor& t) { - return vec3(t[0], t[1], t[2]); -} - -template -static std::array>, 1> -ExtractHullHelper(const Accessor& t, std::array /*tag*/) { - return {ExtractPoint(t)}; -} - -template -static std::array>, 2> -ExtractHullHelper(const Accessor& t, std::array /*tag*/) { - return {ExtractPoint(t[0]), ExtractPoint(t[1])}; -} - -template -static std::array>, 3> -ExtractHullHelper(const Accessor& t, std::array /*tag*/) { - return {ExtractPoint(t[0]), ExtractPoint(t[1]), ExtractPoint(t[2])}; -} - -template -std::array>, H> -ExtractHull(const Accessor& t) { - std::array tag; - return ExtractHullHelper(t, tag); -} - -template -void IncrementPoint(at::TensorAccessor&& t, const vec3& point) { - t[0] += point.x; - t[1] += point.y; - t[2] += point.z; -} - -// distance between the convex hull of A points and B points -// this could be done in c++17 with tuple_cat and invoke -template -T HullDistance( - const std::array, 1>& a, - const std::array, 2>& b, - const double /*min_triangle_area*/) { - using std::get; - return PointLine3DistanceForward(get<0>(a), get<0>(b), get<1>(b)); -} -template -T HullDistance( - const std::array, 1>& a, - const std::array, 3>& b, - const double min_triangle_area) { - using std::get; - return PointTriangle3DistanceForward( - get<0>(a), get<0>(b), get<1>(b), get<2>(b), min_triangle_area); -} -template -T HullDistance( - const std::array, 2>& a, - const std::array, 1>& b, - const double /*min_triangle_area*/) { - return HullDistance(b, a, 1); -} -template -T HullDistance( - const std::array, 3>& a, - const std::array, 1>& b, - const double min_triangle_area) { - return HullDistance(b, a, min_triangle_area); -} - -template -void HullHullDistanceBackward( - const std::array, 1>& a, - const std::array, 2>& b, - T grad_dist, - at::TensorAccessor&& grad_a, - at::TensorAccessor&& grad_b, - const double /*min_triangle_area*/) { - using std::get; - auto res = - PointLine3DistanceBackward(get<0>(a), get<0>(b), get<1>(b), grad_dist); - IncrementPoint(std::move(grad_a), get<0>(res)); - IncrementPoint(grad_b[0], get<1>(res)); - IncrementPoint(grad_b[1], get<2>(res)); -} -template -void HullHullDistanceBackward( - const std::array, 1>& a, - const std::array, 3>& b, - T grad_dist, - at::TensorAccessor&& grad_a, - at::TensorAccessor&& grad_b, - const double min_triangle_area) { - using std::get; - auto res = PointTriangle3DistanceBackward( - get<0>(a), get<0>(b), get<1>(b), get<2>(b), grad_dist, 
min_triangle_area); - IncrementPoint(std::move(grad_a), get<0>(res)); - IncrementPoint(grad_b[0], get<1>(res)); - IncrementPoint(grad_b[1], get<2>(res)); - IncrementPoint(grad_b[2], get<3>(res)); -} -template -void HullHullDistanceBackward( - const std::array, 3>& a, - const std::array, 1>& b, - T grad_dist, - at::TensorAccessor&& grad_a, - at::TensorAccessor&& grad_b, - const double min_triangle_area) { - return HullHullDistanceBackward( - b, a, grad_dist, std::move(grad_b), std::move(grad_a), min_triangle_area); -} -template -void HullHullDistanceBackward( - const std::array, 2>& a, - const std::array, 1>& b, - T grad_dist, - at::TensorAccessor&& grad_a, - at::TensorAccessor&& grad_b, - const double /*min_triangle_area*/) { - return HullHullDistanceBackward( - b, a, grad_dist, std::move(grad_b), std::move(grad_a), 1); -} - -template -void ValidateShape(const at::Tensor& as) { - if (H == 1) { - TORCH_CHECK(as.size(1) == 3); - } else { - TORCH_CHECK(as.size(2) == 3 && as.size(1) == H); - } -} - -// ----------- Here begins the implementation of each top-level -// function using non-type template parameters to -// implement all the cases in one go. ----------- // - -template -std::tuple HullHullDistanceForwardCpu( - const at::Tensor& as, - const at::Tensor& as_first_idx, - const at::Tensor& bs, - const at::Tensor& bs_first_idx, - const double min_triangle_area) { - const int64_t A_N = as.size(0); - const int64_t B_N = bs.size(0); - const int64_t BATCHES = as_first_idx.size(0); - - ValidateShape
<H1>(as); - ValidateShape<H2>
(bs); - - TORCH_CHECK(bs_first_idx.size(0) == BATCHES); - - // clang-format off - at::Tensor dists = at::zeros({A_N,}, as.options()); - at::Tensor idxs = at::zeros({A_N,}, as_first_idx.options()); - // clang-format on - - auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > (); - auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > (); - auto as_first_idx_a = as_first_idx.accessor(); - auto bs_first_idx_a = bs_first_idx.accessor(); - auto dists_a = dists.accessor(); - auto idxs_a = idxs.accessor(); - int64_t a_batch_end = 0; - int64_t b_batch_start = 0, b_batch_end = 0; - int64_t batch_idx = 0; - for (int64_t a_n = 0; a_n < A_N; ++a_n) { - if (a_n == a_batch_end) { - ++batch_idx; - b_batch_start = b_batch_end; - if (batch_idx == BATCHES) { - a_batch_end = std::numeric_limits::max(); - b_batch_end = B_N; - } else { - a_batch_end = as_first_idx_a[batch_idx]; - b_batch_end = bs_first_idx_a[batch_idx]; - } - } - float min_dist = std::numeric_limits::max(); - size_t min_idx = 0; - auto a = ExtractHull
<H1>(as_a[a_n]); - for (int64_t b_n = b_batch_start; b_n < b_batch_end; ++b_n) { - float dist = - HullDistance(a, ExtractHull<H2>
(bs_a[b_n]), min_triangle_area); - if (dist <= min_dist) { - min_dist = dist; - min_idx = b_n; - } - } - dists_a[a_n] = min_dist; - idxs_a[a_n] = min_idx; - } - - return std::make_tuple(dists, idxs); -} - -template -std::tuple HullHullDistanceBackwardCpu( - const at::Tensor& as, - const at::Tensor& bs, - const at::Tensor& idx_bs, - const at::Tensor& grad_dists, - const double min_triangle_area) { - const int64_t A_N = as.size(0); - - TORCH_CHECK(idx_bs.size(0) == A_N); - TORCH_CHECK(grad_dists.size(0) == A_N); - ValidateShape
<H1>(as); - ValidateShape<H2>
(bs); - - at::Tensor grad_as = at::zeros_like(as); - at::Tensor grad_bs = at::zeros_like(bs); - - auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > (); - auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > (); - auto grad_as_a = grad_as.accessor < float, H1 == 1 ? 2 : 3 > (); - auto grad_bs_a = grad_bs.accessor < float, H2 == 1 ? 2 : 3 > (); - auto idx_bs_a = idx_bs.accessor(); - auto grad_dists_a = grad_dists.accessor(); - - for (int64_t a_n = 0; a_n < A_N; ++a_n) { - auto a = ExtractHull
<H1>(as_a[a_n]); - auto b = ExtractHull<H2>
(bs_a[idx_bs_a[a_n]]); - HullHullDistanceBackward( - a, - b, - grad_dists_a[a_n], - grad_as_a[a_n], - grad_bs_a[idx_bs_a[a_n]], - min_triangle_area); - } - return std::make_tuple(grad_as, grad_bs); -} - -template -torch::Tensor PointHullArrayDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& bs, - const double min_triangle_area) { - const int64_t P = points.size(0); - const int64_t B_N = bs.size(0); - - TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3"); - ValidateShape(bs); - - at::Tensor dists = at::zeros({P, B_N}, points.options()); - auto points_a = points.accessor(); - auto bs_a = bs.accessor(); - auto dists_a = dists.accessor(); - for (int64_t p = 0; p < P; ++p) { - auto point = ExtractHull<1>(points_a[p]); - auto dest = dists_a[p]; - for (int64_t b_n = 0; b_n < B_N; ++b_n) { - auto b = ExtractHull(bs_a[b_n]); - dest[b_n] = HullDistance(point, b, min_triangle_area); - } - } - return dists; -} - -template -std::tuple PointHullArrayDistanceBackwardCpu( - const at::Tensor& points, - const at::Tensor& bs, - const at::Tensor& grad_dists, - const double min_triangle_area) { - const int64_t P = points.size(0); - const int64_t B_N = bs.size(0); - - TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3"); - ValidateShape(bs); - TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == B_N)); - - at::Tensor grad_points = at::zeros({P, 3}, points.options()); - at::Tensor grad_bs = at::zeros({B_N, H, 3}, bs.options()); - - auto points_a = points.accessor(); - auto bs_a = bs.accessor(); - auto grad_dists_a = grad_dists.accessor(); - auto grad_points_a = grad_points.accessor(); - auto grad_bs_a = grad_bs.accessor(); - for (int64_t p = 0; p < P; ++p) { - auto point = ExtractHull<1>(points_a[p]); - auto grad_point = grad_points_a[p]; - auto grad_dist = grad_dists_a[p]; - for (int64_t b_n = 0; b_n < B_N; ++b_n) { - auto b = ExtractHull(bs_a[b_n]); - HullHullDistanceBackward( - point, - b, - grad_dist[b_n], - std::move(grad_point), - grad_bs_a[b_n], - min_triangle_area); - } - } - return std::make_tuple(grad_points, grad_bs); -} - -// ---------- Here begin the exported functions ------------ // - -std::tuple PointFaceDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const double min_triangle_area) { - return HullHullDistanceForwardCpu<1, 3>( - points, points_first_idx, tris, tris_first_idx, min_triangle_area); -} - -std::tuple PointFaceDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists, - const double min_triangle_area) { - return HullHullDistanceBackwardCpu<1, 3>( - points, tris, idx_points, grad_dists, min_triangle_area); -} - -std::tuple FacePointDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const double min_triangle_area) { - return HullHullDistanceForwardCpu<3, 1>( - tris, tris_first_idx, points, points_first_idx, min_triangle_area); -} - -std::tuple FacePointDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_tris, - const torch::Tensor& grad_dists, - const double min_triangle_area) { - auto res = HullHullDistanceBackwardCpu<3, 1>( - tris, points, idx_tris, grad_dists, min_triangle_area); - return std::make_tuple(std::get<1>(res), std::get<0>(res)); -} - -torch::Tensor 
PointEdgeArrayDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms) { - return PointHullArrayDistanceForwardCpu<2>(points, segms, 1); -} - -std::tuple PointFaceArrayDistanceBackwardCpu( - const at::Tensor& points, - const at::Tensor& tris, - const at::Tensor& grad_dists, - const double min_triangle_area) { - return PointHullArrayDistanceBackwardCpu<3>( - points, tris, grad_dists, min_triangle_area); -} - -torch::Tensor PointFaceArrayDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const double min_triangle_area) { - return PointHullArrayDistanceForwardCpu<3>(points, tris, min_triangle_area); -} - -std::tuple PointEdgeArrayDistanceBackwardCpu( - const at::Tensor& points, - const at::Tensor& segms, - const at::Tensor& grad_dists) { - return PointHullArrayDistanceBackwardCpu<2>(points, segms, grad_dists, 1); -} - -std::tuple PointEdgeDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t /*max_points*/) { - return HullHullDistanceForwardCpu<1, 2>( - points, points_first_idx, segms, segms_first_idx, 1); -} - -std::tuple PointEdgeDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists) { - return HullHullDistanceBackwardCpu<1, 2>( - points, segms, idx_points, grad_dists, 1); -} - -std::tuple EdgePointDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t /*max_segms*/) { - return HullHullDistanceForwardCpu<2, 1>( - segms, segms_first_idx, points, points_first_idx, 1); -} - -std::tuple EdgePointDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_segms, - const torch::Tensor& grad_dists) { - auto res = HullHullDistanceBackwardCpu<2, 1>( - segms, points, idx_segms, grad_dists, 1); - return std::make_tuple(std::get<1>(res), std::get<0>(res)); -} diff --git a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu deleted file mode 100644 index 3788d4055136e854f9415fef4e1233cfe23cfc86..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu +++ /dev/null @@ -1,833 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include "utils/float_math.cuh" -#include "utils/geometry_utils.cuh" -#include "utils/warp_reduce.cuh" - -// **************************************************************************** -// * Generic Forward/Backward Kernels * -// **************************************************************************** - -__global__ void DistanceForwardKernel( - const float* __restrict__ objects, // (O * oD * 3) - const size_t objects_size, // O - const size_t objects_dim, // oD - const float* __restrict__ targets, // (T * tD * 3) - const size_t targets_size, // T - const size_t targets_dim, // tD - const int64_t* __restrict__ objects_first_idx, // (B,) - const int64_t* __restrict__ targets_first_idx, // (B,) - const size_t batch_size, // B - float* __restrict__ dist_objects, // (O,) - int64_t* __restrict__ idx_objects, // (O,) - const double min_triangle_area) { - // This kernel is used interchangeably to compute bi-directional distances - // between points and triangles/lines. The direction of the distance computed, - // i.e. point to triangle/line or triangle/line to point, depends on the order - // of the input arguments and is inferred based on their shape. The shape is - // used to distinguish between triangles and lines - - // Single shared memory buffer which is split and cast to different types. - extern __shared__ char shared_buf[]; - float* min_dists = (float*)shared_buf; // float[NUM_THREADS] - int64_t* min_idxs = (int64_t*)&min_dists[blockDim.x]; // int64_t[NUM_THREADS] - - const size_t batch_idx = blockIdx.y; // index of batch element. - - // start and end for objects in batch_idx - const int64_t starto = objects_first_idx[batch_idx]; - const int64_t endo = batch_idx + 1 < batch_size - ? objects_first_idx[batch_idx + 1] - : objects_size; - - // start and end for targets in batch_idx - const int64_t startt = targets_first_idx[batch_idx]; - const int64_t endt = batch_idx + 1 < batch_size - ? targets_first_idx[batch_idx + 1] - : targets_size; - - const size_t i = blockIdx.x; // index within batch element. - const size_t tid = threadIdx.x; // thread index - - // Set references to points/face based on which of objects/targets refer to - // points/faces - float3* points_f3 = objects_dim == 1 ? (float3*)objects : (float3*)targets; - float3* face_f3 = objects_dim == 1 ? (float3*)targets : (float3*)objects; - // Distinguishes whether we're computing distance against triangle vs edge - bool isTriangle = objects_dim == 3 || targets_dim == 3; - - // Each block will compute one element of the output idx_objects[starto + i], - // dist_objects[starto + i]. Within the block we will use threads to compute - // the distances between objects[starto + i] and targets[j] for all j - // belonging in the same batch as i, i.e. j in [startt, endt]. Then use a - // block reduction to take an argmin of the distances. - - // If i exceeds the number of objects in batch_idx, then do nothing - if (i < (endo - starto)) { - // Compute the distances between objects[starto + i] and targets[j] for - // all j belonging in the same batch as i, i.e. j in [startt, endt]. - // Here each thread will reduce over (endt-startt) / blockDim.x in serial, - // and store its result to shared memory - float min_dist = FLT_MAX; - size_t min_idx = 0; - for (size_t j = tid; j < (endt - startt); j += blockDim.x) { - size_t point_idx = objects_dim == 1 ? starto + i : startt + j; - size_t face_idx = objects_dim == 1 ? 
(startt + j) * targets_dim - : (starto + i) * objects_dim; - - float dist; - if (isTriangle) { - dist = PointTriangle3DistanceForward( - points_f3[point_idx], - face_f3[face_idx], - face_f3[face_idx + 1], - face_f3[face_idx + 2], - min_triangle_area); - } else { - dist = PointLine3DistanceForward( - points_f3[point_idx], face_f3[face_idx], face_f3[face_idx + 1]); - } - - min_dist = (j == tid) ? dist : min_dist; - min_idx = (dist <= min_dist) ? (startt + j) : min_idx; - min_dist = (dist <= min_dist) ? dist : min_dist; - } - min_dists[tid] = min_dist; - min_idxs[tid] = min_idx; - __syncthreads(); - - // Perform reduction in shared memory. - for (int s = blockDim.x / 2; s > 32; s >>= 1) { - if (tid < s) { - if (min_dists[tid] > min_dists[tid + s]) { - min_dists[tid] = min_dists[tid + s]; - min_idxs[tid] = min_idxs[tid + s]; - } - } - __syncthreads(); - } - - // Unroll the last 6 iterations of the loop since they will happen - // synchronized within a single warp. - if (tid < 32) - WarpReduceMin(min_dists, min_idxs, tid); - - // Finally thread 0 writes the result to the output buffer. - if (tid == 0) { - idx_objects[starto + i] = min_idxs[0]; - dist_objects[starto + i] = min_dists[0]; - } - } -} - -std::tuple DistanceForwardCuda( - const at::Tensor& objects, - const size_t objects_dim, - const at::Tensor& objects_first_idx, - const at::Tensor& targets, - const size_t targets_dim, - const at::Tensor& targets_first_idx, - const int64_t max_objects, - const double min_triangle_area) { - // Check inputs are on the same device - at::TensorArg objects_t{objects, "objects", 1}, - objects_first_idx_t{objects_first_idx, "objects_first_idx", 2}, - targets_t{targets, "targets", 3}, - targets_first_idx_t{targets_first_idx, "targets_first_idx", 4}; - at::CheckedFrom c = "DistanceForwardCuda"; - at::checkAllSameGPU( - c, {objects_t, objects_first_idx_t, targets_t, targets_first_idx_t}); - at::checkAllSameType(c, {objects_t, targets_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(objects.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t objects_size = objects.size(0); - const int64_t targets_size = targets.size(0); - const int64_t batch_size = objects_first_idx.size(0); - - TORCH_CHECK(targets_first_idx.size(0) == batch_size); - if (objects_dim == 1) { - TORCH_CHECK( - targets_dim >= 2 && targets_dim <= 3, - "either object or target must be edge or face"); - TORCH_CHECK(objects.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - targets.size(2) == 3, - "face must be of shape Tx3x3, lines must be of shape Tx2x3"); - } else { - TORCH_CHECK(targets_dim == 1, "either object or target must be point"); - TORCH_CHECK( - objects_dim >= 2 && objects_dim <= 3, - "either object or target must be edge or face"); - TORCH_CHECK(targets.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - objects.size(2) == 3, - "face must be of shape Tx3x3, lines must be of shape Tx2x3"); - } - - // clang-format off - at::Tensor dists = at::zeros({objects_size,}, objects.options()); - at::Tensor idxs = at::zeros({objects_size,}, objects_first_idx.options()); - // clang-format on - - if (dists.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(dists, idxs); - } - - const int threads = 128; - const dim3 blocks(max_objects, batch_size); - size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t); - - DistanceForwardKernel<<>>( - objects.contiguous().data_ptr(), - 
objects_size, - objects_dim, - targets.contiguous().data_ptr(), - targets_size, - targets_dim, - objects_first_idx.contiguous().data_ptr(), - targets_first_idx.contiguous().data_ptr(), - batch_size, - dists.data_ptr(), - idxs.data_ptr(), - min_triangle_area); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(dists, idxs); -} - -__global__ void DistanceBackwardKernel( - const float* __restrict__ objects, // (O * oD * 3) - const size_t objects_size, // O - const size_t objects_dim, // oD - const float* __restrict__ targets, // (T * tD * 3) - const size_t targets_dim, // tD - const int64_t* __restrict__ idx_objects, // (O,) - const float* __restrict__ grad_dists, // (O,) - float* __restrict__ grad_points, // ((O or T) * 3) - float* __restrict__ grad_face, // ((O or T) * max(oD, tD) * 3) - const double min_triangle_area) { - // This kernel is used interchangeably to compute bi-directional backward - // distances between points and triangles/lines. The direction of the distance - // computed, i.e. point to triangle/line or triangle/line to point, depends on - // the order of the input arguments and is inferred based on their shape. The - // shape is used to distinguish between triangles and lines. Note that - // grad_points will always be used for the point data and grad_face for the - // edge/triangle - - // Set references to points/face based on whether objects/targets are which - float3* points_f3 = objects_dim == 1 ? (float3*)objects : (float3*)targets; - float3* face_f3 = objects_dim == 1 ? (float3*)targets : (float3*)objects; - - const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; - const size_t stride = gridDim.x * blockDim.x; - - for (size_t o = tid; o < objects_size; o += stride) { - const int64_t tidx = idx_objects[o]; - - size_t point_index = objects_dim == 1 ? o : tidx; - size_t face_index = objects_dim == 1 ? 
tidx * targets_dim : o * objects_dim; - bool isTriangle = objects_dim == 3 || targets_dim == 3; - - float3 grad_point, grad_v0, grad_v1, grad_v2; - if (isTriangle) { - const auto grads = PointTriangle3DistanceBackward( - points_f3[point_index], - face_f3[face_index], - face_f3[face_index + 1], - face_f3[face_index + 2], - grad_dists[o], - min_triangle_area); - grad_point = thrust::get<0>(grads); - grad_v0 = thrust::get<1>(grads); - grad_v1 = thrust::get<2>(grads); - grad_v2 = thrust::get<3>(grads); - } else { - const auto grads = PointLine3DistanceBackward( - points_f3[point_index], - face_f3[face_index], - face_f3[face_index + 1], - grad_dists[o]); - grad_point = thrust::get<0>(grads); - grad_v0 = thrust::get<1>(grads); - grad_v1 = thrust::get<2>(grads); - } - - atomicAdd(grad_points + point_index * 3 + 0, grad_point.x); - atomicAdd(grad_points + point_index * 3 + 1, grad_point.y); - atomicAdd(grad_points + point_index * 3 + 2, grad_point.z); - - atomicAdd(grad_face + face_index * 3 + 0 * 3 + 0, grad_v0.x); - atomicAdd(grad_face + face_index * 3 + 0 * 3 + 1, grad_v0.y); - atomicAdd(grad_face + face_index * 3 + 0 * 3 + 2, grad_v0.z); - - atomicAdd(grad_face + face_index * 3 + 1 * 3 + 0, grad_v1.x); - atomicAdd(grad_face + face_index * 3 + 1 * 3 + 1, grad_v1.y); - atomicAdd(grad_face + face_index * 3 + 1 * 3 + 2, grad_v1.z); - - if (isTriangle) { - atomicAdd(grad_face + face_index * 3 + 2 * 3 + 0, grad_v2.x); - atomicAdd(grad_face + face_index * 3 + 2 * 3 + 1, grad_v2.y); - atomicAdd(grad_face + face_index * 3 + 2 * 3 + 2, grad_v2.z); - } - } -} - -std::tuple DistanceBackwardCuda( - const at::Tensor& objects, - const size_t objects_dim, - const at::Tensor& targets, - const size_t targets_dim, - const at::Tensor& idx_objects, - const at::Tensor& grad_dists, - const double min_triangle_area) { - // Check inputs are on the same device - at::TensorArg objects_t{objects, "objects", 1}, - targets_t{targets, "targets", 2}, - idx_objects_t{idx_objects, "idx_objects", 3}, - grad_dists_t{grad_dists, "grad_dists", 4}; - at::CheckedFrom c = "DistanceBackwardCuda"; - at::checkAllSameGPU(c, {objects_t, targets_t, idx_objects_t, grad_dists_t}); - at::checkAllSameType(c, {objects_t, targets_t, grad_dists_t}); - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("DistanceBackwardCuda"); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(objects.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t objects_size = objects.size(0); - const int64_t targets_size = targets.size(0); - - at::Tensor grad_points; - at::Tensor grad_tris; - - TORCH_CHECK(idx_objects.size(0) == objects_size); - TORCH_CHECK(grad_dists.size(0) == objects_size); - if (objects_dim == 1) { - TORCH_CHECK( - targets_dim >= 2 && targets_dim <= 3, - "either object or target must be edge or face"); - TORCH_CHECK(objects.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - targets.size(2) == 3, - "face must be of shape Tx3x3, lines must be of shape Tx2x3"); - // clang-format off - grad_points = at::zeros({objects_size, 3}, objects.options()); - grad_tris = at::zeros({targets_size, int64_t(targets_dim), 3}, targets.options()); - // clang-format on - } else { - TORCH_CHECK(targets_dim == 1, "either object or target must be point"); - TORCH_CHECK( - objects_dim >= 2 && objects_dim <= 3, - "either object or target must be edge or face"); - TORCH_CHECK(targets.size(1) == 3, "points must be of shape Px3"); - 
TORCH_CHECK( - objects.size(2) == 3, - "face must be of shape Tx3x3, lines must be of shape Tx2x3"); - // clang-format off - grad_points = at::zeros({targets_size, 3}, targets.options()); - grad_tris = at::zeros({objects_size, int64_t(objects_dim), 3}, objects.options()); - // clang-format on - } - - if (grad_points.numel() == 0 || grad_tris.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_points, grad_tris); - } - - const int blocks = 64; - const int threads = 512; - - DistanceBackwardKernel<<>>( - objects.contiguous().data_ptr(), - objects_size, - objects_dim, - targets.contiguous().data_ptr(), - targets_dim, - idx_objects.contiguous().data_ptr(), - grad_dists.contiguous().data_ptr(), - grad_points.data_ptr(), - grad_tris.data_ptr(), - min_triangle_area); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_points, grad_tris); -} - -// **************************************************************************** -// * PointFaceDistance * -// **************************************************************************** - -std::tuple PointFaceDistanceForwardCuda( - const at::Tensor& points, - const at::Tensor& points_first_idx, - const at::Tensor& tris, - const at::Tensor& tris_first_idx, - const int64_t max_points, - const double min_triangle_area) { - return DistanceForwardCuda( - points, - 1, - points_first_idx, - tris, - 3, - tris_first_idx, - max_points, - min_triangle_area); -} - -std::tuple PointFaceDistanceBackwardCuda( - const at::Tensor& points, - const at::Tensor& tris, - const at::Tensor& idx_points, - const at::Tensor& grad_dists, - const double min_triangle_area) { - return DistanceBackwardCuda( - points, 1, tris, 3, idx_points, grad_dists, min_triangle_area); -} - -// **************************************************************************** -// * FacePointDistance * -// **************************************************************************** - -std::tuple FacePointDistanceForwardCuda( - const at::Tensor& points, - const at::Tensor& points_first_idx, - const at::Tensor& tris, - const at::Tensor& tris_first_idx, - const int64_t max_tris, - const double min_triangle_area) { - return DistanceForwardCuda( - tris, - 3, - tris_first_idx, - points, - 1, - points_first_idx, - max_tris, - min_triangle_area); -} - -std::tuple FacePointDistanceBackwardCuda( - const at::Tensor& points, - const at::Tensor& tris, - const at::Tensor& idx_tris, - const at::Tensor& grad_dists, - const double min_triangle_area) { - return DistanceBackwardCuda( - tris, 3, points, 1, idx_tris, grad_dists, min_triangle_area); -} - -// **************************************************************************** -// * PointEdgeDistance * -// **************************************************************************** - -std::tuple PointEdgeDistanceForwardCuda( - const at::Tensor& points, - const at::Tensor& points_first_idx, - const at::Tensor& segms, - const at::Tensor& segms_first_idx, - const int64_t max_points) { - return DistanceForwardCuda( - points, - 1, - points_first_idx, - segms, - 2, - segms_first_idx, - max_points, - 1); // todo: unused parameter handling for min_triangle_area -} - -std::tuple PointEdgeDistanceBackwardCuda( - const at::Tensor& points, - const at::Tensor& segms, - const at::Tensor& idx_points, - const at::Tensor& grad_dists) { - return DistanceBackwardCuda(points, 1, segms, 2, idx_points, grad_dists, 1); -} - -// **************************************************************************** -// * EdgePointDistance * -// 
**************************************************************************** - -std::tuple EdgePointDistanceForwardCuda( - const at::Tensor& points, - const at::Tensor& points_first_idx, - const at::Tensor& segms, - const at::Tensor& segms_first_idx, - const int64_t max_segms) { - return DistanceForwardCuda( - segms, 2, segms_first_idx, points, 1, points_first_idx, max_segms, 1); -} - -std::tuple EdgePointDistanceBackwardCuda( - const at::Tensor& points, - const at::Tensor& segms, - const at::Tensor& idx_segms, - const at::Tensor& grad_dists) { - return DistanceBackwardCuda(segms, 2, points, 1, idx_segms, grad_dists, 1); -} - -// **************************************************************************** -// * PointFaceArrayDistance * -// **************************************************************************** -// TODO: Create wrapper function and merge kernel with other array kernel - -__global__ void PointFaceArrayForwardKernel( - const float* __restrict__ points, // (P, 3) - const float* __restrict__ tris, // (T, 3, 3) - float* __restrict__ dists, // (P, T) - const size_t P, - const size_t T, - const double min_triangle_area) { - const float3* points_f3 = (float3*)points; - const float3* tris_f3 = (float3*)tris; - - // Parallelize over P * S computations - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int t_i = tid; t_i < P * T; t_i += num_threads) { - const int t = t_i / P; // segment index. - const int p = t_i % P; // point index - const float3 v0 = tris_f3[t * 3 + 0]; - const float3 v1 = tris_f3[t * 3 + 1]; - const float3 v2 = tris_f3[t * 3 + 2]; - - const float3 point = points_f3[p]; - float dist = - PointTriangle3DistanceForward(point, v0, v1, v2, min_triangle_area); - dists[p * T + t] = dist; - } -} - -at::Tensor PointFaceArrayDistanceForwardCuda( - const at::Tensor& points, - const at::Tensor& tris, - const double min_triangle_area) { - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, tris_t{tris, "tris", 2}; - at::CheckedFrom c = "PointFaceArrayDistanceForwardCuda"; - at::checkAllSameGPU(c, {points_t, tris_t}); - at::checkAllSameType(c, {points_t, tris_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t P = points.size(0); - const int64_t T = tris.size(0); - - TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - (tris.size(1) == 3) && (tris.size(2) == 3), - "tris must be of shape Tx3x3"); - - at::Tensor dists = at::zeros({P, T}, points.options()); - - if (dists.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return dists; - } - - const size_t blocks = 1024; - const size_t threads = 64; - - PointFaceArrayForwardKernel<<>>( - points.contiguous().data_ptr(), - tris.contiguous().data_ptr(), - dists.data_ptr(), - P, - T, - min_triangle_area); - - AT_CUDA_CHECK(cudaGetLastError()); - return dists; -} - -__global__ void PointFaceArrayBackwardKernel( - const float* __restrict__ points, // (P, 3) - const float* __restrict__ tris, // (T, 3, 3) - const float* __restrict__ grad_dists, // (P, T) - float* __restrict__ grad_points, // (P, 3) - float* __restrict__ grad_tris, // (T, 3, 3) - const size_t P, - const size_t T, - const double min_triangle_area) { - const float3* points_f3 = (float3*)points; - const float3* tris_f3 = (float3*)tris; - - // Parallelize over P * S computations - const 
int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int t_i = tid; t_i < P * T; t_i += num_threads) { - const int t = t_i / P; // triangle index. - const int p = t_i % P; // point index - const float3 v0 = tris_f3[t * 3 + 0]; - const float3 v1 = tris_f3[t * 3 + 1]; - const float3 v2 = tris_f3[t * 3 + 2]; - - const float3 point = points_f3[p]; - - const float grad_dist = grad_dists[p * T + t]; - const auto grad = PointTriangle3DistanceBackward( - point, v0, v1, v2, grad_dist, min_triangle_area); - - const float3 grad_point = thrust::get<0>(grad); - const float3 grad_v0 = thrust::get<1>(grad); - const float3 grad_v1 = thrust::get<2>(grad); - const float3 grad_v2 = thrust::get<3>(grad); - - atomicAdd(grad_points + 3 * p + 0, grad_point.x); - atomicAdd(grad_points + 3 * p + 1, grad_point.y); - atomicAdd(grad_points + 3 * p + 2, grad_point.z); - - atomicAdd(grad_tris + t * 3 * 3 + 0 * 3 + 0, grad_v0.x); - atomicAdd(grad_tris + t * 3 * 3 + 0 * 3 + 1, grad_v0.y); - atomicAdd(grad_tris + t * 3 * 3 + 0 * 3 + 2, grad_v0.z); - - atomicAdd(grad_tris + t * 3 * 3 + 1 * 3 + 0, grad_v1.x); - atomicAdd(grad_tris + t * 3 * 3 + 1 * 3 + 1, grad_v1.y); - atomicAdd(grad_tris + t * 3 * 3 + 1 * 3 + 2, grad_v1.z); - - atomicAdd(grad_tris + t * 3 * 3 + 2 * 3 + 0, grad_v2.x); - atomicAdd(grad_tris + t * 3 * 3 + 2 * 3 + 1, grad_v2.y); - atomicAdd(grad_tris + t * 3 * 3 + 2 * 3 + 2, grad_v2.z); - } -} - -std::tuple PointFaceArrayDistanceBackwardCuda( - const at::Tensor& points, - const at::Tensor& tris, - const at::Tensor& grad_dists, - const double min_triangle_area) { - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, tris_t{tris, "tris", 2}, - grad_dists_t{grad_dists, "grad_dists", 3}; - at::CheckedFrom c = "PointFaceArrayDistanceBackwardCuda"; - at::checkAllSameGPU(c, {points_t, tris_t, grad_dists_t}); - at::checkAllSameType(c, {points_t, tris_t, grad_dists_t}); - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic( - "PointFaceArrayDistanceBackwardCuda"); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t P = points.size(0); - const int64_t T = tris.size(0); - - TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - (tris.size(1) == 3) && (tris.size(2) == 3), - "tris must be of shape Tx3x3"); - TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == T)); - - at::Tensor grad_points = at::zeros({P, 3}, points.options()); - at::Tensor grad_tris = at::zeros({T, 3, 3}, tris.options()); - - if (grad_points.numel() == 0 || grad_tris.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_points, grad_tris); - } - - const size_t blocks = 1024; - const size_t threads = 64; - - PointFaceArrayBackwardKernel<<>>( - points.contiguous().data_ptr(), - tris.contiguous().data_ptr(), - grad_dists.contiguous().data_ptr(), - grad_points.data_ptr(), - grad_tris.data_ptr(), - P, - T, - min_triangle_area); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_points, grad_tris); -} - -// **************************************************************************** -// * PointEdgeArrayDistance * -// **************************************************************************** -// TODO: Create wrapper function and merge kernel with other array kernel - -__global__ void 
PointEdgeArrayForwardKernel( - const float* __restrict__ points, // (P, 3) - const float* __restrict__ segms, // (S, 2, 3) - float* __restrict__ dists, // (P, S) - const size_t P, - const size_t S) { - float3* points_f3 = (float3*)points; - float3* segms_f3 = (float3*)segms; - - // Parallelize over P * S computations - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int t_i = tid; t_i < P * S; t_i += num_threads) { - const int s = t_i / P; // segment index. - const int p = t_i % P; // point index - float3 a = segms_f3[s * 2 + 0]; - float3 b = segms_f3[s * 2 + 1]; - - float3 point = points_f3[p]; - float dist = PointLine3DistanceForward(point, a, b); - dists[p * S + s] = dist; - } -} - -at::Tensor PointEdgeArrayDistanceForwardCuda( - const at::Tensor& points, - const at::Tensor& segms) { - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, segms_t{segms, "segms", 2}; - at::CheckedFrom c = "PointEdgeArrayDistanceForwardCuda"; - at::checkAllSameGPU(c, {points_t, segms_t}); - at::checkAllSameType(c, {points_t, segms_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t P = points.size(0); - const int64_t S = segms.size(0); - - TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - (segms.size(1) == 2) && (segms.size(2) == 3), - "segms must be of shape Sx2x3"); - - at::Tensor dists = at::zeros({P, S}, points.options()); - - if (dists.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return dists; - } - - const size_t blocks = 1024; - const size_t threads = 64; - - PointEdgeArrayForwardKernel<<>>( - points.contiguous().data_ptr(), - segms.contiguous().data_ptr(), - dists.data_ptr(), - P, - S); - - AT_CUDA_CHECK(cudaGetLastError()); - return dists; -} - -__global__ void PointEdgeArrayBackwardKernel( - const float* __restrict__ points, // (P, 3) - const float* __restrict__ segms, // (S, 2, 3) - const float* __restrict__ grad_dists, // (P, S) - float* __restrict__ grad_points, // (P, 3) - float* __restrict__ grad_segms, // (S, 2, 3) - const size_t P, - const size_t S) { - float3* points_f3 = (float3*)points; - float3* segms_f3 = (float3*)segms; - - // Parallelize over P * S computations - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int t_i = tid; t_i < P * S; t_i += num_threads) { - const int s = t_i / P; // segment index. 
- const int p = t_i % P; // point index - const float3 a = segms_f3[s * 2 + 0]; - const float3 b = segms_f3[s * 2 + 1]; - - const float3 point = points_f3[p]; - const float grad_dist = grad_dists[p * S + s]; - const auto grads = PointLine3DistanceBackward(point, a, b, grad_dist); - const float3 grad_point = thrust::get<0>(grads); - const float3 grad_a = thrust::get<1>(grads); - const float3 grad_b = thrust::get<2>(grads); - - atomicAdd(grad_points + p * 3 + 0, grad_point.x); - atomicAdd(grad_points + p * 3 + 1, grad_point.y); - atomicAdd(grad_points + p * 3 + 2, grad_point.z); - - atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 0, grad_a.x); - atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 1, grad_a.y); - atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 2, grad_a.z); - - atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 0, grad_b.x); - atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 1, grad_b.y); - atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 2, grad_b.z); - } -} - -std::tuple PointEdgeArrayDistanceBackwardCuda( - const at::Tensor& points, - const at::Tensor& segms, - const at::Tensor& grad_dists) { - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, segms_t{segms, "segms", 2}, - grad_dists_t{grad_dists, "grad_dists", 3}; - at::CheckedFrom c = "PointEdgeArrayDistanceBackwardCuda"; - at::checkAllSameGPU(c, {points_t, segms_t, grad_dists_t}); - at::checkAllSameType(c, {points_t, segms_t, grad_dists_t}); - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic( - "PointEdgeArrayDistanceBackwardCuda"); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t P = points.size(0); - const int64_t S = segms.size(0); - - TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3"); - TORCH_CHECK( - (segms.size(1) == 2) && (segms.size(2) == 3), - "segms must be of shape Sx2x3"); - TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == S)); - - at::Tensor grad_points = at::zeros({P, 3}, points.options()); - at::Tensor grad_segms = at::zeros({S, 2, 3}, segms.options()); - - if (grad_points.numel() == 0 || grad_segms.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_points, grad_segms); - } - - const size_t blocks = 1024; - const size_t threads = 64; - - PointEdgeArrayBackwardKernel<<>>( - points.contiguous().data_ptr(), - segms.contiguous().data_ptr(), - grad_dists.contiguous().data_ptr(), - grad_points.data_ptr(), - grad_segms.data_ptr(), - P, - S); - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(grad_points, grad_segms); -} diff --git a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h b/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h deleted file mode 100644 index 529dd5604c2ff4a4b84c590611adc4ef83edce4d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h +++ /dev/null @@ -1,707 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#pragma once -#include -#include -#include -#include "utils/pytorch3d_cutils.h" - -// **************************************************************************** -// * PointFaceDistance * -// **************************************************************************** - -// Computes the squared euclidean distance of each p in points to it closest -// triangular face belonging to the corresponding mesh example in the batch of -// size N. -// -// Args: -// points: FloatTensor of shape (P, 3) -// points_first_idx: LongTensor of shape (N,) indicating the first point -// index for each example in the batch -// tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th -// triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2]) -// tris_first_idx: LongTensor of shape (N,) indicating the first face -// index for each example in the batch -// max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing -// the maximum number of points in the batch and is used to set -// the block dimensions in the CUDA implementation. -// min_triangle_area: triangles less than this size are considered -// points/lines. -// -// Returns: -// dists: FloatTensor of shape (P,), where dists[p] is the minimum -// squared euclidean distance of points[p] to the faces in the same -// example in the batch. -// idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest -// face in the batch. -// So, dists[p] = d(points[p], tris[idxs[p], 0], tris[idxs[p], 1], -// tris[idxs[p], 2]) where d(u, v0, v1, v2) is the distance of u from the -// face spanned by (v0, v1, v2) -// -// - -#ifdef WITH_CUDA - -std::tuple PointFaceDistanceForwardCuda( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const int64_t max_points, - const double min_triangle_area); -#endif - -std::tuple PointFaceDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const double min_triangle_area); - -std::tuple PointFaceDistanceForward( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const int64_t max_points, - const double min_triangle_area) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(points_first_idx); - CHECK_CUDA(tris); - CHECK_CUDA(tris_first_idx); - return PointFaceDistanceForwardCuda( - points, - points_first_idx, - tris, - tris_first_idx, - max_points, - min_triangle_area); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointFaceDistanceForwardCpu( - points, points_first_idx, tris, tris_first_idx, min_triangle_area); -} - -// Backward pass for PointFaceDistance. -// -// Args: -// points: FloatTensor of shape (P, 3) -// tris: FloatTensor of shape (T, 3, 3) -// idx_points: LongTensor of shape (P,) containing the indices -// of the closest face in the example in the batch. -// This is computed by the forward pass -// grad_dists: FloatTensor of shape (P,) -// min_triangle_area: triangles less than this size are considered -// points/lines. 
-// -// Returns: -// grad_points: FloatTensor of shape (P, 3) -// grad_tris: FloatTensor of shape (T, 3, 3) -// - -#ifdef WITH_CUDA - -std::tuple PointFaceDistanceBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists, - const double min_triangle_area); -#endif -std::tuple PointFaceDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists, - const double min_triangle_area); - -std::tuple PointFaceDistanceBackward( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists, - const double min_triangle_area) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(tris); - CHECK_CUDA(idx_points); - CHECK_CUDA(grad_dists); - return PointFaceDistanceBackwardCuda( - points, tris, idx_points, grad_dists, min_triangle_area); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointFaceDistanceBackwardCpu( - points, tris, idx_points, grad_dists, min_triangle_area); -} - -// **************************************************************************** -// * FacePointDistance * -// **************************************************************************** - -// Computes the squared euclidean distance of each triangular face to its -// closest point belonging to the corresponding example in the batch of size N. -// -// Args: -// points: FloatTensor of shape (P, 3) -// points_first_idx: LongTensor of shape (N,) indicating the first point -// index for each example in the batch -// tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th -// triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2]) -// tris_first_idx: LongTensor of shape (N,) indicating the first face -// index for each example in the batch -// max_tris: Scalar equal to max(T_i) for i in [0, N - 1] containing -// the maximum number of faces in the batch and is used to set -// the block dimensions in the CUDA implementation. -// min_triangle_area: triangles less than this size are considered -// points/lines. -// -// Returns: -// dists: FloatTensor of shape (T,), where dists[t] is the minimum squared -// euclidean distance of t-th triangular face from the closest point in -// the batch. -// idxs: LongTensor of shape (T,), where idxs[t] is the index of the closest -// point in the batch. 
-// So, dists[t] = d(points[idxs[t]], tris[t, 0], tris[t, 1], tris[t, 2]) -// where d(u, v0, v1, v2) is the distance of u from the triangular face -// spanned by (v0, v1, v2) -// - -#ifdef WITH_CUDA - -std::tuple FacePointDistanceForwardCuda( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const int64_t max_tris, - const double min_triangle_area); -#endif - -std::tuple FacePointDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const double min_triangle_area); - -std::tuple FacePointDistanceForward( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& tris, - const torch::Tensor& tris_first_idx, - const int64_t max_tris, - const double min_triangle_area) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(points_first_idx); - CHECK_CUDA(tris); - CHECK_CUDA(tris_first_idx); - return FacePointDistanceForwardCuda( - points, - points_first_idx, - tris, - tris_first_idx, - max_tris, - min_triangle_area); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return FacePointDistanceForwardCpu( - points, points_first_idx, tris, tris_first_idx, min_triangle_area); -} - -// Backward pass for FacePointDistance. -// -// Args: -// points: FloatTensor of shape (P, 3) -// tris: FloatTensor of shape (T, 3, 3) -// idx_tris: LongTensor of shape (T,) containing the indices -// of the closest point in the example in the batch. -// This is computed by the forward pass -// grad_dists: FloatTensor of shape (T,) -// min_triangle_area: triangles less than this size are considered -// points/lines. -// -// Returns: -// grad_points: FloatTensor of shape (P, 3) -// grad_tris: FloatTensor of shape (T, 3, 3) -// - -#ifdef WITH_CUDA - -std::tuple FacePointDistanceBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_tris, - const torch::Tensor& grad_dists, - const double min_triangle_area); -#endif - -std::tuple FacePointDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_tris, - const torch::Tensor& grad_dists, - const double min_triangle_area); - -std::tuple FacePointDistanceBackward( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& idx_tris, - const torch::Tensor& grad_dists, - const double min_triangle_area) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(tris); - CHECK_CUDA(idx_tris); - CHECK_CUDA(grad_dists); - return FacePointDistanceBackwardCuda( - points, tris, idx_tris, grad_dists, min_triangle_area); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return FacePointDistanceBackwardCpu( - points, tris, idx_tris, grad_dists, min_triangle_area); -} - -// **************************************************************************** -// * PointEdgeDistance * -// **************************************************************************** - -// Computes the squared euclidean distance of each p in points to the closest -// mesh edge belonging to the corresponding example in the batch of size N. -// -// Args: -// points: FloatTensor of shape (P, 3) -// points_first_idx: LongTensor of shape (N,) indicating the first point -// index for each example in the batch -// segms: FloatTensor of shape (S, 2, 3) of edge segments. 
The s-th edge -// segment is spanned by (segms[s, 0], segms[s, 1]) -// segms_first_idx: LongTensor of shape (N,) indicating the first edge -// index for each example in the batch -// max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing -// the maximum number of points in the batch and is used to set -// the grid dimensions in the CUDA implementation. -// -// Returns: -// dists: FloatTensor of shape (P,), where dists[p] is the squared euclidean -// distance of points[p] to the closest edge in the same example in the -// batch. -// idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest -// edge in the batch. -// So, dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1]), -// where d(u, v0, v1) is the distance of u from the segment spanned by -// (v0, v1). -// - -#ifdef WITH_CUDA - -std::tuple PointEdgeDistanceForwardCuda( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t max_points); -#endif - -std::tuple PointEdgeDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t max_points); - -std::tuple PointEdgeDistanceForward( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t max_points) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(points_first_idx); - CHECK_CUDA(segms); - CHECK_CUDA(segms_first_idx); - return PointEdgeDistanceForwardCuda( - points, points_first_idx, segms, segms_first_idx, max_points); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointEdgeDistanceForwardCpu( - points, points_first_idx, segms, segms_first_idx, max_points); -} - -// Backward pass for PointEdgeDistance. -// -// Args: -// points: FloatTensor of shape (P, 3) -// segms: FloatTensor of shape (S, 2, 3) -// idx_points: LongTensor of shape (P,) containing the indices -// of the closest edge in the example in the batch. -// This is computed by the forward pass. 
-// grad_dists: FloatTensor of shape (P,) -// -// Returns: -// grad_points: FloatTensor of shape (P, 3) -// grad_segms: FloatTensor of shape (S, 2, 3) -// - -#ifdef WITH_CUDA - -std::tuple PointEdgeDistanceBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists); -#endif - -std::tuple PointEdgeDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists); - -std::tuple PointEdgeDistanceBackward( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_points, - const torch::Tensor& grad_dists) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(segms); - CHECK_CUDA(idx_points); - CHECK_CUDA(grad_dists); - return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists); -} - -// **************************************************************************** -// * EdgePointDistance * -// **************************************************************************** - -// Computes the squared euclidean distance of each edge segment to the closest -// point belonging to the corresponding example in the batch of size N. -// -// Args: -// points: FloatTensor of shape (P, 3) -// points_first_idx: LongTensor of shape (N,) indicating the first point -// index for each example in the batch -// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge -// segment is spanned by (segms[s, 0], segms[s, 1]) -// segms_first_idx: LongTensor of shape (N,) indicating the first edge -// index for each example in the batch -// max_segms: Scalar equal to max(S_i) for i in [0, N - 1] containing -// the maximum number of edges in the batch and is used to set -// the block dimensions in the CUDA implementation. -// -// Returns: -// dists: FloatTensor of shape (S,), where dists[s] is the squared -// euclidean distance of s-th edge to the closest point in the -// corresponding example in the batch. -// idxs: LongTensor of shape (S,), where idxs[s] is the index of the closest -// point in the example in the batch. 
-// So, dists[s] = d(points[idxs[s]], segms[s, 0], segms[s, 1]), where -// d(u, v0, v1) is the distance of u from the segment spanned by (v0, v1) -// -// - -#ifdef WITH_CUDA - -std::tuple EdgePointDistanceForwardCuda( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t max_segms); -#endif - -std::tuple EdgePointDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t max_segms); - -std::tuple EdgePointDistanceForward( - const torch::Tensor& points, - const torch::Tensor& points_first_idx, - const torch::Tensor& segms, - const torch::Tensor& segms_first_idx, - const int64_t max_segms) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(points_first_idx); - CHECK_CUDA(segms); - CHECK_CUDA(segms_first_idx); - return EdgePointDistanceForwardCuda( - points, points_first_idx, segms, segms_first_idx, max_segms); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return EdgePointDistanceForwardCpu( - points, points_first_idx, segms, segms_first_idx, max_segms); -} - -// Backward pass for EdgePointDistance. -// -// Args: -// points: FloatTensor of shape (P, 3) -// segms: FloatTensor of shape (S, 2, 3) -// idx_segms: LongTensor of shape (S,) containing the indices -// of the closest point in the example in the batch. -// This is computed by the forward pass -// grad_dists: FloatTensor of shape (S,) -// -// Returns: -// grad_points: FloatTensor of shape (P, 3) -// grad_segms: FloatTensor of shape (S, 2, 3) -// - -#ifdef WITH_CUDA - -std::tuple EdgePointDistanceBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_segms, - const torch::Tensor& grad_dists); -#endif - -std::tuple EdgePointDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_segms, - const torch::Tensor& grad_dists); - -std::tuple EdgePointDistanceBackward( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& idx_segms, - const torch::Tensor& grad_dists) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(segms); - CHECK_CUDA(idx_segms); - CHECK_CUDA(grad_dists); - return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists); -} - -// **************************************************************************** -// * PointFaceArrayDistance * -// **************************************************************************** - -// Computes the squared euclidean distance of each p in points to each -// triangular face spanned by (v0, v1, v2) in tris. -// -// Args: -// points: FloatTensor of shape (P, 3) -// tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th -// triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2]) -// min_triangle_area: triangles less than this size are considered -// points/lines. -// -// Returns: -// dists: FloatTensor of shape (P, T), where dists[p, t] is the squared -// euclidean distance of points[p] to the face spanned by (v0, v1, v2) -// where v0 = tris[t, 0], v1 = tris[t, 1] and v2 = tris[t, 2] -// -// For pointcloud and meshes of batch size N, this function requires N -// computations. 
The memory occupied is O(NPT) which can become quite large. -// For example, a medium sized batch with N = 32 with P = 10000 and T = 5000 -// will require for the forward pass 5.8G of memory to store dists. - -#ifdef WITH_CUDA - -torch::Tensor PointFaceArrayDistanceForwardCuda( - const torch::Tensor& points, - const torch::Tensor& tris, - const double min_triangle_area); -#endif - -torch::Tensor PointFaceArrayDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const double min_triangle_area); - -torch::Tensor PointFaceArrayDistanceForward( - const torch::Tensor& points, - const torch::Tensor& tris, - const double min_triangle_area) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(tris); - return PointFaceArrayDistanceForwardCuda(points, tris, min_triangle_area); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area); -} - -// Backward pass for PointFaceArrayDistance. -// -// Args: -// points: FloatTensor of shape (P, 3) -// tris: FloatTensor of shape (T, 3, 3) -// grad_dists: FloatTensor of shape (P, T) -// min_triangle_area: triangles less than this size are considered -// points/lines. -// -// Returns: -// grad_points: FloatTensor of shape (P, 3) -// grad_tris: FloatTensor of shape (T, 3, 3) -// - -#ifdef WITH_CUDA -std::tuple PointFaceArrayDistanceBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& grad_dists, - const double min_triangle_area); -#endif -std::tuple PointFaceArrayDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& grad_dists, - const double min_triangle_area); - -std::tuple PointFaceArrayDistanceBackward( - const torch::Tensor& points, - const torch::Tensor& tris, - const torch::Tensor& grad_dists, - const double min_triangle_area) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(tris); - CHECK_CUDA(grad_dists); - return PointFaceArrayDistanceBackwardCuda( - points, tris, grad_dists, min_triangle_area); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointFaceArrayDistanceBackwardCpu( - points, tris, grad_dists, min_triangle_area); -} - -// **************************************************************************** -// * PointEdgeArrayDistance * -// **************************************************************************** - -// Computes the squared euclidean distance of each p in points to each edge -// segment in segms. -// -// Args: -// points: FloatTensor of shape (P, 3) -// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th -// edge segment is spanned by (segms[s, 0], segms[s, 1]) -// -// Returns: -// dists: FloatTensor of shape (P, S), where dists[p, s] is the squared -// euclidean distance of points[p] to the segment spanned by -// (segms[s, 0], segms[s, 1]) -// -// For pointcloud and meshes of batch size N, this function requires N -// computations. The memory occupied is O(NPS) which can become quite large. -// For example, a medium sized batch with N = 32 with P = 10000 and S = 5000 -// will require for the forward pass 5.8G of memory to store dists. 
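As a quick sanity check of the memory figures quoted in the comments above: the full distance matrix holds N*P*T (or N*P*S) float32 entries for a batch of size N. A minimal host-side sketch, using the hypothetical sizes from the comment (not part of the original sources):

#include <cstdio>

int main() {
  // Sizes quoted in the comment above: N = 32, P = 10000, T (or S) = 5000.
  const double N = 32, P = 10000, T = 5000;
  const double bytes = N * P * T * sizeof(float); // one float32 per (point, face/edge) pair
  std::printf("dists needs ~%.2f GiB\n", bytes / (1024.0 * 1024.0 * 1024.0));
  // Prints ~5.96 GiB, consistent with the ~5.8G quoted for the forward pass.
  return 0;
}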
- -#ifdef WITH_CUDA -torch::Tensor PointEdgeArrayDistanceForwardCuda( - const torch::Tensor& points, - const torch::Tensor& segms); -#endif - -torch::Tensor PointEdgeArrayDistanceForwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms); - -torch::Tensor PointEdgeArrayDistanceForward( - const torch::Tensor& points, - const torch::Tensor& segms) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(segms); - return PointEdgeArrayDistanceForwardCuda(points, segms); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointEdgeArrayDistanceForwardCpu(points, segms); -} - -// Backward pass for PointEdgeArrayDistance. -// -// Args: -// points: FloatTensor of shape (P, 3) -// segms: FloatTensor of shape (S, 2, 3) -// grad_dists: FloatTensor of shape (P, S) -// -// Returns: -// grad_points: FloatTensor of shape (P, 3) -// grad_segms: FloatTensor of shape (S, 2, 3) -// - -#ifdef WITH_CUDA - -std::tuple PointEdgeArrayDistanceBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& grad_dists); -#endif - -std::tuple PointEdgeArrayDistanceBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& grad_dists); - -std::tuple PointEdgeArrayDistanceBackward( - const torch::Tensor& points, - const torch::Tensor& segms, - const torch::Tensor& grad_dists) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(segms); - CHECK_CUDA(grad_dists); - return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists); -} diff --git a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu deleted file mode 100644 index 43d4ed55a3b0c0bb6e5b0256ef62084ccebfe660..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -using at::PackedTensorAccessor64; -using at::RestrictPtrTraits; - -// A chunk of work is blocksize-many points. -// There are N clouds in the batch, and P points in each cloud. -// The number of potential chunks to do per cloud is (1+(P-1)/blocksize), -// which we call chunks_per_cloud. -// These (N*chunks_per_cloud) chunks are divided among the gridSize-many blocks. -// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc . -// In chunk i, we work on cloud (i/chunks_per_cloud) on points starting from -// blocksize*(i%chunks_per_cloud). - -// Explanation of the calculation is in the cpp file. - -// EightDirections(t) runs t(a,b,c) for every combination of boolean a, b, c. 
-template -static __device__ void EightDirections(T&& t) { - t(false, false, false); - t(false, false, true); - t(false, true, false); - t(false, true, true); - t(true, false, false); - t(true, false, true); - t(true, true, false); - t(true, true, true); -} - -__global__ void PointsToVolumesForwardKernel( - const PackedTensorAccessor64 points_3d, - const PackedTensorAccessor64 points_features, - PackedTensorAccessor64 volume_densities, - PackedTensorAccessor64 volume_features, - PackedTensorAccessor64 grid_sizes, - PackedTensorAccessor64 mask, - const float point_weight, - const bool align_corners, - const bool splat, - const int64_t batch_size, - const int64_t P, - const int64_t n_features) { - const int64_t chunks_per_cloud = (1 + (P - 1) / blockDim.x); - const int64_t chunks_to_do = batch_size * chunks_per_cloud; - const int scale_offset = align_corners ? 1 : 0; - const float offset = align_corners ? 0 : 0.5; - for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t batch_index = chunk / chunks_per_cloud; - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t point_idx = start_point + threadIdx.x; - if (point_idx >= P) { - continue; - } - if (mask[batch_index][point_idx] == 0) { - continue; - } - auto volume_densities_aa = volume_densities[batch_index][0]; - auto volume_features_aa = volume_features[batch_index]; - auto point = points_3d[batch_index][point_idx]; - auto point_features = points_features[batch_index][point_idx]; - const int64_t grid_size_x = grid_sizes[batch_index][2]; - const int64_t grid_size_y = grid_sizes[batch_index][1]; - const int64_t grid_size_z = grid_sizes[batch_index][0]; - auto increment_location = - [&](int64_t x, int64_t y, int64_t z, float weight) { - if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) { - return; - } - if (x < 0 || y < 0 || z < 0) { - return; - } - - atomicAdd(&volume_densities_aa[z][y][x], weight * point_weight); - - for (int64_t feature_idx = 0; feature_idx < n_features; - ++feature_idx) { - atomicAdd( - &volume_features_aa[feature_idx][z][y][x], - point_features[feature_idx] * weight * point_weight); - } - }; - if (!splat) { - long x = std::lround( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset); - long y = std::lround( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset); - long z = std::lround( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset); - increment_location(x, y, z, 1); - } else { - float x = 0, y = 0, z = 0; - float rx = std::modf( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x); - float ry = std::modf( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y); - float rz = std::modf( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z); - auto handle_point = [&](bool up_x, bool up_y, bool up_z) { - float weight = - (up_x ? rx : 1 - rx) * (up_y ? ry : 1 - ry) * (up_z ? 
rz : 1 - rz); - increment_location(x + up_x, y + up_y, z + up_z, weight); - }; - EightDirections(handle_point); - } - } -} - -void PointsToVolumesForwardCuda( - const at::Tensor& points_3d, - const at::Tensor& points_features, - const at::Tensor& volume_densities, - const at::Tensor& volume_features, - const at::Tensor& grid_sizes, - const at::Tensor& mask, - const float point_weight, - const bool align_corners, - const bool splat) { - // Check inputs are on the same device - at::TensorArg points_3d_t{points_3d, "points_3d", 1}, - points_features_t{points_features, "points_features", 2}, - volume_densities_t{volume_densities, "volume_densities", 3}, - volume_features_t{volume_features, "volume_features", 4}, - grid_sizes_t{grid_sizes, "grid_sizes", 5}, mask_t{mask, "mask", 6}; - at::CheckedFrom c = "PointsToVolumesForwardCuda"; - at::checkAllSameGPU( - c, - {points_3d_t, - points_features_t, - volume_densities_t, - volume_features_t, - grid_sizes_t, - mask_t}); - - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("PointsToVolumesForwardCuda"); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points_3d.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int blocks = 1024; - const int threads = 32; - - const int64_t batch_size = points_3d.size(0); - const int64_t P = points_3d.size(1); - const int64_t n_features = points_features.size(2); - - PointsToVolumesForwardKernel<<>>( - points_3d.packed_accessor64(), - points_features.packed_accessor64(), - volume_densities.packed_accessor64(), - volume_features.packed_accessor64(), - grid_sizes.packed_accessor64(), - mask.packed_accessor64(), - point_weight, - align_corners, - splat, - batch_size, - P, - n_features); -} - -__global__ void PointsToVolumesBackwardKernel( - const PackedTensorAccessor64 points_3d, - const PackedTensorAccessor64 points_features, - const PackedTensorAccessor64 grid_sizes, - const PackedTensorAccessor64 mask, - PackedTensorAccessor64 grad_volume_densities, - PackedTensorAccessor64 grad_volume_features, - PackedTensorAccessor64 grad_points_3d, - PackedTensorAccessor64 grad_points_features, - const float point_weight, - const bool align_corners, - const bool splat, - const int64_t batch_size, - const int64_t P, - const int64_t n_features) { - const int64_t chunks_per_cloud = (1 + (P - 1) / blockDim.x); - const int64_t chunks_to_do = batch_size * chunks_per_cloud; - const int scale_offset = align_corners ? 1 : 0; - const float offset = align_corners ? 0 : 0.5; - // Note that the gradients belonging to each point are only touched by - // a single thread in one of our "chunks", which is in a single block. - // So unlike in the forward pass, there's no need for atomics here. 
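For reference, the chunk-to-(cloud, point range) mapping described in the comment at the top of this file, which the loop below walks in grid-stride fashion, is easy to check on the host. A minimal sketch with hypothetical sizes (not part of the kernel):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical sizes: N = 2 clouds, P = 100 points each, blocksize = 32 threads.
  const int64_t N = 2, P = 100, blocksize = 32;
  const int64_t chunks_per_cloud = 1 + (P - 1) / blocksize; // = 4
  const int64_t chunks_to_do = N * chunks_per_cloud;        // = 8
  for (int64_t chunk = 0; chunk < chunks_to_do; ++chunk) {
    const int64_t cloud = chunk / chunks_per_cloud;
    const int64_t start_point = blocksize * (chunk % chunks_per_cloud);
    std::printf(
        "chunk %2lld -> cloud %lld, points [%lld, %lld)\n",
        (long long)chunk,
        (long long)cloud,
        (long long)start_point,
        (long long)std::min(start_point + blocksize, P));
  }
  return 0;
}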
- for (int64_t chunk = blockIdx.x; chunk < chunks_to_do; chunk += gridDim.x) { - const int64_t batch_index = chunk / chunks_per_cloud; - const int64_t start_point = blockDim.x * (chunk % chunks_per_cloud); - int64_t point_idx = start_point + threadIdx.x; - if (point_idx >= P) { - continue; - } - if (mask[batch_index][point_idx] == 0) { - continue; - } - auto point = points_3d[batch_index][point_idx]; - auto point_features = points_features[batch_index][point_idx]; - auto grad_point = grad_points_3d[batch_index][point_idx]; - auto grad_point_features = grad_points_features[batch_index][point_idx]; - auto grad_volume_densities_a = grad_volume_densities[batch_index][0]; - auto grad_volume_features_a = grad_volume_features[batch_index]; - const int64_t grid_size_x = grid_sizes[batch_index][2]; - const int64_t grid_size_y = grid_sizes[batch_index][1]; - const int64_t grid_size_z = grid_sizes[batch_index][0]; - - auto increment_location = - [&](int64_t x, int64_t y, int64_t z, float weight) { - if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) { - return false; - } - if (x < 0 || y < 0 || z < 0) { - return false; - } - - // This is a forward line, for comparison - // volume_densities_aa[z][y][x] += weight * point_weight; - - for (int64_t feature_idx = 0; feature_idx < n_features; - ++feature_idx) { - // This is a forward line, for comparison - // volume_features_aa[feature_idx][z][y][x] += - // point_features[feature_idx] * weight * point_weight; - grad_point_features[feature_idx] += - grad_volume_features_a[feature_idx][z][y][x] * weight * - point_weight; - } - return true; - }; - - if (!splat) { - long x = std::lround( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset); - long y = std::lround( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset); - long z = std::lround( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset); - increment_location(x, y, z, 1); - } else { - float x = 0, y = 0, z = 0; - float rx = std::modf( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x); - float ry = std::modf( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y); - float rz = std::modf( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z); - auto handle_point = [&](bool up_x, bool up_y, bool up_z) { - float weight_x = (up_x ? rx : 1 - rx); - float weight_y = (up_y ? ry : 1 - ry); - float weight_z = (up_z ? rz : 1 - rz); - float weight = weight_x * weight_y * weight_z; - if (increment_location(x + up_x, y + up_y, z + up_z, weight)) { - // weight * point_weight has been added to - // volume_densities_aa[z+up_z][y+up_y][x+up_x] - // Also for each feature_idx, - // point_features[feature_idx] * weight * point_weight - // has been added to - // volume_features_aa[feature_idx][z+up_z][y+up_y][x+up_x] - - double source_gradient = - grad_volume_densities_a[z + up_z][y + up_y][x + up_x]; - for (int64_t feature_idx = 0; feature_idx < n_features; - ++feature_idx) { - source_gradient += point_features[feature_idx] * - grad_volume_features_a[feature_idx][z + up_z][y + up_y] - [x + up_x]; - } - grad_point[0] += source_gradient * (up_x ? 1 : -1) * weight_y * - weight_z * 0.5 * (grid_size_x - scale_offset) * point_weight; - grad_point[1] += source_gradient * (up_y ? 1 : -1) * weight_x * - weight_z * 0.5 * (grid_size_y - scale_offset) * point_weight; - grad_point[2] += source_gradient * (up_z ? 
1 : -1) * weight_x * - weight_y * 0.5 * (grid_size_z - scale_offset) * point_weight; - } - }; - EightDirections(handle_point); - } - } -} - -void PointsToVolumesBackwardCuda( - const at::Tensor& points_3d, - const at::Tensor& points_features, - const at::Tensor& grid_sizes, - const at::Tensor& mask, - const float point_weight, - const bool align_corners, - const bool splat, - const at::Tensor& grad_volume_densities, - const at::Tensor& grad_volume_features, - const at::Tensor& grad_points_3d, - const at::Tensor& grad_points_features) { - // Check inputs are on the same device - at::TensorArg points_3d_t{points_3d, "points_3d", 1}, - points_features_t{points_features, "points_features", 2}, - grid_sizes_t{grid_sizes, "grid_sizes", 3}, mask_t{mask, "mask", 4}, - grad_volume_densities_t{ - grad_volume_densities, "grad_volume_densities", 8}, - grad_volume_features_t{grad_volume_features, "grad_volume_features", 9}, - grad_points_3d_t{grad_points_3d, "grad_points_3d", 10}, - grad_points_features_t{grad_points_features, "grad_points_features", 11}; - - at::CheckedFrom c = "PointsToVolumesBackwardCuda"; - at::checkAllSameGPU( - c, - {points_3d_t, - points_features_t, - grid_sizes_t, - mask_t, - grad_volume_densities_t, - grad_volume_features_t, - grad_points_3d_t, - grad_points_features_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points_3d.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int blocks = 1024; - const int threads = 32; - - const int64_t batch_size = points_3d.size(0); - const int64_t P = points_3d.size(1); - const int64_t n_features = points_features.size(2); - - PointsToVolumesBackwardKernel<<>>( - points_3d.packed_accessor64(), - points_features.packed_accessor64(), - grid_sizes.packed_accessor64(), - mask.packed_accessor64(), - grad_volume_densities.packed_accessor64(), - grad_volume_features.packed_accessor64(), - grad_points_3d.packed_accessor64(), - grad_points_features.packed_accessor64(), - point_weight, - align_corners, - splat, - batch_size, - P, - n_features); -} diff --git a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.h b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.h deleted file mode 100644 index 4c5eba3c9e55f7dc81f1df8a7e6698abdec33d70..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include -#include -#include "utils/pytorch3d_cutils.h" - -/* - volume_features and volume_densities are modified in place. - - Args: - points_3d: Batch of 3D point cloud coordinates of shape - `(minibatch, N, 3)` where N is the number of points - in each point cloud. Coordinates have to be specified in the - local volume coordinates (ranging in [-1, 1]). - points_features: Features of shape `(minibatch, N, feature_dim)` - corresponding to the points of the input point cloud `points_3d`. - volume_features: Batch of input feature volumes - of shape `(minibatch, feature_dim, D, H, W)` - volume_densities: Batch of input feature volume densities - of shape `(minibatch, 1, D, H, W)`. Each voxel should - contain a non-negative number corresponding to its - opaqueness (the higher, the less transparent). 
- - grid_sizes: `LongTensor` of shape (minibatch, 3) representing the - spatial resolutions of each of the the non-flattened `volumes` - tensors. Note that the following has to hold: - `torch.prod(grid_sizes, dim=1)==N_voxels`. - - point_weight: A scalar controlling how much weight a single point has. - - mask: A binary mask of shape `(minibatch, N)` determining - which 3D points are going to be converted to the resulting - volume. Set to `None` if all points are valid. - - align_corners: as for grid_sample. - - splat: if true, trilinear interpolation. If false all the weight goes in - the nearest voxel. -*/ - -void PointsToVolumesForwardCpu( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& volume_densities, - const torch::Tensor& volume_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - float point_weight, - bool align_corners, - bool splat); - -void PointsToVolumesForwardCuda( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& volume_densities, - const torch::Tensor& volume_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - float point_weight, - bool align_corners, - bool splat); - -inline void PointsToVolumesForward( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& volume_densities, - const torch::Tensor& volume_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - float point_weight, - bool align_corners, - bool splat) { - if (points_3d.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points_3d); - CHECK_CUDA(points_features); - CHECK_CUDA(volume_densities); - CHECK_CUDA(volume_features); - CHECK_CUDA(grid_sizes); - CHECK_CUDA(mask); - PointsToVolumesForwardCuda( - points_3d, - points_features, - volume_densities, - volume_features, - grid_sizes, - mask, - point_weight, - align_corners, - splat); - torch::autograd::increment_version(volume_features); - torch::autograd::increment_version(volume_densities); - return; -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - PointsToVolumesForwardCpu( - points_3d, - points_features, - volume_densities, - volume_features, - grid_sizes, - mask, - point_weight, - align_corners, - splat); -} - -// grad_points_3d and grad_points_features are modified in place. 
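To make the align_corners and splat options documented above concrete, here is a hedged standalone sketch (not part of the header) of the voxel-coordinate mapping location = (px + 1) * 0.5 * (grid_size - scale_offset) - offset used by the kernels, together with the eight trilinear splat weights, which always sum to one:

#include <cmath>
#include <cstdio>
#include <initializer_list>

int main() {
  const int grid_size = 8; // hypothetical resolution along one axis
  for (int align_corners = 0; align_corners <= 1; ++align_corners) {
    const int scale_offset = align_corners ? 1 : 0;
    const float offset = align_corners ? 0.f : 0.5f;
    for (float px : {-1.f, 0.f, 1.f}) {
      const float loc = (px + 1) * 0.5f * (grid_size - scale_offset) - offset;
      std::printf("align_corners=%d px=%+.1f -> location %.2f (nearest voxel %ld)\n",
                  align_corners, px, loc, std::lround(loc));
    }
  }
  // The eight "splat" weights for hypothetical fractional offsets (rx, ry, rz)
  // sum to 1, so each point contributes exactly point_weight in total.
  const float rx = 0.25f, ry = 0.5f, rz = 0.9f;
  float total = 0.f;
  for (int ux = 0; ux <= 1; ++ux)
    for (int uy = 0; uy <= 1; ++uy)
      for (int uz = 0; uz <= 1; ++uz)
        total += (ux ? rx : 1 - rx) * (uy ? ry : 1 - ry) * (uz ? rz : 1 - rz);
  std::printf("sum of the 8 splat weights = %.3f\n", total);
  return 0;
}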
- -void PointsToVolumesBackwardCpu( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - float point_weight, - bool align_corners, - bool splat, - const torch::Tensor& grad_volume_densities, - const torch::Tensor& grad_volume_features, - const torch::Tensor& grad_points_3d, - const torch::Tensor& grad_points_features); - -void PointsToVolumesBackwardCuda( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - float point_weight, - bool align_corners, - bool splat, - const torch::Tensor& grad_volume_densities, - const torch::Tensor& grad_volume_features, - const torch::Tensor& grad_points_3d, - const torch::Tensor& grad_points_features); - -inline void PointsToVolumesBackward( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - float point_weight, - bool align_corners, - bool splat, - const torch::Tensor& grad_volume_densities, - const torch::Tensor& grad_volume_features, - const torch::Tensor& grad_points_3d, - const torch::Tensor& grad_points_features) { - if (points_3d.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points_3d); - CHECK_CUDA(points_features); - CHECK_CUDA(grid_sizes); - CHECK_CUDA(mask); - CHECK_CUDA(grad_volume_densities); - CHECK_CUDA(grad_volume_features); - CHECK_CUDA(grad_points_3d); - CHECK_CUDA(grad_points_features); - PointsToVolumesBackwardCuda( - points_3d, - points_features, - grid_sizes, - mask, - point_weight, - align_corners, - splat, - grad_volume_densities, - grad_volume_features, - grad_points_3d, - grad_points_features); - return; -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - PointsToVolumesBackwardCpu( - points_3d, - points_features, - grid_sizes, - mask, - point_weight, - align_corners, - splat, - grad_volume_densities, - grad_volume_features, - grad_points_3d, - grad_points_features); -} diff --git a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp b/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp deleted file mode 100644 index 900ea097a9a6e734c694305ca8f78b3e4d6eccbf..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include - -// In the x direction, the location {0, ..., grid_size_x - 1} correspond to -// points px in [-1, 1]. There are two ways to do this. - -// If align_corners=True, px=-1 is the exact location 0 and px=1 is the exact -// location grid_size_x - 1. -// So the location of px is {(px + 1) * 0.5} * (grid_size_x - 1). -// Note that if you generate random points within the bounds you are less likely -// to hit the edge locations than other locations. -// This can be thought of as saying "location i" means a specific point. - -// If align_corners=False, px=-1 is half way between the exact location 0 and -// the non-existent location -1, i.e. location -0.5. -// Similarly px=1 is is half way between the exact location grid_size_x-1 and -// the non-existent location grid_size, i.e. the location grid_size_x - 0.5. 
-// So the location of px is ({(px + 1) * 0.5} * grid_size_x) - 0.5. -// Note that if you generate random points within the bounds you are equally -// likely to hit any location. -// This can be thought of as saying "location i" means the whole box from -// (i-0.5) to (i+0.5) - -// EightDirections(t) runs t(a,b,c) for every combination of boolean a, b, c. -template -static void EightDirections(T&& t) { - t(false, false, false); - t(false, false, true); - t(false, true, false); - t(false, true, true); - t(true, false, false); - t(true, false, true); - t(true, true, false); - t(true, true, true); -} - -void PointsToVolumesForwardCpu( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& volume_densities, - const torch::Tensor& volume_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - const float point_weight, - const bool align_corners, - const bool splat) { - const int64_t batch_size = points_3d.size(0); - const int64_t P = points_3d.size(1); - const int64_t n_features = points_features.size(2); - - // We unify the formula for the location of px in the comment above as - // ({(px + 1) * 0.5} * (grid_size_x-scale_offset)) - offset. - const int scale_offset = align_corners ? 1 : 0; - const float offset = align_corners ? 0 : 0.5; - - auto points_3d_a = points_3d.accessor(); - auto points_features_a = points_features.accessor(); - auto volume_densities_a = volume_densities.accessor(); - auto volume_features_a = volume_features.accessor(); - auto grid_sizes_a = grid_sizes.accessor(); - auto mask_a = mask.accessor(); - - // For each batch element - for (int64_t batch_idx = 0; batch_idx < batch_size; ++batch_idx) { - auto points_3d_aa = points_3d_a[batch_idx]; - auto points_features_aa = points_features_a[batch_idx]; - auto volume_densities_aa = volume_densities_a[batch_idx][0]; - auto volume_features_aa = volume_features_a[batch_idx]; - auto grid_sizes_aa = grid_sizes_a[batch_idx]; - auto mask_aa = mask_a[batch_idx]; - - const int64_t grid_size_x = grid_sizes_aa[2]; - const int64_t grid_size_y = grid_sizes_aa[1]; - const int64_t grid_size_z = grid_sizes_aa[0]; - - // For each point - for (int64_t point_idx = 0; point_idx < P; ++point_idx) { - // Ignore point if mask is 0 - if (mask_aa[point_idx] == 0) { - continue; - } - auto point = points_3d_aa[point_idx]; - auto point_features = points_features_aa[point_idx]; - - // Define how to increment a location in the volume by an amount. The need - // for this depends on the interpolation method: - // once per point for nearest, eight times for splat. - auto increment_location = - [&](int64_t x, int64_t y, int64_t z, float weight) { - if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) { - return; - } - if (x < 0 || y < 0 || z < 0) { - return; - } - - volume_densities_aa[z][y][x] += weight * point_weight; - - for (int64_t feature_idx = 0; feature_idx < n_features; - ++feature_idx) { - volume_features_aa[feature_idx][z][y][x] += - point_features[feature_idx] * weight * point_weight; - } - }; - - if (!splat) { - // Increment the location nearest the point. - long x = std::lround( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset); - long y = std::lround( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset); - long z = std::lround( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset); - increment_location(x, y, z, 1); - } else { - // There are 8 locations around the point which we need to worry about. 
- // Their coordinates are (x or x+1, y or y+1, z or z+1). - // rx is a number between 0 and 1 for the proportion in the x direction: - // rx==0 means weight all on the lower bound, x, rx=1-eps means most - // weight on x+1. Ditto for ry and yz. - float x = 0, y = 0, z = 0; - float rx = std::modf( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x); - float ry = std::modf( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y); - float rz = std::modf( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z); - // Define how to fractionally increment one of the 8 locations around - // the point. - auto handle_point = [&](bool up_x, bool up_y, bool up_z) { - float weight = (up_x ? rx : 1 - rx) * (up_y ? ry : 1 - ry) * - (up_z ? rz : 1 - rz); - increment_location(x + up_x, y + up_y, z + up_z, weight); - }; - // and do so. - EightDirections(handle_point); - } - } - } - torch::autograd::increment_version(volume_features); - torch::autograd::increment_version(volume_densities); -} - -// With nearest, the only smooth dependence is that volume features -// depend on points features. -// -// With splat, the dependencies are as follows, with gradients passing -// in the opposite direction. -// -// points_3d points_features -// β”‚ β”‚ β”‚ -// β”‚ β”‚ β”‚ -// β”‚ └───────────┐ β”‚ -// β”‚ β”‚ β”‚ -// β”‚ β”‚ β”‚ -// β–Ό β–Ό β–Ό -// volume_densities volume_features - -// It is also the case that the input volume_densities and -// volume_features affect the corresponding outputs (they are -// modified in place). -// But the forward pass just increments these by a value which -// does not depend on them. So our autograd backwards pass needs -// to copy the gradient for each of those outputs to the -// corresponding input. We just do that in the Python layer. - -void PointsToVolumesBackwardCpu( - const torch::Tensor& points_3d, - const torch::Tensor& points_features, - const torch::Tensor& grid_sizes, - const torch::Tensor& mask, - const float point_weight, - const bool align_corners, - const bool splat, - const torch::Tensor& grad_volume_densities, - const torch::Tensor& grad_volume_features, - const torch::Tensor& grad_points_3d, - const torch::Tensor& grad_points_features) { - const int64_t batch_size = points_3d.size(0); - const int64_t P = points_3d.size(1); - const int64_t n_features = grad_points_features.size(2); - const int scale_offset = align_corners ? 1 : 0; - const float offset = align_corners ? 
0 : 0.5; - - auto points_3d_a = points_3d.accessor(); - auto points_features_a = points_features.accessor(); - auto grid_sizes_a = grid_sizes.accessor(); - auto mask_a = mask.accessor(); - auto grad_volume_densities_a = grad_volume_densities.accessor(); - auto grad_volume_features_a = grad_volume_features.accessor(); - auto grad_points_3d_a = grad_points_3d.accessor(); - auto grad_points_features_a = grad_points_features.accessor(); - - // For each batch element - for (int64_t batch_idx = 0; batch_idx < batch_size; ++batch_idx) { - auto points_3d_aa = points_3d_a[batch_idx]; - auto points_features_aa = points_features_a[batch_idx]; - auto grid_sizes_aa = grid_sizes_a[batch_idx]; - auto mask_aa = mask_a[batch_idx]; - auto grad_volume_densities_aa = grad_volume_densities_a[batch_idx][0]; - auto grad_volume_features_aa = grad_volume_features_a[batch_idx]; - auto grad_points_3d_aa = grad_points_3d_a[batch_idx]; - auto grad_points_features_aa = grad_points_features_a[batch_idx]; - - const int64_t grid_size_x = grid_sizes_aa[2]; - const int64_t grid_size_y = grid_sizes_aa[1]; - const int64_t grid_size_z = grid_sizes_aa[0]; - - // For each point - for (int64_t point_idx = 0; point_idx < P; ++point_idx) { - if (mask_aa[point_idx] == 0) { - continue; - } - auto point = points_3d_aa[point_idx]; - auto point_features = points_features_aa[point_idx]; - auto grad_point_features = grad_points_features_aa[point_idx]; - auto grad_point = grad_points_3d_aa[point_idx]; - - // Define how to (backwards) increment a location in the point cloud, - // to take gradients to the features. - // We return false if the location does not really exist, so there was - // nothing to do. - // This happens once per point for nearest, eight times for splat. - auto increment_location = - [&](int64_t x, int64_t y, int64_t z, float weight) { - if (x >= grid_size_x || y >= grid_size_y || z >= grid_size_z) { - return false; - } - if (x < 0 || y < 0 || z < 0) { - return false; - } - - for (int64_t feature_idx = 0; feature_idx < n_features; - ++feature_idx) { - // This is a forward line, for comparison - // volume_features_aa[feature_idx][z][y][x] += - // point_features[feature_idx] * weight * point_weight; - grad_point_features[feature_idx] += - grad_volume_features_aa[feature_idx][z][y][x] * weight * - point_weight; - } - return true; - }; - - if (!splat) { - long x = std::lround( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset); - long y = std::lround( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset); - long z = std::lround( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset); - increment_location(x, y, z, 1); - } else { - float x = 0, y = 0, z = 0; - float rx = std::modf( - (point[0] + 1) * 0.5 * (grid_size_x - scale_offset) - offset, &x); - float ry = std::modf( - (point[1] + 1) * 0.5 * (grid_size_y - scale_offset) - offset, &y); - float rz = std::modf( - (point[2] + 1) * 0.5 * (grid_size_z - scale_offset) - offset, &z); - auto handle_point = [&](bool up_x, bool up_y, bool up_z) { - float weight_x = (up_x ? rx : 1 - rx); - float weight_y = (up_y ? ry : 1 - ry); - float weight_z = (up_z ? rz : 1 - rz); - float weight = weight_x * weight_y * weight_z; - // For each of the eight locations, we first increment the feature - // gradient. - if (increment_location(x + up_x, y + up_y, z + up_z, weight)) { - // If the location is a real location, we also (in this splat - // case) need to update the gradient w.r.t. the point position. 
- // - the amount in this location is controlled by the weight. - // There are two contributions: - // (1) The point position affects how much density we added - // to the location's density, so we have a contribution - // from grad_volume_density. Specifically, - // weight * point_weight has been added to - // volume_densities_aa[z+up_z][y+up_y][x+up_x] - // - // (2) The point position affects how much of each of the - // point's features were added to the corresponding feature - // of this location, so we have a contribution from - // grad_volume_features. Specifically, for each feature_idx, - // point_features[feature_idx] * weight * point_weight - // has been added to - // volume_features_aa[feature_idx][z+up_z][y+up_y][x+up_x] - - float source_gradient = - grad_volume_densities_aa[z + up_z][y + up_y][x + up_x]; - for (int64_t feature_idx = 0; feature_idx < n_features; - ++feature_idx) { - source_gradient += point_features[feature_idx] * - grad_volume_features_aa[feature_idx][z + up_z][y + up_y] - [x + up_x]; - } - grad_point[0] += source_gradient * (up_x ? 1 : -1) * weight_y * - weight_z * 0.5 * (grid_size_x - scale_offset) * point_weight; - grad_point[1] += source_gradient * (up_y ? 1 : -1) * weight_x * - weight_z * 0.5 * (grid_size_y - scale_offset) * point_weight; - grad_point[2] += source_gradient * (up_z ? 1 : -1) * weight_x * - weight_y * 0.5 * (grid_size_z - scale_offset) * point_weight; - } - }; - EightDirections(handle_point); - } - } - } -} diff --git a/pytorch3d/pytorch3d/csrc/pulsar/constants.h b/pytorch3d/pytorch3d/csrc/pulsar/constants.h deleted file mode 100644 index a2eee6217158d3a2e7a3e92a52e5afa4107494ab..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/constants.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_CONSTANTS_H_ -#define PULSAR_NATIVE_CONSTANTS_H_ - -#define EPS 1E-6 -#define FEPS 1E-6f -#define MAX_FLOAT 3.4E38f -#define MAX_INT 2147483647 -#define MAX_UINT 4294967295u -#define MAX_USHORT 65535u - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/README.md b/pytorch3d/pytorch3d/csrc/pulsar/cuda/README.md deleted file mode 100644 index 60c5d07cba3b8d403693e9aa3db2a0b74f66c472..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# CUDA device compilation units - -This folder contains `.cu` files to create compilation units -for device-specific functions. See `../include/README.md` for -more information. diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/commands.h b/pytorch3d/pytorch3d/csrc/pulsar/cuda/commands.h deleted file mode 100644 index 00e6f37852169c6dd3ccaaf02d0381039fe2edbc..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/commands.h +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_CUDA_COMMANDS_H_ -#define PULSAR_NATIVE_CUDA_COMMANDS_H_ - -// Definitions for GPU commands. 
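A small cross-check of the numeric limits defined in pulsar/constants.h above; this is a hedged sketch assuming the usual 32-bit int and unsigned int of the platforms PyTorch3D targets, not part of the sources:

#include <limits>

static_assert(2147483647 == std::numeric_limits<int>::max(), "MAX_INT matches INT_MAX");
static_assert(4294967295u == std::numeric_limits<unsigned int>::max(), "MAX_UINT matches UINT_MAX");
static_assert(65535u == std::numeric_limits<unsigned short>::max(), "MAX_USHORT matches USHRT_MAX");
// MAX_FLOAT (3.4E38f) is a round float literal just below
// std::numeric_limits<float>::max() (~3.4028e38f).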
-#include -#include -namespace cg = cooperative_groups; - -#ifdef __DRIVER_TYPES_H__ -#ifndef DEVICE_RESET -#define DEVICE_RESET cudaDeviceReset(); -#endif -#else -#ifndef DEVICE_RESET -#define DEVICE_RESET -#endif -#endif - -#define HANDLECUDA(CMD) CMD -// handleCudaError((CMD), __FILE__, __LINE__) -inline void -handleCudaError(const cudaError_t err, const char* file, const int line) { - if (err != cudaSuccess) { -#ifndef __NVCC__ - fprintf( - stderr, - "%s(%i) : getLastCudaError() CUDA error :" - " (%d) %s.\n", - file, - line, - static_cast(err), - cudaGetErrorString(err)); - DEVICE_RESET - exit(1); -#endif - } -} -inline void -getLastCudaError(const char* errorMessage, const char* file, const int line) { - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "Error: %s.", errorMessage); - handleCudaError(err, file, line); - } -} - -#define ALIGN(VAL) __align__(VAL) -#define SYNC() HANDLECUDE(cudaDeviceSynchronize()) -#define THREADFENCE_B() __threadfence_block() -#define SHFL_SYNC(a, b, c) __shfl_sync((a), (b), (c)) -#define SHARED __shared__ -#define ACTIVEMASK() __activemask() -#define BALLOT(mask, val) __ballot_sync((mask), val) -/** - * Find the cumulative sum within a warp up to the current - * thread lane, with each mask thread contributing base. - */ -template -DEVICE T -WARP_CUMSUM(const cg::coalesced_group& group, const uint& mask, const T& base) { - T ret = base; - T shfl_val; - shfl_val = __shfl_down_sync(mask, ret, 1u); // Deactivate the rightmost lane. - ret += (group.thread_rank() < 31) * shfl_val; - shfl_val = __shfl_down_sync(mask, ret, 2u); - ret += (group.thread_rank() < 30) * shfl_val; - shfl_val = __shfl_down_sync(mask, ret, 4u); // ...4 - ret += (group.thread_rank() < 28) * shfl_val; - shfl_val = __shfl_down_sync(mask, ret, 8u); // ...8 - ret += (group.thread_rank() < 24) * shfl_val; - shfl_val = __shfl_down_sync(mask, ret, 16u); // ...16 - ret += (group.thread_rank() < 16) * shfl_val; - return ret; -} - -template -DEVICE T -WARP_MAX(const cg::coalesced_group& group, const uint& mask, const T& base) { - T ret = base; - ret = max(ret, __shfl_down_sync(mask, ret, 16u)); - ret = max(ret, __shfl_down_sync(mask, ret, 8u)); - ret = max(ret, __shfl_down_sync(mask, ret, 4u)); - ret = max(ret, __shfl_down_sync(mask, ret, 2u)); - ret = max(ret, __shfl_down_sync(mask, ret, 1u)); - return ret; -} - -template -DEVICE T -WARP_SUM(const cg::coalesced_group& group, const uint& mask, const T& base) { - T ret = base; - ret = ret + __shfl_down_sync(mask, ret, 16u); - ret = ret + __shfl_down_sync(mask, ret, 8u); - ret = ret + __shfl_down_sync(mask, ret, 4u); - ret = ret + __shfl_down_sync(mask, ret, 2u); - ret = ret + __shfl_down_sync(mask, ret, 1u); - return ret; -} - -INLINE DEVICE float3 WARP_SUM_FLOAT3( - const cg::coalesced_group& group, - const uint& mask, - const float3& base) { - float3 ret = base; - ret.x = WARP_SUM(group, mask, base.x); - ret.y = WARP_SUM(group, mask, base.y); - ret.z = WARP_SUM(group, mask, base.z); - return ret; -} - -// Floating point. 
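The WARP_SUM / WARP_MAX helpers above implement the usual shuffle-tree reduction. A minimal standalone sketch of the same idea (a hypothetical kernel, assuming a full warp of 32 active lanes; not part of the header):

__global__ void WarpSumSketch(const float* in, float* out) {
  // One value per lane; launch with blockDim.x == 32.
  float v = in[threadIdx.x];
  for (int delta = 16; delta >= 1; delta /= 2) {
    v += __shfl_down_sync(0xffffffffu, v, delta); // lane i accumulates lane i + delta
  }
  if (threadIdx.x == 0) {
    *out = v; // lane 0 ends up holding the sum of all 32 inputs
  }
}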
-// #define FMUL(a, b) __fmul_rn((a), (b)) -#define FMUL(a, b) ((a) * (b)) -#define FDIV(a, b) __fdiv_rn((a), (b)) -// #define FSUB(a, b) __fsub_rn((a), (b)) -#define FSUB(a, b) ((a) - (b)) -#define FADD(a, b) __fadd_rn((a), (b)) -#define FSQRT(a) __fsqrt_rn(a) -#define FEXP(a) fasterexp(a) -#define FLN(a) fasterlog(a) -#define FPOW(a, b) __powf((a), (b)) -#define FMAX(a, b) fmax((a), (b)) -#define FMIN(a, b) fmin((a), (b)) -#define FCEIL(a) ceilf(a) -#define FFLOOR(a) floorf(a) -#define FROUND(x) nearbyintf(x) -#define FSATURATE(x) __saturatef(x) -#define FABS(a) abs(a) -#define IASF(a, loc) (loc) = __int_as_float(a) -#define FASI(a, loc) (loc) = __float_as_int(a) -#define FABSLEQAS(a, b, c) \ - ((a) <= (b) ? FSUB((b), (a)) <= (c) : FSUB((a), (b)) < (c)) -/** Calculates x*y+z. */ -#define FMA(x, y, z) __fmaf_rn((x), (y), (z)) -#define I2F(a) __int2float_rn(a) -#define FRCP(x) __frcp_rn(x) -__device__ static float atomicMax(float* address, float val) { - int* address_as_i = (int*)address; - int old = *address_as_i, assumed; - do { - assumed = old; - old = ::atomicCAS( - address_as_i, - assumed, - __float_as_int(::fmaxf(val, __int_as_float(assumed)))); - } while (assumed != old); - return __int_as_float(old); -} -__device__ static float atomicMin(float* address, float val) { - int* address_as_i = (int*)address; - int old = *address_as_i, assumed; - do { - assumed = old; - old = ::atomicCAS( - address_as_i, - assumed, - __float_as_int(::fminf(val, __int_as_float(assumed)))); - } while (assumed != old); - return __int_as_float(old); -} -#define DMAX(a, b) FMAX(a, b) -#define DMIN(a, b) FMIN(a, b) -#define DSQRT(a) sqrt(a) -#define DSATURATE(a) DMIN(1., DMAX(0., (a))) -// half -#define HADD(a, b) __hadd((a), (b)) -#define HSUB2(a, b) __hsub2((a), (b)) -#define HMUL2(a, b) __hmul2((a), (b)) -#define HSQRT(a) hsqrt(a) - -// uint. -#define CLZ(VAL) __clz(VAL) -#define POPC(a) __popc(a) -// -// -// -// -// -// -// -// -// -#define ATOMICADD(PTR, VAL) atomicAdd((PTR), (VAL)) -#define ATOMICADD_F3(PTR, VAL) \ - ATOMICADD(&((PTR)->x), VAL.x); \ - ATOMICADD(&((PTR)->y), VAL.y); \ - ATOMICADD(&((PTR)->z), VAL.z); -#if (CUDART_VERSION >= 10000) && (__CUDA_ARCH__ >= 600) -#define ATOMICADD_B(PTR, VAL) atomicAdd_block((PTR), (VAL)) -#else -#define ATOMICADD_B(PTR, VAL) ATOMICADD(PTR, VAL) -#endif -// -// -// -// -// int. -#define IMIN(a, b) min((a), (b)) -#define IMAX(a, b) max((a), (b)) -#define IABS(a) abs(a) - -// Checks. -// like TORCH_CHECK_ARG in PyTorch > 1.10 -#define ARGCHECK(cond, argN, ...) \ - TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__) - -// Math. -#define NORM3DF(x, y, z) norm3df(x, y, z) -#define RNORM3DF(x, y, z) rnorm3df(x, y, z) - -// High level. 
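The GET_*_WS_SIZE and *_WS macros below wrap CUB's standard two-pass calling convention: the first call, made with a null workspace pointer, only reports the required temporary-storage size; the second call, with the allocated workspace, does the actual work. A hedged sketch with hypothetical buffers (not part of the header):

#include <cub/cub.cuh>
#include <cuda_runtime.h>

void SortDescendingSketch(
    const float* keys_in, float* keys_out,
    const int* vals_in, int* vals_out, int num_items) {
  void* workspace = nullptr;
  size_t workspace_bytes = 0;
  // Pass 1: workspace == nullptr, so CUB only fills in workspace_bytes.
  cub::DeviceRadixSort::SortPairsDescending(
      workspace, workspace_bytes, keys_in, keys_out, vals_in, vals_out, num_items);
  cudaMalloc(&workspace, workspace_bytes);
  // Pass 2: same arguments again, now performing the sort.
  cub::DeviceRadixSort::SortPairsDescending(
      workspace, workspace_bytes, keys_in, keys_out, vals_in, vals_out, num_items);
  cudaFree(workspace);
}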
-#define GET_SORT_WS_SIZE(RES_PTR, KEY_TYPE, VAL_TYPE, NUM_OBJECTS) \ - cub::DeviceRadixSort::SortPairsDescending( \ - (void*)NULL, \ - *(RES_PTR), \ - reinterpret_cast(NULL), \ - reinterpret_cast(NULL), \ - reinterpret_cast(NULL), \ - reinterpret_cast(NULL), \ - (NUM_OBJECTS)); -#define GET_REDUCE_WS_SIZE(RES_PTR, TYPE, REDUCE_OP, NUM_OBJECTS) \ - { \ - TYPE init = TYPE(); \ - cub::DeviceReduce::Reduce( \ - (void*)NULL, \ - *(RES_PTR), \ - (TYPE*)NULL, \ - (TYPE*)NULL, \ - (NUM_OBJECTS), \ - (REDUCE_OP), \ - init); \ - } -#define GET_SELECT_WS_SIZE( \ - RES_PTR, TYPE_SELECTOR, TYPE_SELECTION, NUM_OBJECTS) \ - { \ - cub::DeviceSelect::Flagged( \ - (void*)NULL, \ - *(RES_PTR), \ - (TYPE_SELECTION*)NULL, \ - (TYPE_SELECTOR*)NULL, \ - (TYPE_SELECTION*)NULL, \ - (int*)NULL, \ - (NUM_OBJECTS)); \ - } -#define GET_SUM_WS_SIZE(RES_PTR, TYPE_SUM, NUM_OBJECTS) \ - { \ - cub::DeviceReduce::Sum( \ - (void*)NULL, \ - *(RES_PTR), \ - (TYPE_SUM*)NULL, \ - (TYPE_SUM*)NULL, \ - NUM_OBJECTS); \ - } -#define GET_MM_WS_SIZE(RES_PTR, TYPE, NUM_OBJECTS) \ - { \ - TYPE init = TYPE(); \ - cub::DeviceReduce::Max( \ - (void*)NULL, *(RES_PTR), (TYPE*)NULL, (TYPE*)NULL, (NUM_OBJECTS)); \ - } -#define SORT_DESCENDING( \ - TMPN1, SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS) \ - void* TMPN1 = NULL; \ - size_t TMPN1##_bytes = 0; \ - cub::DeviceRadixSort::SortPairsDescending( \ - TMPN1, \ - TMPN1##_bytes, \ - (SORT_PTR), \ - (SORTED_PTR), \ - (VAL_PTR), \ - (VAL_SORTED_PTR), \ - (NUM_OBJECTS)); \ - HANDLECUDA(cudaMalloc(&TMPN1, TMPN1##_bytes)); \ - cub::DeviceRadixSort::SortPairsDescending( \ - TMPN1, \ - TMPN1##_bytes, \ - (SORT_PTR), \ - (SORTED_PTR), \ - (VAL_PTR), \ - (VAL_SORTED_PTR), \ - (NUM_OBJECTS)); \ - HANDLECUDA(cudaFree(TMPN1)); -#define SORT_DESCENDING_WS( \ - TMPN1, \ - SORT_PTR, \ - SORTED_PTR, \ - VAL_PTR, \ - VAL_SORTED_PTR, \ - NUM_OBJECTS, \ - WORKSPACE_PTR, \ - WORKSPACE_BYTES) \ - cub::DeviceRadixSort::SortPairsDescending( \ - (WORKSPACE_PTR), \ - (WORKSPACE_BYTES), \ - (SORT_PTR), \ - (SORTED_PTR), \ - (VAL_PTR), \ - (VAL_SORTED_PTR), \ - (NUM_OBJECTS)); -#define SORT_ASCENDING_WS( \ - SORT_PTR, \ - SORTED_PTR, \ - VAL_PTR, \ - VAL_SORTED_PTR, \ - NUM_OBJECTS, \ - WORKSPACE_PTR, \ - WORKSPACE_BYTES, \ - STREAM) \ - cub::DeviceRadixSort::SortPairs( \ - (WORKSPACE_PTR), \ - (WORKSPACE_BYTES), \ - (SORT_PTR), \ - (SORTED_PTR), \ - (VAL_PTR), \ - (VAL_SORTED_PTR), \ - (NUM_OBJECTS), \ - 0, \ - sizeof(*(SORT_PTR)) * 8, \ - (STREAM)); -#define SUM_WS( \ - SUM_PTR, OUT_PTR, NUM_OBJECTS, WORKSPACE_PTR, WORKSPACE_BYTES, STREAM) \ - cub::DeviceReduce::Sum( \ - (WORKSPACE_PTR), \ - (WORKSPACE_BYTES), \ - (SUM_PTR), \ - (OUT_PTR), \ - (NUM_OBJECTS), \ - (STREAM)); -#define MIN_WS( \ - MIN_PTR, OUT_PTR, NUM_OBJECTS, WORKSPACE_PTR, WORKSPACE_BYTES, STREAM) \ - cub::DeviceReduce::Min( \ - (WORKSPACE_PTR), \ - (WORKSPACE_BYTES), \ - (MIN_PTR), \ - (OUT_PTR), \ - (NUM_OBJECTS), \ - (STREAM)); -#define MAX_WS( \ - MAX_PTR, OUT_PTR, NUM_OBJECTS, WORKSPACE_PTR, WORKSPACE_BYTES, STREAM) \ - cub::DeviceReduce::Min( \ - (WORKSPACE_PTR), \ - (WORKSPACE_BYTES), \ - (MAX_PTR), \ - (OUT_PTR), \ - (NUM_OBJECTS), \ - (STREAM)); -// -// -// -// TODO: rewrite using nested contexts instead of temporary names. 
-#define REDUCE(REDUCE_PTR, RESULT_PTR, NUM_ITEMS, REDUCE_OP, REDUCE_INIT) \ - cub::DeviceReduce::Reduce( \ - TMPN1, \ - TMPN1##_bytes, \ - (REDUCE_PTR), \ - (RESULT_PTR), \ - (NUM_ITEMS), \ - (REDUCE_OP), \ - (REDUCE_INIT)); \ - HANDLECUDA(cudaMalloc(&TMPN1, TMPN1##_bytes)); \ - cub::DeviceReduce::Reduce( \ - TMPN1, \ - TMPN1##_bytes, \ - (REDUCE_PTR), \ - (RESULT_PTR), \ - (NUM_ITEMS), \ - (REDUCE_OP), \ - (REDUCE_INIT)); \ - HANDLECUDA(cudaFree(TMPN1)); -#define REDUCE_WS( \ - REDUCE_PTR, \ - RESULT_PTR, \ - NUM_ITEMS, \ - REDUCE_OP, \ - REDUCE_INIT, \ - WORKSPACE_PTR, \ - WORSPACE_BYTES, \ - STREAM) \ - cub::DeviceReduce::Reduce( \ - (WORKSPACE_PTR), \ - (WORSPACE_BYTES), \ - (REDUCE_PTR), \ - (RESULT_PTR), \ - (NUM_ITEMS), \ - (REDUCE_OP), \ - (REDUCE_INIT), \ - (STREAM)); -#define SELECT_FLAGS_WS( \ - FLAGS_PTR, \ - ITEM_PTR, \ - OUT_PTR, \ - NUM_SELECTED_PTR, \ - NUM_ITEMS, \ - WORKSPACE_PTR, \ - WORSPACE_BYTES, \ - STREAM) \ - cub::DeviceSelect::Flagged( \ - (WORKSPACE_PTR), \ - (WORSPACE_BYTES), \ - (ITEM_PTR), \ - (FLAGS_PTR), \ - (OUT_PTR), \ - (NUM_SELECTED_PTR), \ - (NUM_ITEMS), \ - stream = (STREAM)); - -#define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \ - HANDLECUDA(cudaMemcpy( \ - (PTR_D), (PTR_H), sizeof(TYPE) * (SIZE), cudaMemcpyHostToDevice)) -#define COPY_DEV_HOST(PTR_H, PTR_D, TYPE, SIZE) \ - HANDLECUDA(cudaMemcpy( \ - (PTR_H), (PTR_D), sizeof(TYPE) * (SIZE), cudaMemcpyDeviceToHost)) -#define COPY_DEV_DEV(PTR_T, PTR_S, TYPE, SIZE) \ - HANDLECUDA(cudaMemcpy( \ - (PTR_T), (PTR_S), sizeof(TYPE) * (SIZE), cudaMemcpyDeviceToDevice)) -// -// We *must* use cudaMallocManaged for pointers on device that should -// interact with pytorch. However, this comes at a significant speed penalty. -// We're using plain CUDA pointers for the rendering operations and -// explicitly copy results to managed pointers wrapped for pytorch (see -// pytorch/util.h). -#define MALLOC(VAR, TYPE, SIZE) cudaMalloc(&(VAR), sizeof(TYPE) * (SIZE)) -#define FREE(PTR) HANDLECUDA(cudaFree(PTR)) -#define MEMSET(VAR, VAL, TYPE, SIZE, STREAM) \ - HANDLECUDA(cudaMemsetAsync((VAR), (VAL), sizeof(TYPE) * (SIZE), (STREAM))) - -#define LAUNCH_MAX_PARALLEL_1D(FUNC, N, STREAM, ...) \ - { \ - int64_t max_threads = \ - at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; \ - uint num_threads = min((N), max_threads); \ - uint num_blocks = iDivCeil((N), num_threads); \ - FUNC<<>>(__VA_ARGS__); \ - } -#define LAUNCH_PARALLEL_1D(FUNC, N, TN, STREAM, ...) \ - { \ - uint num_threads = min(static_cast(N), static_cast(TN)); \ - uint num_blocks = iDivCeil((N), num_threads); \ - FUNC<<>>(__VA_ARGS__); \ - } -#define LAUNCH_MAX_PARALLEL_2D(FUNC, NX, NY, STREAM, ...) \ - { \ - int64_t max_threads = \ - at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; \ - int64_t max_threads_sqrt = static_cast(sqrt(max_threads)); \ - dim3 num_threads, num_blocks; \ - num_threads.x = min((NX), max_threads_sqrt); \ - num_blocks.x = iDivCeil((NX), num_threads.x); \ - num_threads.y = min((NY), max_threads_sqrt); \ - num_blocks.y = iDivCeil((NY), num_threads.y); \ - num_threads.z = 1; \ - num_blocks.z = 1; \ - FUNC<<>>(__VA_ARGS__); \ - } -#define LAUNCH_PARALLEL_2D(FUNC, NX, NY, TX, TY, STREAM, ...) 
\ - { \ - dim3 num_threads, num_blocks; \ - num_threads.x = min((NX), (TX)); \ - num_blocks.x = iDivCeil((NX), num_threads.x); \ - num_threads.y = min((NY), (TY)); \ - num_blocks.y = iDivCeil((NY), num_threads.y); \ - num_threads.z = 1; \ - num_blocks.z = 1; \ - FUNC<<>>(__VA_ARGS__); \ - } - -#define GET_PARALLEL_IDX_1D(VARNAME, N) \ - const uint VARNAME = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; \ - if (VARNAME >= (N)) { \ - return; \ - } -#define GET_PARALLEL_IDS_2D(VAR_X, VAR_Y, WIDTH, HEIGHT) \ - const uint VAR_X = __mul24(blockIdx.x, blockDim.x) + threadIdx.x; \ - const uint VAR_Y = __mul24(blockIdx.y, blockDim.y) + threadIdx.y; \ - if (VAR_X >= (WIDTH) || VAR_Y >= (HEIGHT)) \ - return; -#define END_PARALLEL() -#define END_PARALLEL_NORET() -#define END_PARALLEL_2D_NORET() -#define END_PARALLEL_2D() -#define RETURN_PARALLEL() return -#define CHECKLAUNCH() C10_CUDA_CHECK(cudaGetLastError()); -#define ISONDEVICE true -#define SYNCDEVICE() HANDLECUDA(cudaDeviceSynchronize()) -#define START_TIME(TN) \ - cudaEvent_t __time_start_##TN, __time_stop_##TN; \ - cudaEventCreate(&__time_start_##TN); \ - cudaEventCreate(&__time_stop_##TN); \ - cudaEventRecord(__time_start_##TN); -#define STOP_TIME(TN) cudaEventRecord(__time_stop_##TN); -#define GET_TIME(TN, TOPTR) \ - cudaEventSynchronize(__time_stop_##TN); \ - cudaEventElapsedTime((TOPTR), __time_start_##TN, __time_stop_##TN); -#define START_TIME_CU(TN) START_TIME(CN) -#define STOP_TIME_CU(TN) STOP_TIME(TN) -#define GET_TIME_CU(TN, TOPTR) GET_TIME(TN, TOPTR) - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward.gpu.cu deleted file mode 100644 index e0da7b7020c0a3f5ae0647030282adf0e0103d39..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.backward.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward_dbg.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward_dbg.gpu.cu deleted file mode 100644 index a95bb421d2d9b6bfec1a9286e035b042b0d9842c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.backward_dbg.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.backward_dbg.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_gradients.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_gradients.gpu.cu deleted file mode 100644 index ff38b08e0dfe46e65a94039c8dec7da721d0421a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_gradients.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "../include/renderer.calc_gradients.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_signature.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_signature.gpu.cu deleted file mode 100644 index 81c72192eaa877038d9383cfdd0adf9a91e06f97..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.calc_signature.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.calc_signature.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.construct.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.construct.gpu.cu deleted file mode 100644 index 67583511aec2a6bd4dd8670aeb809939a3d2e19c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.construct.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.construct.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.create_selector.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.create_selector.gpu.cu deleted file mode 100644 index 52e265bcb2ab8ca9e4d08d90d1dc4fef75294520..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.create_selector.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.create_selector.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.destruct.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.destruct.gpu.cu deleted file mode 100644 index e61be93fa4c4893e6c4800f71cf49ef81c717ff0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.destruct.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.destruct.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.fill_bg.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.fill_bg.gpu.cu deleted file mode 100644 index 6c7b1a48b675b1dbe69992c81a8cbb8c8861911e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.fill_bg.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "../include/renderer.fill_bg.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.forward.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.forward.gpu.cu deleted file mode 100644 index bfb42debeeaa7444daec94a88830c39825239170..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.forward.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.forward.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_cam_gradients.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_cam_gradients.gpu.cu deleted file mode 100644 index 93d666324a4973f44ad4becbeecaf34e0c7b96e5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_cam_gradients.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.norm_cam_gradients.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_sphere_gradients.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_sphere_gradients.gpu.cu deleted file mode 100644 index 65339caea11645e4b7ba99a0af77c21b4ae2f738..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.norm_sphere_gradients.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.norm_sphere_gradients.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.render.gpu.cu b/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.render.gpu.cu deleted file mode 100644 index eb46adbafbc1c2a60dfb21fa9ce222828e53e31b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/cuda/renderer.render.gpu.cu +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.render.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/global.h b/pytorch3d/pytorch3d/csrc/pulsar/global.h deleted file mode 100644 index 3cea957e1f09d32494bc6e644e8929a22534270d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/global.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_GLOBAL_H -#define PULSAR_GLOBAL_H - -#include "./constants.h" -#ifndef WIN32 -#include -#endif - -#if defined(_WIN64) || defined(_WIN32) -#define uint unsigned int -#define ushort unsigned short -#endif - -#include "./logging.h" // <- include before torch/extension.h - -#define MAX_GRAD_SPHERES 128 - -#ifdef __CUDACC__ -#define INLINE __forceinline__ -#define HOST __host__ -#define DEVICE __device__ -#define GLOBAL __global__ -#define RESTRICT __restrict__ -#define DEBUGBREAK() -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diag_suppress 1866 -#pragma nv_diag_suppress 2941 -#pragma nv_diag_suppress 2951 -#pragma nv_diag_suppress 2967 -#else -#pragma diag_suppress = attribute_not_allowed -#pragma diag_suppress = 1866 -#pragma diag_suppress = 2941 -#pragma diag_suppress = 2951 -#pragma diag_suppress = 2967 -#endif -#else // __CUDACC__ -#define INLINE inline -#define HOST -#define DEVICE -#define GLOBAL -#define RESTRICT -#define DEBUGBREAK() std::raise(SIGINT) -// Don't care about pytorch warnings; they shouldn't clutter our warnings. -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Weverything" -#include -#pragma clang diagnostic pop -#ifdef WITH_CUDA -#include -#include -#else -#ifndef cudaStream_t -typedef void* cudaStream_t; -#endif -struct int2 { - int x, y; -}; -struct ushort2 { - unsigned short x, y; -}; -struct float2 { - float x, y; -}; -struct float3 { - float x, y, z; -}; -inline float3 make_float3(const float& x, const float& y, const float& z) { - float3 res; - res.x = x; - res.y = y; - res.z = z; - return res; -} -#endif -namespace py = pybind11; - -inline bool operator==(const float3& a, const float3& b) { - return a.x == b.x && a.y == b.y && a.z == b.z; -} -#endif // __CUDACC__ -#define IHD INLINE HOST DEVICE - -// An assertion command that can be used on host and device. -#ifdef PULSAR_ASSERTIONS -#ifdef __CUDACC__ -#define PASSERT(VAL) \ - if (!(VAL)) { \ - printf( \ - "Pulsar assertion failed in %s, line %d: %s.\n", \ - __FILE__, \ - __LINE__, \ - #VAL); \ - } -#else -#define PASSERT(VAL) \ - if (!(VAL)) { \ - printf( \ - "Pulsar assertion failed in %s, line %d: %s.\n", \ - __FILE__, \ - __LINE__, \ - #VAL); \ - std::raise(SIGINT); \ - } -#endif -#else -#define PASSERT(VAL) -#endif - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/README.md b/pytorch3d/pytorch3d/csrc/pulsar/host/README.md deleted file mode 100644 index 34f1bade9134da24f4038425c4b50fe1fffc45dc..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Device-specific host compilation units - -This folder contains `.cpp` files to create compilation units -for device specific functions. See `../include/README.md` for -more information. diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/commands.h b/pytorch3d/pytorch3d/csrc/pulsar/host/commands.h deleted file mode 100644 index 4378303bbc310b879bb25329cc35c29e40ef0367..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/commands.h +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_COMMANDS_H_ -#define PULSAR_NATIVE_COMMANDS_H_ - -#ifdef _MSC_VER -#include -#define __builtin_popcount (int)__popcnt -#endif - -// Definitions for CPU commands. 
-// #include -// #include - -namespace cg { -struct coalesced_group { - INLINE uint thread_rank() const { - return 0u; - } - INLINE uint size() const { - return 1u; - } - INLINE uint ballot(uint val) const { - return static_cast<uint>(val > 0); - } -}; - -struct thread_block { - INLINE uint thread_rank() const { - return 0u; - } - INLINE uint size() const { - return 1u; - } - INLINE void sync() const {} -}; - -INLINE coalesced_group coalesced_threads() { - coalesced_group ret; - return ret; -} - -INLINE thread_block this_thread_block() { - thread_block ret; - return ret; -} -} // namespace cg -#define SHFL_SYNC(a, b, c) (b) -template <typename T> -T WARP_CUMSUM( - const cg::coalesced_group& group, - const uint& mask, - const T& base) { - return base; -} - -template <typename T> -DEVICE T -WARP_MAX(const cg::coalesced_group& group, const uint& mask, const T& base) { - return base; -} - -template <typename T> -DEVICE T -WARP_SUM(const cg::coalesced_group& group, const uint& mask, const T& base) { - return base; -} - -INLINE DEVICE float3 WARP_SUM_FLOAT3( - const cg::coalesced_group& group, - const uint& mask, - const float3& base) { - return base; -} - -#define ACTIVEMASK() (1u << 31) -#define ALIGN(VAL) -#define SYNC() -#define THREADFENCE_B() -#define BALLOT(mask, val) (val != 0) -#define SHARED -// Floating point. -#define FMAX(a, b) std::fmax((a), (b)) -#define FMIN(a, b) std::fmin((a), (b)) -INLINE float atomicMax(float* address, float val) { - *address = std::max(*address, val); - return *address; -} -INLINE float atomicMin(float* address, float val) { - *address = std::min(*address, val); - return *address; -} -#define FMUL(a, b) ((a) * (b)) -#define FDIV(a, b) ((a) / (b)) -#define FSUB(a, b) ((a) - (b)) -#define FABSLEQAS(a, b, c) \ - ((a) <= (b) ? FSUB((b), (a)) <= (c) : FSUB((a), (b)) < (c)) -#define FADD(a, b) ((a) + (b)) -#define FSQRT(a) sqrtf(a) -#define FEXP(a) fasterexp(a) -#define FLN(a) fasterlog(a) -#define FPOW(a, b) powf((a), (b)) -#define FROUND(x) roundf(x) -#define FCEIL(a) ceilf(a) -#define FFLOOR(a) floorf(a) -#define FSATURATE(x) std::max(0.f, std::min(1.f, x)) -#define FABS(a) abs(a) -#define FMA(x, y, z) ((x) * (y) + (z)) -#define I2F(a) static_cast<float>(a) -#define FRCP(x) (1.f / (x)) -#define IASF(x, loc) memcpy(&(loc), &(x), sizeof(x)) -#define FASI(x, loc) memcpy(&(loc), &(x), sizeof(x)) -#define DMAX(a, b) std::max((a), (b)) -#define DMIN(a, b) std::min((a), (b)) -#define DSATURATE(a) DMIN(1., DMAX(0., (a))) -#define DSQRT(a) sqrt(a) -// -// -// -// -// -// -// -// -// -// -// -// -// uint. -#define CLZ(VAL) _clz(VAL) -template <typename T> -INLINE T ATOMICADD(T* address, T val) { - T old = *address; - *address += val; - return old; -} -template <typename T> -INLINE void ATOMICADD_F3(T* address, T val) { - ATOMICADD(&(address->x), val.x); - ATOMICADD(&(address->y), val.y); - ATOMICADD(&(address->z), val.z); -} -#define ATOMICADD_B(a, b) ATOMICADD((a), (b)) -#define POPC(a) __builtin_popcount(a) - -// int. -#define IMIN(a, b) std::min((a), (b)) -#define IMAX(a, b) std::max((a), (b)) -#define IABS(a) abs(a) - -// Checks. -// like TORCH_CHECK_ARG in PyTorch > 1.10 -#define ARGCHECK(cond, argN, ...) \ - TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__) - -// Math. -#define NORM3DF(x, y, z) sqrtf(x* x + y * y + z * z) -#define RNORM3DF(x, y, z) (1.f / sqrtf(x * x + y * y + z * z)) - -// High level.
-#define PREFETCH(PTR) -#define GET_SORT_WS_SIZE(RES_PTR, KEY_TYPE, VAL_TYPE, NUM_OBJECTS) \ - *(RES_PTR) = 0; -#define GET_REDUCE_WS_SIZE(RES_PTR, TYPE, REDUCE_OP, NUM_OBJECTS) \ - *(RES_PTR) = 0; -#define GET_SELECT_WS_SIZE( \ - RES_PTR, TYPE_SELECTOR, TYPE_SELECTION, NUM_OBJECTS) \ - *(RES_PTR) = 0; -#define GET_SUM_WS_SIZE(RES_PTR, TYPE_SUM, NUM_OBJECTS) *(RES_PTR) = 0; -#define GET_MM_WS_SIZE(RES_PTR, TYPE, NUM_OBJECTS) *(RES_PTR) = 0; - -#define SORT_DESCENDING( \ - TMPN1, SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS) \ - std::vector TMPN1(NUM_OBJECTS); \ - std::iota(TMPN1.begin(), TMPN1.end(), 0); \ - const auto TMPN1##_val_ptr = (SORT_PTR); \ - std::sort( \ - TMPN1.begin(), TMPN1.end(), [&TMPN1##_val_ptr](size_t i1, size_t i2) { \ - return TMPN1##_val_ptr[i1] > TMPN1##_val_ptr[i2]; \ - }); \ - for (int i = 0; i < (NUM_OBJECTS); ++i) { \ - (SORTED_PTR)[i] = (SORT_PTR)[TMPN1[i]]; \ - } \ - for (int i = 0; i < (NUM_OBJECTS); ++i) { \ - (VAL_SORTED_PTR)[i] = (VAL_PTR)[TMPN1[i]]; \ - } - -#define SORT_ASCENDING( \ - SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS, STREAM) \ - { \ - std::vector TMPN1(NUM_OBJECTS); \ - std::iota(TMPN1.begin(), TMPN1.end(), 0); \ - const auto TMPN1_val_ptr = (SORT_PTR); \ - std::sort( \ - TMPN1.begin(), \ - TMPN1.end(), \ - [&TMPN1_val_ptr](size_t i1, size_t i2) -> bool { \ - return TMPN1_val_ptr[i1] < TMPN1_val_ptr[i2]; \ - }); \ - for (int i = 0; i < (NUM_OBJECTS); ++i) { \ - (SORTED_PTR)[i] = (SORT_PTR)[TMPN1[i]]; \ - } \ - for (int i = 0; i < (NUM_OBJECTS); ++i) { \ - (VAL_SORTED_PTR)[i] = (VAL_PTR)[TMPN1[i]]; \ - } \ - } - -#define SORT_DESCENDING_WS( \ - TMPN1, \ - SORT_PTR, \ - SORTED_PTR, \ - VAL_PTR, \ - VAL_SORTED_PTR, \ - NUM_OBJECTS, \ - WORSPACE_PTR, \ - WORKSPACE_SIZE) \ - SORT_DESCENDING( \ - TMPN1, SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS) - -#define SORT_ASCENDING_WS( \ - SORT_PTR, \ - SORTED_PTR, \ - VAL_PTR, \ - VAL_SORTED_PTR, \ - NUM_OBJECTS, \ - WORSPACE_PTR, \ - WORKSPACE_SIZE, \ - STREAM) \ - SORT_ASCENDING( \ - SORT_PTR, SORTED_PTR, VAL_PTR, VAL_SORTED_PTR, NUM_OBJECTS, STREAM) - -#define REDUCE(REDUCE_PTR, RESULT_PTR, NUM_ITEMS, REDUCE_OP, REDUCE_INIT) \ - { \ - *(RESULT_PTR) = (REDUCE_INIT); \ - for (int i = 0; i < (NUM_ITEMS); ++i) { \ - *(RESULT_PTR) = REDUCE_OP(*(RESULT_PTR), (REDUCE_PTR)[i]); \ - } \ - } -#define REDUCE_WS( \ - REDUCE_PTR, \ - RESULT_PTR, \ - NUM_ITEMS, \ - REDUCE_OP, \ - REDUCE_INIT, \ - WORKSPACE_PTR, \ - WORKSPACE_SIZE, \ - STREAM) \ - REDUCE(REDUCE_PTR, RESULT_PTR, NUM_ITEMS, REDUCE_OP, REDUCE_INIT) - -#define SELECT_FLAGS_WS( \ - FLAGS_PTR, \ - ITEM_PTR, \ - OUT_PTR, \ - NUM_SELECTED_PTR, \ - NUM_ITEMS, \ - WORKSPACE_PTR, \ - WORSPACE_BYTES, \ - STREAM) \ - { \ - *NUM_SELECTED_PTR = 0; \ - ptrdiff_t write_pos = 0; \ - for (int i = 0; i < NUM_ITEMS; ++i) { \ - if (FLAGS_PTR[i]) { \ - OUT_PTR[write_pos++] = ITEM_PTR[i]; \ - *NUM_SELECTED_PTR += 1; \ - } \ - } \ - } - -template -void SUM_WS( - T* SUM_PTR, - T* OUT_PTR, - size_t NUM_OBJECTS, - char* WORKSPACE_PTR, - size_t WORKSPACE_BYTES, - cudaStream_t STREAM) { - *(OUT_PTR) = T(); - for (int i = 0; i < (NUM_OBJECTS); ++i) { - *(OUT_PTR) = *(OUT_PTR) + (SUM_PTR)[i]; - } -} - -template -void MIN_WS( - T* MIN_PTR, - T* OUT_PTR, - size_t NUM_OBJECTS, - char* WORKSPACE_PTR, - size_t WORKSPACE_BYTES, - cudaStream_t STREAM) { - *(OUT_PTR) = T(); - for (int i = 0; i < (NUM_OBJECTS); ++i) { - *(OUT_PTR) = std::min(*(OUT_PTR), (MIN_PTR)[i]); - } -} - -template -void MAX_WS( - T* MAX_PTR, - T* OUT_PTR, - size_t NUM_OBJECTS, 
- char* WORKSPACE_PTR, - size_t WORKSPACE_BYTES, - cudaStream_t STREAM) { - *(OUT_PTR) = T(); - for (int i = 0; i < (NUM_OBJECTS); ++i) { - *(OUT_PTR) = std::max(*(OUT_PTR), (MAX_PTR)[i]); - } -} -// -// -// -// -#define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \ - std::memcpy((PTR_D), (PTR_H), sizeof(TYPE) * (SIZE)) -// -#define COPY_DEV_HOST(PTR_H, PTR_D, TYPE, SIZE) \ - std::memcpy((PTR_H), (PTR_D), sizeof(TYPE) * (SIZE)) -// -#define COPY_DEV_DEV(PTR_T, PTR_S, TYPE, SIZE) \ - std::memcpy((PTR_T), (PTR_S), sizeof(TYPE) * SIZE) -// - -#define MALLOC(VAR, TYPE, SIZE) MALLOC_HOST(VAR, TYPE, SIZE) -#define FREE(PTR) FREE_HOST(PTR) -#define MEMSET(VAR, VAL, TYPE, SIZE, STREAM) \ - memset((VAR), (VAL), sizeof(TYPE) * (SIZE)) -// - -#define LAUNCH_MAX_PARALLEL_1D(FUNC, N, STREAM, ...) FUNC(__VA_ARGS__); -#define LAUNCH_PARALLEL_1D(FUNC, N, TN, STREAM, ...) FUNC(__VA_ARGS__); -#define LAUNCH_MAX_PARALLEL_2D(FUNC, NX, NY, STREAM, ...) FUNC(__VA_ARGS__); -#define LAUNCH_PARALLEL_2D(FUNC, NX, NY, TX, TY, STREAM, ...) FUNC(__VA_ARGS__); -// -// -// -// -// -#define GET_PARALLEL_IDX_1D(VARNAME, N) \ - for (uint VARNAME = 0; VARNAME < (N); ++VARNAME) { -#define GET_PARALLEL_IDS_2D(VAR_X, VAR_Y, WIDTH, HEIGHT) \ - int2 blockDim; \ - blockDim.x = 1; \ - blockDim.y = 1; \ - uint __parallel_2d_width = WIDTH; \ - uint __parallel_2d_height = HEIGHT; \ - for (uint VAR_Y = 0; VAR_Y < __parallel_2d_height; ++(VAR_Y)) { \ - for (uint VAR_X = 0; VAR_X < __parallel_2d_width; ++(VAR_X)) { -// -// -// -#define END_PARALLEL() \ - end_parallel:; \ - } -#define END_PARALLEL_NORET() } -#define END_PARALLEL_2D() \ - end_parallel:; \ - } \ - } -#define END_PARALLEL_2D_NORET() \ - } \ - } -#define RETURN_PARALLEL() goto end_parallel; -#define CHECKLAUNCH() -#define ISONDEVICE false -#define SYNCDEVICE() -#define START_TIME(TN) \ - auto __time_start_##TN = std::chrono::steady_clock::now(); -#define STOP_TIME(TN) auto __time_stop_##TN = std::chrono::steady_clock::now(); -#define GET_TIME(TN, TOPTR) \ - *TOPTR = std::chrono::duration_cast( \ - __time_stop_##TN - __time_start_##TN) \ - .count() -#define START_TIME_CU(TN) \ - cudaEvent_t __time_start_##TN, __time_stop_##TN; \ - cudaEventCreate(&__time_start_##TN); \ - cudaEventCreate(&__time_stop_##TN); \ - cudaEventRecord(__time_start_##TN); -#define STOP_TIME_CU(TN) cudaEventRecord(__time_stop_##TN); -#define GET_TIME_CU(TN, TOPTR) \ - cudaEventSynchronize(__time_stop_##TN); \ - cudaEventElapsedTime((TOPTR), __time_start_##TN, __time_stop_##TN); - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward.cpu.cpp deleted file mode 100644 index e0da7b7020c0a3f5ae0647030282adf0e0103d39..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "../include/renderer.backward.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward_dbg.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward_dbg.cpu.cpp deleted file mode 100644 index a95bb421d2d9b6bfec1a9286e035b042b0d9842c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.backward_dbg.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.backward_dbg.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_gradients.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_gradients.cpu.cpp deleted file mode 100644 index ff38b08e0dfe46e65a94039c8dec7da721d0421a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_gradients.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.calc_gradients.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_signature.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_signature.cpu.cpp deleted file mode 100644 index 81c72192eaa877038d9383cfdd0adf9a91e06f97..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.calc_signature.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.calc_signature.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.construct.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.construct.cpu.cpp deleted file mode 100644 index 67583511aec2a6bd4dd8670aeb809939a3d2e19c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.construct.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.construct.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.create_selector.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.create_selector.cpu.cpp deleted file mode 100644 index 52e265bcb2ab8ca9e4d08d90d1dc4fef75294520..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.create_selector.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "../include/renderer.create_selector.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.destruct.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.destruct.cpu.cpp deleted file mode 100644 index e61be93fa4c4893e6c4800f71cf49ef81c717ff0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.destruct.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.destruct.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.fill_bg.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.fill_bg.cpu.cpp deleted file mode 100644 index 6c7b1a48b675b1dbe69992c81a8cbb8c8861911e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.fill_bg.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.fill_bg.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.forward.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.forward.cpu.cpp deleted file mode 100644 index bfb42debeeaa7444daec94a88830c39825239170..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.forward.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.forward.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_cam_gradients.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_cam_gradients.cpu.cpp deleted file mode 100644 index 93d666324a4973f44ad4becbeecaf34e0c7b96e5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_cam_gradients.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.norm_cam_gradients.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_sphere_gradients.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_sphere_gradients.cpu.cpp deleted file mode 100644 index 65339caea11645e4b7ba99a0af77c21b4ae2f738..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.norm_sphere_gradients.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "../include/renderer.norm_sphere_gradients.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.render.cpu.cpp b/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.render.cpu.cpp deleted file mode 100644 index eb46adbafbc1c2a60dfb21fa9ce222828e53e31b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/host/renderer.render.cpu.cpp +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "../include/renderer.render.instantiate.h" diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/README.md b/pytorch3d/pytorch3d/csrc/pulsar/include/README.md deleted file mode 100644 index e963ff043abdbbf88af350512f60fb70a02a4774..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# The `include` folder - -This folder contains header files with implementations of several useful -algorithms. These implementations are usually done in files called `x.device.h` -and use macros that route every device specific command to the right -implementation (see `commands.h`). - -If you're using a device specific implementation, include `x.device.h`. -This gives you the high-speed, device specific implementation that lets -you work with all the details of the datastructure. All function calls are -inlined. If you need to work with the high-level interface and be able to -dynamically pick a device, only include `x.h`. The functions there are -templated with a boolean `DEV` flag and are instantiated in device specific -compilation units. You will not be able to use any other functions, but can -use `func(params)` to work on a CUDA device, or `func(params)` -to work on the host. diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/camera.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/camera.device.h deleted file mode 100644 index f003db31ba09e177f0119083fc00cb27fb019c0d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/camera.device.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_CAMERA_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_CAMERA_DEVICE_H_ - -#include "../global.h" -#include "./camera.h" -#include "./commands.h" - -namespace pulsar { -IHD CamGradInfo::CamGradInfo() { - cam_pos = make_float3(0.f, 0.f, 0.f); - pixel_0_0_center = make_float3(0.f, 0.f, 0.f); - pixel_dir_x = make_float3(0.f, 0.f, 0.f); - pixel_dir_y = make_float3(0.f, 0.f, 0.f); -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/camera.h b/pytorch3d/pytorch3d/csrc/pulsar/include/camera.h deleted file mode 100644 index cbb583a14e7ee4349d11fea9faf8a6f09cb5e66c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/camera.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_INCLUDE_CAMERA_H_ -#define PULSAR_NATIVE_INCLUDE_CAMERA_H_ - -#include -#include "../global.h" - -namespace pulsar { -/** - * Everything that's needed to raycast with our camera model. - */ -struct CamInfo { - float3 eye; /** Position in world coordinates. */ - float3 pixel_0_0_center; /** LUC center of pixel position in world - coordinates. */ - float3 pixel_dir_x; /** Direction for increasing x for one pixel to the next, - * in world coordinates. */ - float3 pixel_dir_y; /** Direction for increasing y for one pixel to the next, - * in world coordinates. */ - float3 sensor_dir_z; /** Normalized direction vector from eye through the - * sensor in z direction (optical axis). */ - float half_pixel_size; /** Half size of a pixel, in world coordinates. This - * must be consistent with pixel_dir_x and pixel_dir_y! - */ - float focal_length; /** The focal length, if applicable. */ - uint aperture_width; /** Full image width in px, possibly not fully used - * in case of a shifted principal point. */ - uint aperture_height; /** Full image height in px, possibly not fully used - * in case of a shifted principal point. */ - uint film_width; /** Resulting image width. */ - uint film_height; /** Resulting image height. */ - /** The top left coordinates (inclusive) of the film in the full aperture. */ - uint film_border_left, film_border_top; - int32_t principal_point_offset_x; /** Horizontal principal point offset. */ - int32_t principal_point_offset_y; /** Vertical principal point offset. */ - float min_dist; /** Minimum distance for a ball to be rendered. */ - float max_dist; /** Maximum distance for a ball to be rendered. */ - float norm_fac; /** 1 / (max_dist - min_dist), pre-computed. */ - /** The depth where to place the background, in normalized coordinates where - * 0. is the backmost depth and 1. the frontmost. */ - float background_normalization_depth; - /** The number of image content channels to use. Usually three. */ - uint n_channels; - /** Whether to use an orthogonal instead of a perspective projection. */ - bool orthogonal_projection; - /** Whether to use a right-handed system (inverts the z axis). */ - bool right_handed; -}; - -inline bool operator==(const CamInfo& a, const CamInfo& b) { - return a.film_width == b.film_width && a.film_height == b.film_height && - a.background_normalization_depth == b.background_normalization_depth && - a.n_channels == b.n_channels && - a.orthogonal_projection == b.orthogonal_projection && - a.right_handed == b.right_handed; -}; - -struct CamGradInfo { - HOST DEVICE CamGradInfo(); - float3 cam_pos; - float3 pixel_0_0_center; - float3 pixel_dir_x; - float3 pixel_dir_y; -}; - -// TODO: remove once https://github.com/NVlabs/cub/issues/172 is resolved. -struct IntWrapper { - int val; -}; - -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/closest_sphere_tracker.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/closest_sphere_tracker.device.h deleted file mode 100644 index a533dd0048e7f624af7c14a4017b19fde3accff5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/closest_sphere_tracker.device.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_INCLUDE_CLOSEST_SPHERE_TRACKER_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_CLOSEST_SPHERE_TRACKER_DEVICE_H_ - -#include "../global.h" - -namespace pulsar { -namespace Renderer { - -/** - * A facility to track the closest spheres to the camera. - * - * Their max number is defined by MAX_GRAD_SPHERES (this is defined in - * `pulsar/native/global.h`). This is done to keep the performance as high as - * possible because this struct needs to do updates continuously on the GPU. - */ -struct ClosestSphereTracker { - public: - IHD ClosestSphereTracker(const int& n_track) : n_hits(0), n_track(n_track) { - PASSERT(n_track < MAX_GRAD_SPHERES); - // Initialize the sphere IDs to -1 and the weights to 0. - for (int i = 0; i < n_track; ++i) { - this->most_important_sphere_ids[i] = -1; - this->closest_sphere_intersection_depths[i] = MAX_FLOAT; - } - }; - - IHD void track( - const uint& sphere_idx, - const float& intersection_depth, - const uint& coord_x, - const uint& coord_y) { - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_TRACKER_PIX, - "tracker|tracking sphere %u (depth: %f).\n", - sphere_idx, - intersection_depth); - for (int i = IMIN(this->n_hits, n_track) - 1; i >= -1; --i) { - if (i < 0 || - this->closest_sphere_intersection_depths[i] < intersection_depth) { - // Write position is i+1. - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_TRACKER_PIX, - "tracker|determined writing position: %d.\n", - i + 1); - if (i + 1 < n_track) { - // Shift every other sphere back. - for (int j = n_track - 1; j > i + 1; --j) { - this->closest_sphere_intersection_depths[j] = - this->closest_sphere_intersection_depths[j - 1]; - this->most_important_sphere_ids[j] = - this->most_important_sphere_ids[j - 1]; - } - this->closest_sphere_intersection_depths[i + 1] = intersection_depth; - this->most_important_sphere_ids[i + 1] = sphere_idx; - } - break; - } - } -#if PULSAR_LOG_TRACKER_PIX - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_TRACKER_PIX, - "tracker|sphere list after adding sphere %u:\n", - sphere_idx); - for (int i = 0; i < n_track; ++i) { - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_TRACKER_PIX, - "tracker|sphere %d: %d (depth: %f).\n", - i, - this->most_important_sphere_ids[i], - this->closest_sphere_intersection_depths[i]); - } -#endif // PULSAR_LOG_TRACKER_PIX - this->n_hits += 1; - } - - /** - * Get the number of hits registered. - */ - IHD int get_n_hits() const { - return this->n_hits; - } - - /** - * Get the idx closest sphere ID. - * - * For example, get_closest_sphere_id(0) gives the overall closest - * sphere id. - * - * This method is implemented for highly optimized scenarios and will *not* - * perform an index check at runtime if assertions are disabled. idx must be - * >=0 and < IMIN(n_hits, n_track) for a valid result, if it is >= - * n_hits it will return -1. - */ - IHD int get_closest_sphere_id(const int& idx) { - PASSERT(idx >= 0 && idx < n_track); - return this->most_important_sphere_ids[idx]; - } - - /** - * Get the idx closest sphere normalized_depth. - * - * For example, get_closest_sphere_depth(0) gives the overall closest - * sphere depth (normalized). - * - * This method is implemented for highly optimized scenarios and will *not* - * perform an index check at runtime if assertions are disabled. idx must be - * >=0 and < IMIN(n_hits, n_track) for a valid result, if it is >= - * n_hits it will return 1. + FEPS. 
- */ - IHD float get_closest_sphere_depth(const int& idx) { - PASSERT(idx >= 0 && idx < n_track); - return this->closest_sphere_intersection_depths[idx]; - } - - private: - /** The number of registered hits so far. */ - int n_hits; - /** The number of intersections to track. Must be (malloc(sizeof(TYPE) * (SIZE))) -#define FREE_HOST(PTR) free(PTR) - -/* Include command definitions depending on CPU or GPU use. */ - -#ifdef __CUDACC__ -// TODO: find out which compiler we're using here and use the suppression. -// #pragma push -// #pragma diag_suppress = 68 -#include -// #pragma pop -#include "../cuda/commands.h" -#else -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Weverything" -#pragma clang diagnostic pop -#include "../host/commands.h" -#endif - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/fastermath.h b/pytorch3d/pytorch3d/csrc/pulsar/include/fastermath.h deleted file mode 100644 index cae598f9c0a7f903b502702dcb62173c8841a3b8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/fastermath.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef PULSAR_NATIVE_INCLUDE_FASTERMATH_H_ -#define PULSAR_NATIVE_INCLUDE_FASTERMATH_H_ - -// @lint-ignore-every LICENSELINT -/*=====================================================================* - * Copyright (C) 2011 Paul Mineiro * - * All rights reserved. * - * * - * Redistribution and use in source and binary forms, with * - * or without modification, are permitted provided that the * - * following conditions are met: * - * * - * * Redistributions of source code must retain the * - * above copyright notice, this list of conditions and * - * the following disclaimer. * - * * - * * Redistributions in binary form must reproduce the * - * above copyright notice, this list of conditions and * - * the following disclaimer in the documentation and/or * - * other materials provided with the distribution. * - * * - * * Neither the name of Paul Mineiro nor the names * - * of other contributors may be used to endorse or promote * - * products derived from this software without specific * - * prior written permission. * - * * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER * - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * - * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * - * POSSIBILITY OF SUCH DAMAGE. 
* - * * - * Contact: Paul Mineiro * - *=====================================================================*/ - -#include -#include "./commands.h" - -#ifdef __cplusplus -#define cast_uint32_t static_cast -#else -#define cast_uint32_t (uint32_t) -#endif - -IHD float fasterlog2(float x) { - union { - float f; - uint32_t i; - } vx = {x}; - float y = vx.i; - y *= 1.1920928955078125e-7f; - return y - 126.94269504f; -} - -IHD float fasterlog(float x) { - // return 0.69314718f * fasterlog2 (x); - union { - float f; - uint32_t i; - } vx = {x}; - float y = vx.i; - y *= 8.2629582881927490e-8f; - return y - 87.989971088f; -} - -IHD float fasterpow2(float p) { - float clipp = (p < -126) ? -126.0f : p; - union { - uint32_t i; - float f; - } v = {cast_uint32_t((1 << 23) * (clipp + 126.94269504f))}; - return v.f; -} - -IHD float fasterexp(float p) { - return fasterpow2(1.442695040f * p); -} - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/math.h b/pytorch3d/pytorch3d/csrc/pulsar/include/math.h deleted file mode 100644 index d77e2ee1aabb8607c706a7faaee052eb6531b557..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/math.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_IMPL_MATH_H_ -#define PULSAR_NATIVE_IMPL_MATH_H_ - -#include "./camera.h" -#include "./commands.h" -#include "./fastermath.h" - -/** - * Get the direction of val. - * - * Returns +1 if val is positive, -1 if val is zero or negative. - */ -IHD int sign_dir(const int& val) { - return -(static_cast((val <= 0)) << 1) + 1; -}; - -/** - * Get the direction of val. - * - * Returns +1 if val is positive, -1 if val is zero or negative. - */ -IHD float sign_dir(const float& val) { - return static_cast(1 - (static_cast((val <= 0)) << 1)); -}; - -/** - * Integer ceil division. - */ -IHD uint iDivCeil(uint a, uint b) { - return (a % b != 0) ? (a / b + 1) : (a / b); -} - -IHD float3 outer_product_sum(const float3& a) { - return make_float3( - a.x * a.x + a.x * a.y + a.x * a.z, - a.x * a.y + a.y * a.y + a.y * a.z, - a.x * a.z + a.y * a.z + a.z * a.z); -} - -// TODO: put intrinsics here. -IHD float3 operator+(const float3& a, const float3& b) { - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} - -IHD void operator+=(float3& a, const float3& b) { - a.x += b.x; - a.y += b.y; - a.z += b.z; -} - -IHD void operator-=(float3& a, const float3& b) { - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; -} - -IHD void operator/=(float3& a, const float& b) { - a.x /= b; - a.y /= b; - a.z /= b; -} - -IHD void operator*=(float3& a, const float& b) { - a.x *= b; - a.y *= b; - a.z *= b; -} - -IHD float3 operator/(const float3& a, const float& b) { - return make_float3(a.x / b, a.y / b, a.z / b); -} - -IHD float3 operator-(const float3& a, const float3& b) { - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} - -IHD float3 operator*(const float3& a, const float& b) { - return make_float3(a.x * b, a.y * b, a.z * b); -} - -IHD float3 operator*(const float3& a, const float3& b) { - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -} - -IHD float3 operator*(const float& a, const float3& b) { - return b * a; -} - -INLINE DEVICE float length(const float3& v) { - // TODO: benchmark what's faster. 
- return NORM3DF(v.x, v.y, v.z); - // return __fsqrt_rn(v.x * v.x + v.y * v.y + v.z * v.z); -} - -/** - * Left-hand multiplication of the constructed rotation matrix with the vector. - */ -IHD float3 rotate( - const float3& v, - const float3& dir_x, - const float3& dir_y, - const float3& dir_z) { - return make_float3( - dir_x.x * v.x + dir_x.y * v.y + dir_x.z * v.z, - dir_y.x * v.x + dir_y.y * v.y + dir_y.z * v.z, - dir_z.x * v.x + dir_z.y * v.y + dir_z.z * v.z); -} - -INLINE DEVICE float3 normalize(const float3& v) { - return v * RNORM3DF(v.x, v.y, v.z); -} - -INLINE DEVICE float dot(const float3& a, const float3& b) { - return FADD(FADD(FMUL(a.x, b.x), FMUL(a.y, b.y)), FMUL(a.z, b.z)); -} - -INLINE DEVICE float3 cross(const float3& a, const float3& b) { - // TODO: faster - return make_float3( - a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); -} - -namespace pulsar { -IHD CamGradInfo operator+(const CamGradInfo& a, const CamGradInfo& b) { - CamGradInfo res; - res.cam_pos = a.cam_pos + b.cam_pos; - res.pixel_0_0_center = a.pixel_0_0_center + b.pixel_0_0_center; - res.pixel_dir_x = a.pixel_dir_x + b.pixel_dir_x; - res.pixel_dir_y = a.pixel_dir_y + b.pixel_dir_y; - return res; -} - -IHD CamGradInfo operator*(const CamGradInfo& a, const float& b) { - CamGradInfo res; - res.cam_pos = a.cam_pos * b; - res.pixel_0_0_center = a.pixel_0_0_center * b; - res.pixel_dir_x = a.pixel_dir_x * b; - res.pixel_dir_y = a.pixel_dir_y * b; - return res; -} - -IHD IntWrapper operator+(const IntWrapper& a, const IntWrapper& b) { - IntWrapper res; - res.val = a.val + b.val; - return res; -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.device.h deleted file mode 100644 index dcd9dd50fd1e83229073fc8e86b815d4da4c99f9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.device.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_RENDERER_BACKWARD_DEVICE_H_ -#define PULSAR_NATIVE_RENDERER_BACKWARD_DEVICE_H_ - -#include "./camera.device.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -void backward( - Renderer* self, - const float* grad_im, - const float* image, - const float* forw_info, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* vert_opy_d, - const size_t& num_balls, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - cudaStream_t stream) { - ARGCHECK(gamma > 0.f && gamma <= 1.f, 6, "gamma must be in [0., 1.]"); - ARGCHECK( - percent_allowed_difference >= 0.f && percent_allowed_difference <= 1.f, - 7, - "percent_allowed_difference must be in [0., 1.]"); - ARGCHECK(max_n_hits >= 1u, 8, "max_n_hits must be >= 1"); - ARGCHECK( - num_balls > 0 && num_balls <= self->max_num_balls, - 9, - "num_balls must be >0 and less than max num balls!"); - ARGCHECK( - cam.film_width == self->cam.film_width && - cam.film_height == self->cam.film_height, - 5, - "cam film size must agree"); - ARGCHECK(mode <= 1, 10, "mode must be <= 1!"); - if (percent_allowed_difference < EPS) { - LOG(WARNING) << "percent_allowed_difference < " << FEPS << "! Clamping to " - << FEPS << "."; - percent_allowed_difference = FEPS; - } - if (percent_allowed_difference > 1.f - FEPS) { - LOG(WARNING) << "percent_allowed_difference > " << (1.f - FEPS) - << "! Clamping to " << (1.f - FEPS) << "."; - percent_allowed_difference = 1.f - FEPS; - } - LOG_IF(INFO, PULSAR_LOG_RENDER) << "Rendering backward pass..."; - // Update camera. 
- self->cam.eye = cam.eye; - self->cam.pixel_0_0_center = cam.pixel_0_0_center - cam.eye; - self->cam.pixel_dir_x = cam.pixel_dir_x; - self->cam.pixel_dir_y = cam.pixel_dir_y; - self->cam.sensor_dir_z = cam.sensor_dir_z; - self->cam.half_pixel_size = cam.half_pixel_size; - self->cam.focal_length = cam.focal_length; - self->cam.aperture_width = cam.aperture_width; - self->cam.aperture_height = cam.aperture_height; - self->cam.min_dist = cam.min_dist; - self->cam.max_dist = cam.max_dist; - self->cam.norm_fac = cam.norm_fac; - self->cam.principal_point_offset_x = cam.principal_point_offset_x; - self->cam.principal_point_offset_y = cam.principal_point_offset_y; - self->cam.film_border_left = cam.film_border_left; - self->cam.film_border_top = cam.film_border_top; -#ifdef PULSAR_TIMINGS_ENABLED - START_TIME(calc_signature); -#endif - LAUNCH_MAX_PARALLEL_1D( - calc_signature, - num_balls, - stream, - *self, - reinterpret_cast(vert_pos), - vert_col, - vert_rad, - num_balls); - CHECKLAUNCH(); -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(calc_signature); - START_TIME(calc_gradients); -#endif - MEMSET(self->grad_pos_d, 0, float3, num_balls, stream); - MEMSET(self->grad_col_d, 0, float, num_balls * self->cam.n_channels, stream); - MEMSET(self->grad_rad_d, 0, float, num_balls, stream); - MEMSET(self->grad_cam_d, 0, float, 12, stream); - MEMSET(self->grad_cam_buf_d, 0, CamGradInfo, num_balls, stream); - MEMSET(self->grad_opy_d, 0, float, num_balls, stream); - MEMSET(self->ids_sorted_d, 0, int, num_balls, stream); - LAUNCH_PARALLEL_2D( - calc_gradients, - self->cam.film_width, - self->cam.film_height, - GRAD_BLOCK_SIZE, - GRAD_BLOCK_SIZE, - stream, - self->cam, - grad_im, - gamma, - reinterpret_cast(vert_pos), - vert_col, - vert_rad, - vert_opy_d, - num_balls, - image, - forw_info, - self->di_d, - self->ii_d, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - self->grad_rad_d, - self->grad_col_d, - self->grad_pos_d, - self->grad_cam_buf_d, - self->grad_opy_d, - self->ids_sorted_d, - self->n_track); - CHECKLAUNCH(); -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(calc_gradients); - START_TIME(normalize); -#endif - LAUNCH_MAX_PARALLEL_1D( - norm_sphere_gradients, num_balls, stream, *self, num_balls); - CHECKLAUNCH(); - if (dif_cam) { - SUM_WS( - self->grad_cam_buf_d, - reinterpret_cast(self->grad_cam_d), - static_cast(num_balls), - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - SUM_WS( - (IntWrapper*)(self->ids_sorted_d), - (IntWrapper*)(self->n_grad_contributions_d), - static_cast(num_balls), - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - LAUNCH_MAX_PARALLEL_1D( - norm_cam_gradients, static_cast(1), stream, *self); - CHECKLAUNCH(); - } -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(normalize); - float time_ms; - // This blocks the result and prevents batch-processing from parallelizing. 
- GET_TIME(calc_signature, &time_ms); - std::cout << "Time for signature calculation: " << time_ms << " ms" - << std::endl; - GET_TIME(calc_gradients, &time_ms); - std::cout << "Time for gradient calculation: " << time_ms << " ms" - << std::endl; - GET_TIME(normalize, &time_ms); - std::cout << "Time for aggregation and normalization: " << time_ms << " ms" - << std::endl; -#endif - LOG_IF(INFO, PULSAR_LOG_RENDER) << "Backward pass complete."; -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.instantiate.h deleted file mode 100644 index 75e85fd4dc88befc63e84ed4891f8ecb4b659bc4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward.instantiate.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./renderer.backward.device.h" - -namespace pulsar { -namespace Renderer { - -template void backward( - Renderer* self, - const float* grad_im, - const float* image, - const float* forw_info, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* vert_opy, - const size_t& num_balls, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - cudaStream_t stream); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.device.h deleted file mode 100644 index f2bdc7e69027d29a8442a14b08d677cc22dc51c9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.device.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_RENDERER_BACKWARD_DBG_DEVICE_H_ -#define PULSAR_NATIVE_RENDERER_BACKWARD_DBG_DEVICE_H_ - -#include "./camera.device.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -void backward_dbg( - Renderer* self, - const float* grad_im, - const float* image, - const float* forw_info, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* vert_opy_d, - const size_t& num_balls, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - const uint& pos_x, - const uint& pos_y, - cudaStream_t stream) { - ARGCHECK(gamma > 0.f && gamma <= 1.f, 6, "gamma must be in [0., 1.]"); - ARGCHECK( - percent_allowed_difference >= 0.f && percent_allowed_difference <= 1.f, - 7, - "percent_allowed_difference must be in [0., 1.]"); - ARGCHECK(max_n_hits >= 1u, 8, "max_n_hits must be >= 1"); - ARGCHECK( - num_balls > 0 && num_balls <= self->max_num_balls, - 9, - "num_balls must be >0 and less than max num balls!"); - ARGCHECK( - cam.film_width == self->cam.film_width && - cam.film_height == self->cam.film_height, - 5, - "cam film size must agree"); - ARGCHECK(mode <= 1, 10, "mode must be <= 1!"); - if (percent_allowed_difference < EPS) { - LOG(WARNING) << "percent_allowed_difference < " << FEPS << "! Clamping to " - << FEPS << "."; - percent_allowed_difference = FEPS; - } - ARGCHECK( - pos_x < cam.film_width && pos_y < cam.film_height, - 15, - "pos_x must be < width and pos_y < height."); - if (percent_allowed_difference > 1.f - FEPS) { - LOG(WARNING) << "percent_allowed_difference > " << (1.f - FEPS) - << "! Clamping to " << (1.f - FEPS) << "."; - percent_allowed_difference = 1.f - FEPS; - } - LOG_IF(INFO, PULSAR_LOG_RENDER) - << "Rendering debug backward pass for x: " << pos_x << ", y: " << pos_y; - // Update camera. 
- self->cam.eye = cam.eye; - self->cam.pixel_0_0_center = cam.pixel_0_0_center - cam.eye; - self->cam.pixel_dir_x = cam.pixel_dir_x; - self->cam.pixel_dir_y = cam.pixel_dir_y; - self->cam.sensor_dir_z = cam.sensor_dir_z; - self->cam.half_pixel_size = cam.half_pixel_size; - self->cam.focal_length = cam.focal_length; - self->cam.aperture_width = cam.aperture_width; - self->cam.aperture_height = cam.aperture_height; - self->cam.min_dist = cam.min_dist; - self->cam.max_dist = cam.max_dist; - self->cam.norm_fac = cam.norm_fac; - self->cam.principal_point_offset_x = cam.principal_point_offset_x; - self->cam.principal_point_offset_y = cam.principal_point_offset_y; - self->cam.film_border_left = cam.film_border_left; - self->cam.film_border_top = cam.film_border_top; - LAUNCH_MAX_PARALLEL_1D( - calc_signature, - num_balls, - stream, - *self, - reinterpret_cast(vert_pos), - vert_col, - vert_rad, - num_balls); - CHECKLAUNCH(); - MEMSET(self->grad_pos_d, 0, float3, num_balls, stream); - MEMSET(self->grad_col_d, 0, float, num_balls * self->cam.n_channels, stream); - MEMSET(self->grad_rad_d, 0, float, num_balls, stream); - MEMSET(self->grad_cam_d, 0, float, 12, stream); - MEMSET(self->grad_cam_buf_d, 0, CamGradInfo, num_balls, stream); - MEMSET(self->grad_opy_d, 0, float, num_balls, stream); - MEMSET(self->ids_sorted_d, 0, int, num_balls, stream); - LAUNCH_MAX_PARALLEL_2D( - calc_gradients, - (int64_t)1, - (int64_t)1, - stream, - self->cam, - grad_im, - gamma, - reinterpret_cast(vert_pos), - vert_col, - vert_rad, - vert_opy_d, - num_balls, - image, - forw_info, - self->di_d, - self->ii_d, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - self->grad_rad_d, - self->grad_col_d, - self->grad_pos_d, - self->grad_cam_buf_d, - self->grad_opy_d, - self->ids_sorted_d, - self->n_track, - pos_x, - pos_y); - CHECKLAUNCH(); - // We're not doing sphere gradient normalization here. - SUM_WS( - self->grad_cam_buf_d, - reinterpret_cast(self->grad_cam_d), - static_cast(1), - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - // We're not doing camera gradient normalization here. - LOG_IF(INFO, PULSAR_LOG_RENDER) << "Debug backward pass complete."; -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.instantiate.h deleted file mode 100644 index 5a7a1ba1f8e56df0a5ff212e7eb769a0564e7f60..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.backward_dbg.instantiate.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "./renderer.backward_dbg.device.h" - -namespace pulsar { -namespace Renderer { - -template void backward_dbg( - Renderer* self, - const float* grad_im, - const float* image, - const float* forw_info, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* vert_opy, - const size_t& num_balls, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - const uint& pos_x, - const uint& pos_y, - cudaStream_t stream); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.device.h deleted file mode 100644 index 90b3872e9606c8830b039f18c4d465c3f8c23c1f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.device.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_GRADIENTS_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_GRADIENTS_H_ - -#include "../global.h" -#include "./commands.h" -#include "./renderer.h" - -#include "./renderer.draw.device.h" - -namespace pulsar { -namespace Renderer { - -template -GLOBAL void calc_gradients( - const CamInfo cam, /** Camera in world coordinates. */ - float const* const RESTRICT grad_im, /** The gradient image. */ - const float - gamma, /** The transparency parameter used in the forward pass. */ - float3 const* const RESTRICT vert_poss, /** Vertex position vector. */ - float const* const RESTRICT vert_cols, /** Vertex color vector. */ - float const* const RESTRICT vert_rads, /** Vertex radius vector. */ - float const* const RESTRICT opacity, /** Vertex opacity. */ - const uint num_balls, /** Number of balls. */ - float const* const RESTRICT result_d, /** Result image. */ - float const* const RESTRICT forw_info_d, /** Forward pass info. */ - DrawInfo const* const RESTRICT di_d, /** Draw information. */ - IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ - // Mode switches. - const bool calc_grad_pos, - const bool calc_grad_col, - const bool calc_grad_rad, - const bool calc_grad_cam, - const bool calc_grad_opy, - // Out variables. - float* const RESTRICT grad_rad_d, /** Radius gradients. */ - float* const RESTRICT grad_col_d, /** Color gradients. */ - float3* const RESTRICT grad_pos_d, /** Position gradients. */ - CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */ - float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */ - int* const RESTRICT - grad_contributed_d, /** Gradient contribution counter. */ - // Infrastructure. - const int n_track, - const uint offs_x, - const uint offs_y /** Debug offsets. */ -) { - uint limit_x = cam.film_width, limit_y = cam.film_height; - if (offs_x != 0) { - // We're in debug mode. - limit_x = 1; - limit_y = 1; - } - GET_PARALLEL_IDS_2D(coord_x_base, coord_y_base, limit_x, limit_y); - // coord_x_base and coord_y_base are in the film coordinate system. - // We now need to translate to the aperture coordinate system. 
If - // the principal point was shifted left/up nothing has to be - subtracted - only shift needs to be added in case it has been - shifted down/right. - const uint film_coord_x = coord_x_base + offs_x; - const uint ap_coord_x = film_coord_x + - 2 * static_cast<uint>(std::max(0, cam.principal_point_offset_x)); - const uint film_coord_y = coord_y_base + offs_y; - const uint ap_coord_y = film_coord_y + - 2 * static_cast<uint>(std::max(0, cam.principal_point_offset_y)); - const float3 ray_dir = /** Ray cast through the pixel, normalized. */ - cam.pixel_0_0_center + ap_coord_x * cam.pixel_dir_x + - ap_coord_y * cam.pixel_dir_y; - const float norm_ray_dir = length(ray_dir); - // ray_dir_norm *must* be calculated here in the same way as in the draw - // function to have the same values with no other numerical instabilities - // (for example, ray_dir * FRCP(norm_ray_dir) does not work)! - float3 ray_dir_norm; /** Ray cast through the pixel, normalized. */ - float2 projected_ray; /** Ray intersection with the sensor. */ - if (cam.orthogonal_projection) { - ray_dir_norm = cam.sensor_dir_z; - projected_ray.x = static_cast<float>(ap_coord_x); - projected_ray.y = static_cast<float>(ap_coord_y); - } else { - ray_dir_norm = normalize( - cam.pixel_0_0_center + ap_coord_x * cam.pixel_dir_x + - ap_coord_y * cam.pixel_dir_y); - // This is a reasonable assumption for normal focal lengths and image sizes. - PASSERT(FABS(ray_dir_norm.z) > FEPS); - projected_ray.x = ray_dir_norm.x / ray_dir_norm.z * cam.focal_length; - projected_ray.y = ray_dir_norm.y / ray_dir_norm.z * cam.focal_length; - } - float* result = const_cast<float*>( - result_d + film_coord_y * cam.film_width * cam.n_channels + - film_coord_x * cam.n_channels); - const float* grad_im_l = grad_im + - film_coord_y * cam.film_width * cam.n_channels + - film_coord_x * cam.n_channels; - // For writing... - float3 grad_pos; - float grad_rad, grad_opy; - CamGradInfo grad_cam_local = CamGradInfo(); - // Set up shared infrastructure. - const int fwi_loc = film_coord_y * cam.film_width * (3 + 2 * n_track) + - film_coord_x * (3 + 2 * n_track); - float sm_m = forw_info_d[fwi_loc]; - float sm_d = forw_info_d[fwi_loc + 1]; - PULSAR_LOG_DEV_APIX( - PULSAR_LOG_GRAD, - "grad|sm_m: %f, sm_d: %f, result: " - "%f, %f, %f; grad_im: %f, %f, %f.\n", - sm_m, - sm_d, - result[0], - result[1], - result[2], - grad_im_l[0], - grad_im_l[1], - grad_im_l[2]); - // Start processing. - for (int grad_idx = 0; grad_idx < n_track; ++grad_idx) { - int sphere_idx; - FASI(forw_info_d[fwi_loc + 3 + 2 * grad_idx], sphere_idx); - PASSERT( - sphere_idx == -1 || - sphere_idx >= 0 && static_cast<uint>(sphere_idx) < num_balls); - if (sphere_idx >= 0) { - // TODO: make more efficient. - grad_pos = make_float3(0.f, 0.f, 0.f); - grad_rad = 0.f; - grad_cam_local = CamGradInfo(); - const DrawInfo di = di_d[sphere_idx]; - grad_opy = 0.f; - draw( - di, - opacity == NULL ? 1.f : opacity[sphere_idx], - cam, - gamma, - ray_dir_norm, - projected_ray, - // Mode switches. - false, // draw only - calc_grad_pos, - calc_grad_col, - calc_grad_rad, - calc_grad_cam, - calc_grad_opy, - // Position info. - ap_coord_x, - ap_coord_y, - sphere_idx, - // Optional in. - &ii_d[sphere_idx], - &ray_dir, - &norm_ray_dir, - grad_im_l, - NULL, - // In/out - &sm_d, - &sm_m, - result, - // Optional out. - NULL, - NULL, - &grad_pos, - grad_col_d + sphere_idx * cam.n_channels, - &grad_rad, - &grad_cam_local, - &grad_opy); - ATOMICADD(&(grad_rad_d[sphere_idx]), grad_rad); - // Color has been added directly.
- ATOMICADD_F3(&(grad_pos_d[sphere_idx]), grad_pos); - ATOMICADD_F3( - &(grad_cam_buf_d[sphere_idx].cam_pos), grad_cam_local.cam_pos); - if (!cam.orthogonal_projection) { - ATOMICADD_F3( - &(grad_cam_buf_d[sphere_idx].pixel_0_0_center), - grad_cam_local.pixel_0_0_center); - } - ATOMICADD_F3( - &(grad_cam_buf_d[sphere_idx].pixel_dir_x), - grad_cam_local.pixel_dir_x); - ATOMICADD_F3( - &(grad_cam_buf_d[sphere_idx].pixel_dir_y), - grad_cam_local.pixel_dir_y); - ATOMICADD(&(grad_opy_d[sphere_idx]), grad_opy); - ATOMICADD(&(grad_contributed_d[sphere_idx]), 1); - } - } - END_PARALLEL_2D_NORET(); -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.instantiate.h deleted file mode 100644 index 596c322b28eef850d5466037770cef53caf51cff..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_gradients.instantiate.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./renderer.calc_gradients.device.h" - -namespace pulsar { -namespace Renderer { - -template GLOBAL void calc_gradients( - const CamInfo cam, /** Camera in world coordinates. */ - float const* const RESTRICT grad_im, /** The gradient image. */ - const float - gamma, /** The transparency parameter used in the forward pass. */ - float3 const* const RESTRICT vert_poss, /** Vertex position vector. */ - float const* const RESTRICT vert_cols, /** Vertex color vector. */ - float const* const RESTRICT vert_rads, /** Vertex radius vector. */ - float const* const RESTRICT opacity, /** Vertex opacity. */ - const uint num_balls, /** Number of balls. */ - float const* const RESTRICT result_d, /** Result image. */ - float const* const RESTRICT forw_info_d, /** Forward pass info. */ - DrawInfo const* const RESTRICT di_d, /** Draw information. */ - IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ - // Mode switches. - const bool calc_grad_pos, - const bool calc_grad_col, - const bool calc_grad_rad, - const bool calc_grad_cam, - const bool calc_grad_opy, - // Out variables. - float* const RESTRICT grad_rad_d, /** Radius gradients. */ - float* const RESTRICT grad_col_d, /** Color gradients. */ - float3* const RESTRICT grad_pos_d, /** Position gradients. */ - CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */ - float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */ - int* const RESTRICT - grad_contributed_d, /** Gradient contribution counter. */ - // Infrastructure. - const int n_track, - const uint offs_x, - const uint offs_y); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.device.h deleted file mode 100644 index bd687fee63d1ee9869ab5beb454a910ff387914c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.device.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.get_screen_area.device.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -GLOBAL void calc_signature( - Renderer renderer, - float3 const* const RESTRICT vert_poss, - float const* const RESTRICT vert_cols, - float const* const RESTRICT vert_rads, - const uint num_balls) { - /* We're not using RESTRICT here for the pointers within `renderer`. Just one - value is being read from each of the pointers, so the effect would be - negligible or non-existent. */ - GET_PARALLEL_IDX_1D(idx, num_balls); - // Create aliases. - // For reading... - const float3& vert_pos = vert_poss[idx]; /** Vertex position. */ - const float* vert_col = - vert_cols + idx * renderer.cam.n_channels; /** Vertex color. */ - const float& vert_rad = vert_rads[idx]; /** Vertex radius. */ - const CamInfo& cam = renderer.cam; /** Camera in world coordinates. */ - // For writing... - /** Ball ID (either original index of the ball or -1 if not visible). */ - int& id_out = renderer.ids_d[idx]; - /** Intersection helper structure for the ball. */ - IntersectInfo& intersect_helper_out = renderer.ii_d[idx]; - /** Draw helper structure for this ball. */ - DrawInfo& draw_helper_out = renderer.di_d[idx]; - /** Minimum possible intersection depth for this ball. */ - float& closest_possible_intersect_out = renderer.min_depth_d[idx]; - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|vert_pos: %.9f, %.9f, %.9f, vert_col (first three): " - "%.9f, %.9f, %.9f.\n", - idx, - vert_pos.x, - vert_pos.y, - vert_pos.z, - vert_col[0], - vert_col[1], - vert_col[2]); - // Set flags to invalid for a potential early return. - id_out = -1; // Invalid ID. - closest_possible_intersect_out = - MAX_FLOAT; // These spheres are sorted to the very end. - intersect_helper_out.max.x = MAX_USHORT; // No intersection possible. - intersect_helper_out.min.x = MAX_USHORT; - intersect_helper_out.max.y = MAX_USHORT; - intersect_helper_out.min.y = MAX_USHORT; - // Start processing. - /** Ball center in the camera coordinate system. */ - const float3 ball_center_cam = vert_pos - cam.eye; - /** Distance to the ball center in the camera coordinate system. */ - const float t_center = length(ball_center_cam); - /** Closest possible intersection with this ball from the camera. */ - float closest_possible_intersect; - if (cam.orthogonal_projection) { - const float3 ball_center_cam_rot = rotate( - ball_center_cam, - cam.pixel_dir_x / length(cam.pixel_dir_x), - cam.pixel_dir_y / length(cam.pixel_dir_y), - cam.sensor_dir_z); - closest_possible_intersect = ball_center_cam_rot.z - vert_rad; - } else { - closest_possible_intersect = t_center - vert_rad; - } - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|t_center: %f. vert_rad: %f. " - "closest_possible_intersect: %f.\n", - idx, - t_center, - vert_rad, - closest_possible_intersect); - /** - * Corner points of the enclosing projected rectangle of the ball. - * They are first calculated in the camera coordinate system, then - * converted to the pixel coordinate system. 
- */ - float x_1, x_2, y_1, y_2; - bool hits_screen_plane; - float3 ray_center_norm = ball_center_cam / t_center; - PASSERT(vert_rad >= 0.f); - if (closest_possible_intersect < cam.min_dist || - closest_possible_intersect > cam.max_dist) { - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|ignoring sphere out of min/max bounds: %.9f, " - "min: %.9f, max: %.9f.\n", - idx, - closest_possible_intersect, - cam.min_dist, - cam.max_dist); - RETURN_PARALLEL(); - } - // Find the relevant region on the screen plane. - hits_screen_plane = get_screen_area( - ball_center_cam, - ray_center_norm, - vert_rad, - cam, - idx, - &x_1, - &x_2, - &y_1, - &y_2); - if (!hits_screen_plane) - RETURN_PARALLEL(); - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|in pixels: x_1: %f, x_2: %f, y_1: %f, y_2: %f.\n", - idx, - x_1, - x_2, - y_1, - y_2); - // Check whether the pixel coordinates are on screen. - if (FMAX(x_1, x_2) <= static_cast(cam.film_border_left) || - FMIN(x_1, x_2) >= - static_cast(cam.film_border_left + cam.film_width) - 0.5f || - FMAX(y_1, y_2) <= static_cast(cam.film_border_top) || - FMIN(y_1, y_2) > - static_cast(cam.film_border_top + cam.film_height) - 0.5f) - RETURN_PARALLEL(); - // Write results. - id_out = idx; - intersect_helper_out.min.x = static_cast( - FMAX(FMIN(x_1, x_2), static_cast(cam.film_border_left))); - intersect_helper_out.min.y = static_cast( - FMAX(FMIN(y_1, y_2), static_cast(cam.film_border_top))); - // In the following calculations, the max that needs to be stored is - // exclusive. - // That means that the calculated value needs to be `ceil`ed and incremented - // to find the correct value. - intersect_helper_out.max.x = static_cast(FMIN( - FCEIL(FMAX(x_1, x_2)) + 1, - static_cast(cam.film_border_left + cam.film_width))); - intersect_helper_out.max.y = static_cast(FMIN( - FCEIL(FMAX(y_1, y_2)) + 1, - static_cast(cam.film_border_top + cam.film_height))); - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|limits after refining: x_1: %u, x_2: %u, " - "y_1: %u, y_2: %u.\n", - idx, - intersect_helper_out.min.x, - intersect_helper_out.max.x, - intersect_helper_out.min.y, - intersect_helper_out.max.y); - if (intersect_helper_out.min.x == MAX_USHORT) { - id_out = -1; - RETURN_PARALLEL(); - } - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|writing info. closest_possible_intersect: %.9f. " - "ray_center_norm: %.9f, %.9f, %.9f. t_center: %.9f. 
radius: %.9f.\n", - idx, - closest_possible_intersect, - ray_center_norm.x, - ray_center_norm.y, - ray_center_norm.z, - t_center, - vert_rad); - closest_possible_intersect_out = closest_possible_intersect; - draw_helper_out.ray_center_norm = ray_center_norm; - draw_helper_out.t_center = t_center; - draw_helper_out.radius = vert_rad; - if (cam.n_channels <= 3) { - draw_helper_out.first_color = vert_col[0]; - for (uint c_id = 1; c_id < cam.n_channels; ++c_id) { - draw_helper_out.color_union.color[c_id - 1] = vert_col[c_id]; - } - } else { - draw_helper_out.color_union.ptr = const_cast(vert_col); - } - END_PARALLEL(); -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.instantiate.h deleted file mode 100644 index 6afa95b44b161d8881b79b22e119c89aad522cc6..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.calc_signature.instantiate.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_INSTANTIATE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_INSTANTIATE_H_ - -#include "./renderer.calc_signature.device.h" - -namespace pulsar { -namespace Renderer { -template GLOBAL void calc_signature( - Renderer renderer, - float3 const* const RESTRICT vert_poss, - float const* const RESTRICT vert_cols, - float const* const RESTRICT vert_rads, - const uint num_balls); -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.device.h deleted file mode 100644 index 8e9ada4b2d62f9ea5d0ed003bd13d016d80b6e9d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.device.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -HOST void construct( - Renderer* self, - const size_t& max_num_balls, - const int& width, - const int& height, - const bool& orthogonal_projection, - const bool& right_handed_system, - const float& background_normalization_depth, - const uint& n_channels, - const uint& n_track) { - ARGCHECK( - (max_num_balls > 0 && max_num_balls < MAX_INT), - 2, - ("the maximum number of balls must be >0 and <" + - std::to_string(MAX_INT) + ". 
Is " + std::to_string(max_num_balls) + ".") - .c_str()); - ARGCHECK(width > 1, 3, "the image width must be > 1"); - ARGCHECK(height > 1, 4, "the image height must be > 1"); - ARGCHECK( - background_normalization_depth > 0.f && - background_normalization_depth < 1.f, - 6, - "background_normalization_depth must be in ]0., 1.[."); - ARGCHECK(n_channels > 0, 7, "n_channels must be >0!"); - ARGCHECK( - n_track > 0 && n_track <= MAX_GRAD_SPHERES, - 8, - ("n_track must be >0 and <" + std::to_string(MAX_GRAD_SPHERES) + ". Is " + - std::to_string(n_track) + ".") - .c_str()); - self->cam.film_width = width; - self->cam.film_height = height; - self->max_num_balls = max_num_balls; - MALLOC(self->result_d, float, width* height* n_channels); - self->cam.orthogonal_projection = orthogonal_projection; - self->cam.right_handed = right_handed_system; - self->cam.background_normalization_depth = background_normalization_depth; - self->cam.n_channels = n_channels; - MALLOC(self->min_depth_d, float, max_num_balls); - MALLOC(self->min_depth_sorted_d, float, max_num_balls); - MALLOC(self->ii_d, IntersectInfo, max_num_balls); - MALLOC(self->ii_sorted_d, IntersectInfo, max_num_balls); - MALLOC(self->ids_d, int, max_num_balls); - MALLOC(self->ids_sorted_d, int, max_num_balls); - size_t sort_id_size = 0; - GET_SORT_WS_SIZE(&sort_id_size, float, int, max_num_balls); - CHECKLAUNCH(); - size_t sort_ii_size = 0; - GET_SORT_WS_SIZE(&sort_ii_size, float, IntersectInfo, max_num_balls); - CHECKLAUNCH(); - size_t sort_di_size = 0; - GET_SORT_WS_SIZE(&sort_di_size, float, DrawInfo, max_num_balls); - CHECKLAUNCH(); - size_t select_ii_size = 0; - GET_SELECT_WS_SIZE(&select_ii_size, char, IntersectInfo, max_num_balls); - size_t select_di_size = 0; - GET_SELECT_WS_SIZE(&select_di_size, char, DrawInfo, max_num_balls); - size_t sum_size = 0; - GET_SUM_WS_SIZE(&sum_size, CamGradInfo, max_num_balls); - size_t sum_cont_size = 0; - GET_SUM_WS_SIZE(&sum_cont_size, int, max_num_balls); - size_t reduce_size = 0; - GET_REDUCE_WS_SIZE( - &reduce_size, IntersectInfo, IntersectInfoMinMax(), max_num_balls); - self->workspace_size = IMAX( - IMAX(IMAX(sort_id_size, sort_ii_size), sort_di_size), - IMAX( - IMAX(select_di_size, select_ii_size), - IMAX(IMAX(sum_size, sum_cont_size), reduce_size))); - MALLOC(self->workspace_d, char, self->workspace_size); - MALLOC(self->di_d, DrawInfo, max_num_balls); - MALLOC(self->di_sorted_d, DrawInfo, max_num_balls); - MALLOC(self->region_flags_d, char, max_num_balls); - MALLOC(self->num_selected_d, size_t, 1); - MALLOC(self->forw_info_d, float, width* height*(3 + 2 * n_track)); - MALLOC(self->min_max_pixels_d, IntersectInfo, 1); - MALLOC(self->grad_pos_d, float3, max_num_balls); - MALLOC(self->grad_col_d, float, max_num_balls* n_channels); - MALLOC(self->grad_rad_d, float, max_num_balls); - MALLOC(self->grad_cam_d, float, 12); - MALLOC(self->grad_cam_buf_d, CamGradInfo, max_num_balls); - MALLOC(self->grad_opy_d, float, max_num_balls); - MALLOC(self->n_grad_contributions_d, int, 1); - self->n_track = static_cast(n_track); -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.instantiate.h deleted file mode 100644 index e5ce722e29b063b04cb8efc0e880d9332bd35f23..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.construct.instantiate.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. 
and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_INSTANTIATE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CONSTRUCT_INSTANTIATE_H_ - -#include "./renderer.construct.device.h" - -namespace pulsar { -namespace Renderer { -template void construct( - Renderer* self, - const size_t& max_num_balls, - const int& width, - const int& height, - const bool& orthogonal_projection, - const bool& right_handed_system, - const float& background_normalization_depth, - const uint& n_channels, - const uint& n_track); -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.device.h deleted file mode 100644 index 747ad03cd3a3a49c34d81485a1780d81a332a215..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.device.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_DEVICE_H_ - -#include "../global.h" -#include "./commands.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -GLOBAL void create_selector( - IntersectInfo const* const RESTRICT ii_sorted_d, - const uint num_balls, - const int min_x, - const int max_x, - const int min_y, - const int max_y, - /* Out variables. */ - char* RESTRICT region_flags_d) { - GET_PARALLEL_IDX_1D(idx, num_balls); - bool hit = (static_cast(ii_sorted_d[idx].min.x) <= max_x) && - (static_cast(ii_sorted_d[idx].max.x) > min_x) && - (static_cast(ii_sorted_d[idx].min.y) <= max_y) && - (static_cast(ii_sorted_d[idx].max.y) > min_y); - region_flags_d[idx] = hit; - END_PARALLEL_NORET(); -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.instantiate.h deleted file mode 100644 index 8e91a8bfb8e9b0f03db39c001e9363920b2eb35f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.create_selector.instantiate.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_INSTANTIATE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CREATE_SELECTOR_INSTANTIATE_H_ - -#include "./renderer.create_selector.device.h" - -namespace pulsar { -namespace Renderer { - -template GLOBAL void create_selector( - IntersectInfo const* const RESTRICT ii_sorted_d, - const uint num_balls, - const int min_x, - const int max_x, - const int min_y, - const int max_y, - /* Out variables. 
*/ - char* RESTRICT region_flags_d); - -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h deleted file mode 100644 index 8520233c59be062fa72376158a9935afa50c3950..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_H_ - -#include "../global.h" -#include "./commands.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -HOST void destruct(Renderer* self) { - if (self->result_d != NULL) - FREE(self->result_d); - self->result_d = NULL; - if (self->min_depth_d != NULL) - FREE(self->min_depth_d); - self->min_depth_d = NULL; - if (self->min_depth_sorted_d != NULL) - FREE(self->min_depth_sorted_d); - self->min_depth_sorted_d = NULL; - if (self->ii_d != NULL) - FREE(self->ii_d); - self->ii_d = NULL; - if (self->ii_sorted_d != NULL) - FREE(self->ii_sorted_d); - self->ii_sorted_d = NULL; - if (self->ids_d != NULL) - FREE(self->ids_d); - self->ids_d = NULL; - if (self->ids_sorted_d != NULL) - FREE(self->ids_sorted_d); - self->ids_sorted_d = NULL; - if (self->workspace_d != NULL) - FREE(self->workspace_d); - self->workspace_d = NULL; - if (self->di_d != NULL) - FREE(self->di_d); - self->di_d = NULL; - if (self->di_sorted_d != NULL) - FREE(self->di_sorted_d); - self->di_sorted_d = NULL; - if (self->region_flags_d != NULL) - FREE(self->region_flags_d); - self->region_flags_d = NULL; - if (self->num_selected_d != NULL) - FREE(self->num_selected_d); - self->num_selected_d = NULL; - if (self->forw_info_d != NULL) - FREE(self->forw_info_d); - self->forw_info_d = NULL; - if (self->min_max_pixels_d != NULL) - FREE(self->min_max_pixels_d); - self->min_max_pixels_d = NULL; - if (self->grad_pos_d != NULL) - FREE(self->grad_pos_d); - self->grad_pos_d = NULL; - if (self->grad_col_d != NULL) - FREE(self->grad_col_d); - self->grad_col_d = NULL; - if (self->grad_rad_d != NULL) - FREE(self->grad_rad_d); - self->grad_rad_d = NULL; - if (self->grad_cam_d != NULL) - FREE(self->grad_cam_d); - self->grad_cam_d = NULL; - if (self->grad_cam_buf_d != NULL) - FREE(self->grad_cam_buf_d); - self->grad_cam_buf_d = NULL; - if (self->grad_opy_d != NULL) - FREE(self->grad_opy_d); - self->grad_opy_d = NULL; - if (self->n_grad_contributions_d != NULL) - FREE(self->n_grad_contributions_d); - self->n_grad_contributions_d = NULL; -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.instantiate.h deleted file mode 100644 index d41ba5a323d0bed9196dc804fab87929b2a726af..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.destruct.instantiate.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_INSTANTIATE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_DESTRUCT_INSTANTIATE_H_ - -#include "./renderer.destruct.device.h" - -namespace pulsar { -namespace Renderer { -template void destruct(Renderer* self); -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.draw.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.draw.device.h deleted file mode 100644 index cb8ecabed3eefce77f7120d234fad15b0bed064c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.draw.device.h +++ /dev/null @@ -1,846 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_CALC_SIGNATURE_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -/** - * Draw a ball into the `result`. - * - * Returns whether a hit was noticed. See README for an explanation of sphere - * points and variable notation. - */ -INLINE DEVICE bool draw( - /* In variables. */ - const DrawInfo& draw_info, /** The draw information for this ball. */ - const float& opacity, /** The sphere opacity. */ - const CamInfo& - cam, /** Camera information. Doesn't have to be normalized. */ - const float& gamma, /** 'Transparency' indicator (see paper for details). */ - const float3& ray_dir_norm, /** The direction of the ray, normalized. */ - const float2& projected_ray, /** The intersection of the ray with the image - in pixel space. */ - /** Mode switches. */ - const bool& draw_only, /** Whether we are in draw vs. grad mode. */ - const bool& calc_grad_pos, /** Calculate position gradients. */ - const bool& calc_grad_col, /** Calculate color gradients. */ - const bool& calc_grad_rad, /** Calculate radius gradients. */ - const bool& calc_grad_cam, /** Calculate camera gradients. */ - const bool& calc_grad_opy, /** Calculate opacity gradients. */ - /** Position info. */ - const uint& coord_x, /** The pixel position x to draw at. */ - const uint& coord_y, /** The pixel position y to draw at. */ - const uint& idx, /** The id of the sphere to process. */ - /* Optional in variables. */ - IntersectInfo const* const RESTRICT - intersect_info, /** The intersect information for this ball. */ - float3 const* const RESTRICT ray_dir, /** The ray direction (not normalized) - to draw at. Only used for grad computation. */ - float const* const RESTRICT norm_ray_dir, /** The length of the direction - vector. Only used for grad computation. */ - float const* const RESTRICT grad_pix, /** The gradient for this pixel. Only - used for grad computation. */ - float const* const RESTRICT - ln_pad_over_1minuspad, /** Allowed percentage indicator. */ - /* In or out variables, depending on mode. */ - float* const RESTRICT sm_d, /** Normalization denominator. */ - float* const RESTRICT - sm_m, /** Maximum of normalization weight factors observed. */ - float* const RESTRICT - result, /** Result pixel color. Must be zeros initially. */ - /* Optional out variables. */ - float* const RESTRICT depth_threshold, /** The depth threshold to use. Only - used for rendering. 
*/ - float* const RESTRICT intersection_depth_norm_out, /** The intersection - depth. Only set when rendering. */ - float3* const RESTRICT grad_pos, /** Gradient w.r.t. position. */ - float* const RESTRICT grad_col, /** Gradient w.r.t. color. */ - float* const RESTRICT grad_rad, /** Gradient w.r.t. radius. */ - CamGradInfo* const RESTRICT grad_cam, /** Gradient w.r.t. camera. */ - float* const RESTRICT grad_opy /** Gradient w.r.t. opacity. */ -) { - // TODO: variable reuse? - PASSERT( - isfinite(draw_info.ray_center_norm.x) && - isfinite(draw_info.ray_center_norm.y) && - isfinite(draw_info.ray_center_norm.z)); - PASSERT(isfinite(draw_info.t_center) && draw_info.t_center >= 0.f); - PASSERT( - isfinite(draw_info.radius) && draw_info.radius >= 0.f && - draw_info.radius <= draw_info.t_center); - PASSERT(isfinite(ray_dir_norm.x)); - PASSERT(isfinite(ray_dir_norm.y)); - PASSERT(isfinite(ray_dir_norm.z)); - PASSERT(isfinite(*sm_d)); - PASSERT( - cam.orthogonal_projection && cam.focal_length == 0.f || - cam.focal_length > 0.f); - PASSERT(gamma <= 1.f && gamma >= 1e-5f); - /** The ball center in the camera coordinate system. */ - float3 center = draw_info.ray_center_norm * draw_info.t_center; - /** The vector from the reference point to the ball center. */ - float3 raydiff; - if (cam.orthogonal_projection) { - center = rotate( - center, - cam.pixel_dir_x / length(cam.pixel_dir_x), - cam.pixel_dir_y / length(cam.pixel_dir_y), - cam.sensor_dir_z); - raydiff = - make_float3( // TODO: make offset consistent with `get_screen_area`. - center.x - - (projected_ray.x - - static_cast(cam.aperture_width) * .5f) * - (2.f * cam.half_pixel_size), - center.y - - (projected_ray.y - - static_cast(cam.aperture_height) * .5f) * - (2.f * cam.half_pixel_size), - 0.f); - } else { - /** The reference point on the ray; the point in the same distance - * from the camera as the ball center, but along the ray. - */ - const float3 rayref = ray_dir_norm * draw_info.t_center; - raydiff = center - rayref; - } - /** The closeness of the reference point to ball center in world coords. - * - * In [0., radius]. - */ - const float closeness_world = length(raydiff); - /** The reciprocal radius. */ - const float radius_rcp = FRCP(draw_info.radius); - /** The closeness factor normalized with the ball radius. - * - * In [0., 1.]. - */ - float closeness = FSATURATE(FMA(-closeness_world, radius_rcp, 1.f)); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|center: %.9f, %.9f, %.9f. raydiff: %.9f, " - "%.9f, %.9f. closeness_world: %.9f. closeness: %.9f\n", - idx, - center.x, - center.y, - center.z, - raydiff.x, - raydiff.y, - raydiff.z, - closeness_world, - closeness); - /** Whether this is the 'center pixel' for this ball, the pixel that - * is closest to its projected center. This information is used to - * make sure to draw 'tiny' spheres with less than one pixel in - * projected size. - */ - bool ray_through_center_pixel; - float projected_radius, projected_x, projected_y; - if (cam.orthogonal_projection) { - projected_x = center.x / (2.f * cam.half_pixel_size) + - (static_cast(cam.aperture_width) - 1.f) / 2.f; - projected_y = center.y / (2.f * cam.half_pixel_size) + - (static_cast(cam.aperture_height) - 1.f) / 2.f; - projected_radius = draw_info.radius / (2.f * cam.half_pixel_size); - ray_through_center_pixel = - (FABS(FSUB(projected_x, projected_ray.x)) < 0.5f + FEPS && - FABS(FSUB(projected_y, projected_ray.y)) < 0.5f + FEPS); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|closeness_world: %.9f. closeness: %.9f. 
" - "projected (x, y): %.9f, %.9f. projected_ray (x, y): " - "%.9f, %.9f. ray_through_center_pixel: %d.\n", - idx, - closeness_world, - closeness, - projected_x, - projected_y, - projected_ray.x, - projected_ray.y, - ray_through_center_pixel); - } else { - // Misusing this variable for half pixel size projected to the depth - // at which the sphere resides. Leave some slack for numerical - // inaccuracy (factor 1.5). - projected_x = FMUL(cam.half_pixel_size * 1.5, draw_info.t_center) * - FRCP(cam.focal_length); - projected_radius = FMUL(draw_info.radius, cam.focal_length) * - FRCP(draw_info.t_center) / (2.f * cam.half_pixel_size); - ray_through_center_pixel = projected_x > closeness_world; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|closeness_world: %.9f. closeness: %.9f. " - "projected half pixel size: %.9f. " - "ray_through_center_pixel: %d.\n", - idx, - closeness_world, - closeness, - projected_x, - ray_through_center_pixel); - } - if (draw_only && draw_info.radius < closeness_world && - !ray_through_center_pixel) { - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|Abandoning since no hit has been detected.\n", - idx); - return false; - } else { - // This is always a hit since we are following the forward execution pass. - // p2 is the closest intersection point with the sphere. - } - if (ray_through_center_pixel && projected_radius < 1.f) { - // Make a tiny sphere visible. - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|Setting closeness to 1 (projected radius: %.9f).\n", - idx, - projected_radius); - closeness = 1.; - } - PASSERT(closeness >= 0.f && closeness <= 1.f); - /** Distance between the camera (`o`) and `p1`, the closest point to the - * ball center along the casted ray. - * - * In [t_center - radius, t_center]. - */ - float o__p1_; - /** The distance from ball center to p1. - * - * In [0., sqrt(t_center ^ 2 - (t_center - radius) ^ 2)]. - */ - float c__p1_; - if (cam.orthogonal_projection) { - o__p1_ = FABS(center.z); - c__p1_ = length(raydiff); - } else { - o__p1_ = dot(center, ray_dir_norm); - /** - * This is being calculated as sqrt(t_center^2 - o__p1_^2) = - * sqrt((t_center + o__p1_) * (t_center - o__p1_)) to avoid - * catastrophic cancellation in floating point representations. - */ - c__p1_ = FSQRT( - (draw_info.t_center + o__p1_) * FMAX(draw_info.t_center - o__p1_, 0.f)); - // PASSERT(o__p1_ >= draw_info.t_center - draw_info.radius); - // Numerical errors lead to too large values. - o__p1_ = FMIN(o__p1_, draw_info.t_center); - // PASSERT(o__p1_ <= draw_info.t_center); - } - /** The distance from the closest point to the sphere center (p1) - * to the closest intersection point (p2). - * - * In [0., radius]. - */ - const float p1__p2_ = - FSQRT((draw_info.radius + c__p1_) * FMAX(draw_info.radius - c__p1_, 0.f)); - PASSERT(p1__p2_ >= 0.f && p1__p2_ <= draw_info.radius); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|o__p1_: %.9f, c__p1_: %.9f, p1__p2_: %.9f.\n", - idx, - o__p1_, - c__p1_, - p1__p2_); - /** The intersection depth of the ray with this ball. - * - * In [t_center - radius, t_center]. - */ - const float intersection_depth = (o__p1_ - p1__p2_); - PASSERT( - cam.orthogonal_projection && - (intersection_depth >= center.z - draw_info.radius && - intersection_depth <= center.z) || - intersection_depth >= draw_info.t_center - draw_info.radius && - intersection_depth <= draw_info.t_center); - /** Normalized distance of the closest intersection point; in [0., 1.]. 
*/ - const float norm_dist = - FMUL(FSUB(intersection_depth, cam.min_dist), cam.norm_fac); - PASSERT(norm_dist >= 0.f && norm_dist <= 1.f); - /** Scaled, normalized distance in [1., 0.] (closest, farthest). */ - const float norm_dist_scaled = FSUB(1.f, norm_dist) / gamma * opacity; - PASSERT(norm_dist_scaled >= 0.f && norm_dist_scaled <= 1.f / gamma); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "drawprep %u|intersection_depth: %.9f, norm_dist: %.9f, " - "norm_dist_scaled: %.9f.\n", - idx, - intersection_depth, - norm_dist, - norm_dist_scaled); - float const* const col_ptr = - cam.n_channels > 3 ? draw_info.color_union.ptr : &draw_info.first_color; - // The implementation for the numerically stable weighted softmax is based - // on https://arxiv.org/pdf/1805.02867.pdf . - if (draw_only) { - /** The old maximum observed value. */ - const float sm_m_old = *sm_m; - *sm_m = FMAX(*sm_m, norm_dist_scaled); - const float coeff_exp = FEXP(norm_dist_scaled - *sm_m); - PASSERT(isfinite(coeff_exp)); - /** The color coefficient for the ball color; in [0., 1.]. */ - const float coeff = closeness * coeff_exp * opacity; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_DRAW_PIX, - "draw %u|coeff: %.9f. closeness: %.9f. coeff_exp: %.9f. " - "opacity: %.9f.\n", - idx, - coeff, - closeness, - coeff_exp, - opacity); - // Rendering. - if (sm_m_old == *sm_m) { - // Use the fact that exp(0) = 1 to avoid the exp calculation for - // the case that the maximum remains the same (which it should - // most of the time). - *sm_d = FADD(*sm_d, coeff); - for (uint c_id = 0; c_id < cam.n_channels; ++c_id) { - PASSERT(isfinite(result[c_id])); - result[c_id] = FMA(coeff, col_ptr[c_id], result[c_id]); - } - } else { - const float exp_correction = FEXP(sm_m_old - *sm_m); - *sm_d = FMA(*sm_d, exp_correction, coeff); - for (uint c_id = 0; c_id < cam.n_channels; ++c_id) { - PASSERT(isfinite(result[c_id])); - result[c_id] = - FMA(coeff, col_ptr[c_id], FMUL(result[c_id], exp_correction)); - } - } - PASSERT(isfinite(*sm_d)); - *intersection_depth_norm_out = intersection_depth; - // Update the depth threshold. - *depth_threshold = - 1.f - (FLN(*sm_d + FEPS) + *ln_pad_over_1minuspad + *sm_m) * gamma; - *depth_threshold = - FMA(*depth_threshold, FSUB(cam.max_dist, cam.min_dist), cam.min_dist); - } else { - // Gradient computation. - const float coeff_exp = FEXP(norm_dist_scaled - *sm_m); - const float gamma_rcp = FRCP(gamma); - const float radius_sq = FMUL(draw_info.radius, draw_info.radius); - const float coeff = FMAX( - FMIN(closeness * coeff_exp * opacity, *sm_d - FEPS), - 0.f); // in [0., sm_d - FEPS]. - PASSERT(coeff >= 0.f && coeff <= *sm_d); - const float otherw = *sm_d - coeff; // in [FEPS, sm_d]. - const float p1__p2_safe = FMAX(p1__p2_, FEPS); // in [eps, t_center]. - const float cam_range = FSUB(cam.max_dist, cam.min_dist); // in ]0, inf[ - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|pos: %.9f, %.9f, %.9f. pixeldirx: %.9f, %.9f, %.9f. " - "pixeldiry: %.9f, %.9f, %.9f. pixel00center: %.9f, %.9f, %.9f.\n", - idx, - draw_info.ray_center_norm.x * draw_info.t_center, - draw_info.ray_center_norm.y * draw_info.t_center, - draw_info.ray_center_norm.z * draw_info.t_center, - cam.pixel_dir_x.x, - cam.pixel_dir_x.y, - cam.pixel_dir_x.z, - cam.pixel_dir_y.x, - cam.pixel_dir_y.y, - cam.pixel_dir_y.z, - cam.pixel_0_0_center.x, - cam.pixel_0_0_center.y, - cam.pixel_0_0_center.z); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|ray_dir: %.9f, %.9f, %.9f. " - "ray_dir_norm: %.9f, %.9f, %.9f. 
" - "draw_info.ray_center_norm: %.9f, %.9f, %.9f.\n", - idx, - ray_dir->x, - ray_dir->y, - ray_dir->z, - ray_dir_norm.x, - ray_dir_norm.y, - ray_dir_norm.z, - draw_info.ray_center_norm.x, - draw_info.ray_center_norm.y, - draw_info.ray_center_norm.z); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|coeff_exp: %.9f. " - "norm_dist_scaled: %.9f. cam.norm_fac: %f.\n", - idx, - coeff_exp, - norm_dist_scaled, - cam.norm_fac); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|p1__p2_: %.9f. p1__p2_safe: %.9f.\n", - idx, - p1__p2_, - p1__p2_safe); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|o__p1_: %.9f. c__p1_: %.9f.\n", - idx, - o__p1_, - c__p1_); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|intersection_depth: %f. norm_dist: %f. " - "coeff: %.9f. closeness: %f. coeff_exp: %f. opacity: " - "%f. color: %f, %f, %f.\n", - idx, - intersection_depth, - norm_dist, - coeff, - closeness, - coeff_exp, - opacity, - draw_info.first_color, - draw_info.color_union.color[0], - draw_info.color_union.color[1]); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|t_center: %.9f. " - "radius: %.9f. max_dist: %f. min_dist: %f. gamma: %f.\n", - idx, - draw_info.t_center, - draw_info.radius, - cam.max_dist, - cam.min_dist, - gamma); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|sm_d: %f. sm_m: %f. grad_pix (first three): %f, %f, %f.\n", - idx, - *sm_d, - *sm_m, - grad_pix[0], - grad_pix[1], - grad_pix[2]); - PULSAR_LOG_DEV_PIX(PULSAR_LOG_GRAD, "grad %u|otherw: %f.\n", idx, otherw); - if (calc_grad_col) { - const float sm_d_norm = FRCP(FMAX(*sm_d, FEPS)); - // First do the multiplication of coeff (in [0., sm_d]) and 1/sm_d. The - // result is a factor in [0., 1.] to be multiplied with the incoming - // gradient. - for (uint c_id = 0; c_id < cam.n_channels; ++c_id) { - ATOMICADD(grad_col + c_id, grad_pix[c_id] * FMUL(coeff, sm_d_norm)); - } - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdcol.x: %f. dresDdcol.x: %f.\n", - idx, - FMUL(coeff, sm_d_norm) * grad_pix[0], - coeff * sm_d_norm); - } - // We disable the computation for too small spheres. - // The comparison is made this way to avoid subtraction of unsigned types. - if (calc_grad_cam || calc_grad_pos || calc_grad_rad || calc_grad_opy) { - //! First find dimDdcoeff. - const float n0 = - otherw * FRCP(FMAX(*sm_d * *sm_d, FEPS)); // in [0., 1. / sm_d]. - PASSERT(isfinite(n0) && n0 >= 0. && n0 <= 1. / *sm_d + 1e2f * FEPS); - // We'll aggergate dimDdcoeff over all the 'color' channels. - float dimDdcoeff = 0.f; - const float otherw_safe_rcp = FRCP(FMAX(otherw, FEPS)); - float othercol; - for (uint c_id = 0; c_id < cam.n_channels; ++c_id) { - othercol = - (result[c_id] * *sm_d - col_ptr[c_id] * coeff) * otherw_safe_rcp; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|othercol[%u]: %.9f.\n", - idx, - c_id, - othercol); - dimDdcoeff += - FMUL(FMUL(grad_pix[c_id], FSUB(col_ptr[c_id], othercol)), n0); - } - PASSERT(isfinite(dimDdcoeff)); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdcoeff: %.9f, n0: %f.\n", - idx, - dimDdcoeff, - n0); - if (calc_grad_opy) { - //! dimDdopacity. - *grad_opy += dimDdcoeff * coeff_exp * closeness * - (1.f + opacity * (1.f - norm_dist) * gamma_rcp); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcoeffDdopacity: %.9f, dimDdopacity: %.9f.\n", - idx, - coeff_exp * closeness, - dimDdcoeff * coeff_exp * closeness); - } - if (intersect_info->max.x >= intersect_info->min.x + 3 && - intersect_info->max.y >= intersect_info->min.y + 3) { - //! 
Now find dcoeffDdintersection_depth and dcoeffDdcloseness. - const float dcoeffDdintersection_depth = - -closeness * coeff_exp * opacity * opacity / (gamma * cam_range); - const float dcoeffDdcloseness = coeff_exp * opacity; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcoeffDdintersection_depth: %.9f. " - "dimDdintersection_depth: %.9f. " - "dcoeffDdcloseness: %.9f. dimDdcloseness: %.9f.\n", - idx, - dcoeffDdintersection_depth, - dimDdcoeff * dcoeffDdintersection_depth, - dcoeffDdcloseness, - dimDdcoeff * dcoeffDdcloseness); - //! Here, the execution paths for orthogonal and pinyhole camera split. - if (cam.orthogonal_projection) { - if (calc_grad_rad) { - //! Find dcoeffDdrad. - float dcoeffDdrad = - dcoeffDdcloseness * (closeness_world / radius_sq) - - dcoeffDdintersection_depth * draw_info.radius / p1__p2_safe; - PASSERT(isfinite(dcoeffDdrad)); - *grad_rad += FMUL(dimDdcoeff, dcoeffDdrad); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdrad: %.9f. dcoeffDdrad: %.9f.\n", - idx, - FMUL(dimDdcoeff, dcoeffDdrad), - dcoeffDdrad); - } - if (calc_grad_pos || calc_grad_cam) { - float3 dimDdcenter = raydiff / - p1__p2_safe; /* making it dintersection_depthDdcenter. */ - dimDdcenter.z = sign_dir(center.z); - PASSERT(FABS(center.z) >= cam.min_dist && cam.min_dist >= FEPS); - dimDdcenter *= dcoeffDdintersection_depth; // dcoeffDdcenter - dimDdcenter -= dcoeffDdcloseness * /* dclosenessDdcenter. */ - raydiff * FRCP(FMAX(length(raydiff) * draw_info.radius, FEPS)); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcoeffDdcenter: %.9f, %.9f, %.9f.\n", - idx, - dimDdcenter.x, - dimDdcenter.y, - dimDdcenter.z); - // Now dcoeffDdcenter is stored in dimDdcenter. - dimDdcenter *= dimDdcoeff; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdcenter: %.9f, %.9f, %.9f.\n", - idx, - dimDdcenter.x, - dimDdcenter.y, - dimDdcenter.z); - // Prepare for posglob and cam pos. - const float pixel_size = length(cam.pixel_dir_x); - // pixel_size is the same as length(pixeldiry)! - const float pixel_size_rcp = FRCP(pixel_size); - float3 dcenterDdposglob = - (cam.pixel_dir_x + cam.pixel_dir_y) * pixel_size_rcp + - cam.sensor_dir_z; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcenterDdposglob: %.9f, %.9f, %.9f.\n", - idx, - dcenterDdposglob.x, - dcenterDdposglob.y, - dcenterDdposglob.z); - if (calc_grad_pos) { - //! dcenterDdposglob. - *grad_pos += dimDdcenter * dcenterDdposglob; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpos: %.9f, %.9f, %.9f.\n", - idx, - dimDdcenter.x * dcenterDdposglob.x, - dimDdcenter.y * dcenterDdposglob.y, - dimDdcenter.z * dcenterDdposglob.z); - } - if (calc_grad_cam) { - //! Camera. 
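The eye gradient written next mirrors the position gradient with the sign flipped: the sphere center used throughout this function is expressed relative to the camera (it is built as vert_pos - cam.eye in calc_signature), so by the chain rule the same per-center term flows into the camera position negated. A one-line sketch of that chain-rule step:

    /* center = vert_pos - cam.eye  =>  d(center)/d(eye) = -I,
       hence grad_cam->cam_pos accumulates -(dimDdcenter * dcenterDdposglob),
       the negation of the grad_pos term added just above. */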
- grad_cam->cam_pos -= dimDdcenter * dcenterDdposglob; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdeye: %.9f, %.9f, %.9f.\n", - idx, - -dimDdcenter.x * dcenterDdposglob.x, - -dimDdcenter.y * dcenterDdposglob.y, - -dimDdcenter.z * dcenterDdposglob.z); - // coord_world - /* - float3 dclosenessDdcoord_world = - raydiff * FRCP(FMAX(draw_info.radius * length(raydiff), FEPS)); - float3 dintersection_depthDdcoord_world = -2.f * raydiff; - */ - float3 dimDdcoord_world = /* dcoeffDdcoord_world */ - dcoeffDdcloseness * raydiff * - FRCP(FMAX(draw_info.radius * length(raydiff), FEPS)) - - dcoeffDdintersection_depth * raydiff / p1__p2_safe; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcoeffDdcoord_world: %.9f, %.9f, %.9f.\n", - idx, - dimDdcoord_world.x, - dimDdcoord_world.y, - dimDdcoord_world.z); - dimDdcoord_world *= dimDdcoeff; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdcoord_world: %.9f, %.9f, %.9f.\n", - idx, - dimDdcoord_world.x, - dimDdcoord_world.y, - dimDdcoord_world.z); - // The third component of dimDdcoord_world is 0! - PASSERT(dimDdcoord_world.z == 0.f); - float3 coord_world = center - raydiff; - coord_world.z = 0.f; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|coord_world: %.9f, %.9f, %.9f.\n", - idx, - coord_world.x, - coord_world.y, - coord_world.z); - // Do this component-wise to save unnecessary matmul steps. - grad_cam->pixel_dir_x += dimDdcoord_world.x * cam.pixel_dir_x * - coord_world.x * pixel_size_rcp * pixel_size_rcp; - grad_cam->pixel_dir_x += dimDdcoord_world.y * cam.pixel_dir_x * - coord_world.y * pixel_size_rcp * pixel_size_rcp; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpixel_dir_x|coord_world: %.9f, %.9f, %.9f.\n", - idx, - grad_cam->pixel_dir_x.x, - grad_cam->pixel_dir_x.y, - grad_cam->pixel_dir_x.z); - // dcenterkDdpixel_dir_k. - float3 center_in_pixels = draw_info.ray_center_norm * - draw_info.t_center * pixel_size_rcp; - grad_cam->pixel_dir_x += dimDdcenter.x * - (center_in_pixels - - outer_product_sum(cam.pixel_dir_x) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcenter0dpixel_dir_x: %.9f, %.9f, %.9f.\n", - idx, - (center_in_pixels - - outer_product_sum(cam.pixel_dir_x) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp) - .x, - (center_in_pixels - - outer_product_sum(cam.pixel_dir_x) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp) - .y, - (center_in_pixels - - outer_product_sum(cam.pixel_dir_x) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp) - .z); - grad_cam->pixel_dir_y += dimDdcenter.y * - (center_in_pixels - - outer_product_sum(cam.pixel_dir_y) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcenter1dpixel_dir_y: %.9f, %.9f, %.9f.\n", - idx, - (center_in_pixels - - outer_product_sum(cam.pixel_dir_y) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp) - .x, - (center_in_pixels - - outer_product_sum(cam.pixel_dir_y) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp) - .y, - (center_in_pixels - - outer_product_sum(cam.pixel_dir_y) * center_in_pixels * - pixel_size_rcp * pixel_size_rcp) - .z); - // dcenterzDdpixel_dir_k. 
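The terms computed next handle how the depth component of the center depends on the pixel direction vectors. A short annotation of that step (a sketch of the reasoning, inferred from the expressions below rather than stated in this file):

    /* center.z in this frame is a projection onto sensor_dir_z, and sensor_dir_z is
       built from the cross product of the two pixel direction vectors. Differentiating
       dot(center, normalize(cross(pixel_dir_y, pixel_dir_x))) w.r.t. either pixel
       direction is what produces the cross(...) terms and the
       1 / |cross(pixel_dir_y, pixel_dir_x)| normalization used below. */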
- float sensordirz_norm_rcp = FRCP( - FMAX(length(cross(cam.pixel_dir_y, cam.pixel_dir_x)), FEPS)); - grad_cam->pixel_dir_x += dimDdcenter.z * - (dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_y, cam.sensor_dir_z) - - cross(cam.pixel_dir_y, center)) * - sensordirz_norm_rcp; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcenterzDdpixel_dir_x: %.9f, %.9f, %.9f.\n", - idx, - ((dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_y, cam.sensor_dir_z) - - cross(cam.pixel_dir_y, center)) * - sensordirz_norm_rcp) - .x, - ((dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_y, cam.sensor_dir_z) - - cross(cam.pixel_dir_y, center)) * - sensordirz_norm_rcp) - .y, - ((dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_y, cam.sensor_dir_z) - - cross(cam.pixel_dir_y, center)) * - sensordirz_norm_rcp) - .z); - grad_cam->pixel_dir_y += dimDdcenter.z * - (dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_x, cam.sensor_dir_z) - - cross(cam.pixel_dir_x, center)) * - sensordirz_norm_rcp; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcenterzDdpixel_dir_y: %.9f, %.9f, %.9f.\n", - idx, - ((dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_x, cam.sensor_dir_z) - - cross(cam.pixel_dir_x, center)) * - sensordirz_norm_rcp) - .x, - ((dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_x, cam.sensor_dir_z) - - cross(cam.pixel_dir_x, center)) * - sensordirz_norm_rcp) - .y, - ((dot(center, cam.sensor_dir_z) * - cross(cam.pixel_dir_x, cam.sensor_dir_z) - - cross(cam.pixel_dir_x, center)) * - sensordirz_norm_rcp) - .z); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpixel_dir_x: %.9f, %.9f, %.9f.\n", - idx, - grad_cam->pixel_dir_x.x, - grad_cam->pixel_dir_x.y, - grad_cam->pixel_dir_x.z); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpixel_dir_y: %.9f, %.9f, %.9f.\n", - idx, - grad_cam->pixel_dir_y.x, - grad_cam->pixel_dir_y.y, - grad_cam->pixel_dir_y.z); - } - } - } else { - if (calc_grad_rad) { - //! Find dcoeffDdrad. - float dcoeffDdrad = - dcoeffDdcloseness * (closeness_world / radius_sq) - - dcoeffDdintersection_depth * draw_info.radius / p1__p2_safe; - PASSERT(isfinite(dcoeffDdrad)); - *grad_rad += FMUL(dimDdcoeff, dcoeffDdrad); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdrad: %.9f. dcoeffDdrad: %.9f.\n", - idx, - FMUL(dimDdcoeff, dcoeffDdrad), - dcoeffDdrad); - } - if (calc_grad_pos || calc_grad_cam) { - const float3 tmp1 = center - ray_dir_norm * o__p1_; - const float3 tmp1n = tmp1 / p1__p2_safe; - const float ray_dir_normDotRaydiff = dot(ray_dir_norm, raydiff); - const float3 dcoeffDdray = dcoeffDdintersection_depth * - (tmp1 - o__p1_ * tmp1n) / *norm_ray_dir + - dcoeffDdcloseness * - (ray_dir_norm * -ray_dir_normDotRaydiff + raydiff) / - (closeness_world * draw_info.radius) * - (draw_info.t_center / *norm_ray_dir); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcoeffDdray: %.9f, %.9f, %.9f. dimDdray: " - "%.9f, %.9f, %.9f.\n", - idx, - dcoeffDdray.x, - dcoeffDdray.y, - dcoeffDdray.z, - dimDdcoeff * dcoeffDdray.x, - dimDdcoeff * dcoeffDdray.y, - dimDdcoeff * dcoeffDdray.z); - const float3 dcoeffDdcenter = - dcoeffDdintersection_depth * (ray_dir_norm + tmp1n) + - dcoeffDdcloseness * - (draw_info.ray_center_norm * ray_dir_normDotRaydiff - - raydiff) / - (closeness_world * draw_info.radius); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dcoeffDdcenter: %.9f, %.9f, %.9f. 
" - "dimDdcenter: %.9f, %.9f, %.9f.\n", - idx, - dcoeffDdcenter.x, - dcoeffDdcenter.y, - dcoeffDdcenter.z, - dimDdcoeff * dcoeffDdcenter.x, - dimDdcoeff * dcoeffDdcenter.y, - dimDdcoeff * dcoeffDdcenter.z); - if (calc_grad_pos) { - *grad_pos += dimDdcoeff * dcoeffDdcenter; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdposglob: %.9f, %.9f, %.9f.\n", - idx, - dimDdcoeff * dcoeffDdcenter.x, - dimDdcoeff * dcoeffDdcenter.y, - dimDdcoeff * dcoeffDdcenter.z); - } - if (calc_grad_cam) { - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdeye: %.9f, %.9f, %.9f.\n", - idx, - -dimDdcoeff * (dcoeffDdcenter.x + dcoeffDdray.x), - -dimDdcoeff * (dcoeffDdcenter.y + dcoeffDdray.y), - -dimDdcoeff * (dcoeffDdcenter.z + dcoeffDdray.z)); - grad_cam->cam_pos += -dimDdcoeff * (dcoeffDdcenter + dcoeffDdray); - grad_cam->pixel_0_0_center += dimDdcoeff * dcoeffDdray; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpixel00centerglob: %.9f, %.9f, %.9f.\n", - idx, - dimDdcoeff * dcoeffDdray.x, - dimDdcoeff * dcoeffDdray.y, - dimDdcoeff * dcoeffDdray.z); - grad_cam->pixel_dir_x += - (dimDdcoeff * static_cast(coord_x)) * dcoeffDdray; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpixel_dir_x: %.9f, %.9f, %.9f.\n", - idx, - (dimDdcoeff * static_cast(coord_x)) * dcoeffDdray.x, - (dimDdcoeff * static_cast(coord_x)) * dcoeffDdray.y, - (dimDdcoeff * static_cast(coord_x)) * dcoeffDdray.z); - grad_cam->pixel_dir_y += - (dimDdcoeff * static_cast(coord_y)) * dcoeffDdray; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_GRAD, - "grad %u|dimDdpixel_dir_y: %.9f, %.9f, %.9f.\n", - idx, - (dimDdcoeff * static_cast(coord_y)) * dcoeffDdray.x, - (dimDdcoeff * static_cast(coord_y)) * dcoeffDdray.y, - (dimDdcoeff * static_cast(coord_y)) * dcoeffDdray.z); - } - } - } - } - } - } - return true; -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.device.h deleted file mode 100644 index 2a737d3eb609781f08120eb734982987866637f4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.device.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_FILL_BG_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_FILL_BG_DEVICE_H_ - -#include "../global.h" -#include "./camera.h" -#include "./commands.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -GLOBAL void fill_bg( - Renderer renderer, - const CamInfo cam, - float const* const bg_col_d, - const float gamma, - const uint mode) { - GET_PARALLEL_IDS_2D(coord_x, coord_y, cam.film_width, cam.film_height); - int write_loc = coord_y * cam.film_width * (3 + 2 * renderer.n_track) + - coord_x * (3 + 2 * renderer.n_track); - if (renderer.forw_info_d[write_loc + 1] // sm_d - == 0.f) { - // This location has not been processed yet. - // Write first the forw_info: - // sm_m - renderer.forw_info_d[write_loc] = - cam.background_normalization_depth / gamma; - // sm_d - renderer.forw_info_d[write_loc + 1] = 1.f; - // max_closest_possible_intersection_hit - renderer.forw_info_d[write_loc + 2] = -1.f; - // sphere IDs and intersection depths. 
- for (int i = 0; i < renderer.n_track; ++i) { - int sphere_id = -1; - IASF(sphere_id, renderer.forw_info_d[write_loc + 3 + i * 2]); - renderer.forw_info_d[write_loc + 3 + i * 2 + 1] = -1.f; - } - if (mode == 0) { - // Image background. - for (int i = 0; i < cam.n_channels; ++i) { - renderer.result_d - [coord_y * cam.film_width * cam.n_channels + - coord_x * cam.n_channels + i] = bg_col_d[i]; - } - } - } - END_PARALLEL_2D_NORET(); -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.instantiate.h deleted file mode 100644 index 02830204a6874b8223bde1615fa9ef8ffa4d318c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.fill_bg.instantiate.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./renderer.fill_bg.device.h" - -namespace pulsar { -namespace Renderer { - -template GLOBAL void fill_bg( - Renderer renderer, - const CamInfo norm, - float const* const bg_col_d, - const float gamma, - const uint mode); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.device.h deleted file mode 100644 index 3f0412f576de4dd77b3f2be6a27ff8ddb144ca74..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.device.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_FORWARD_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_FORWARD_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -template -void forward( - Renderer* self, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* bg_col_d, - const float* opacity_d, - const size_t& num_balls, - const uint& mode, - cudaStream_t stream) { - ARGCHECK(gamma > 0.f && gamma <= 1.f, 6, "gamma must be in [0., 1.]"); - ARGCHECK( - percent_allowed_difference >= 0.f && percent_allowed_difference <= 1.f, - 7, - "percent_allowed_difference must be in [0., 1.]"); - ARGCHECK(max_n_hits >= 1u, 8, "max_n_hits must be >= 1"); - ARGCHECK( - num_balls > 0 && num_balls <= self->max_num_balls, - 9, - ("num_balls must be >0 and <= max num balls! (" + - std::to_string(num_balls) + " vs. " + - std::to_string(self->max_num_balls) + ")") - .c_str()); - ARGCHECK( - cam.film_width == self->cam.film_width && - cam.film_height == self->cam.film_height, - 5, - "cam result width and height must agree"); - ARGCHECK(mode <= 1, 10, "mode must be <= 1!"); - if (percent_allowed_difference > 1.f - FEPS) { - LOG(WARNING) << "percent_allowed_difference > " << (1.f - FEPS) - << "! 
Clamping to " << (1.f - FEPS) << "."; - percent_allowed_difference = 1.f - FEPS; - } - LOG_IF(INFO, PULSAR_LOG_RENDER) << "Rendering forward pass..."; - // Update camera and transform into a new virtual camera system with - // centered principal point and subsection rendering. - self->cam.eye = cam.eye; - self->cam.pixel_0_0_center = cam.pixel_0_0_center - cam.eye; - self->cam.pixel_dir_x = cam.pixel_dir_x; - self->cam.pixel_dir_y = cam.pixel_dir_y; - self->cam.sensor_dir_z = cam.sensor_dir_z; - self->cam.half_pixel_size = cam.half_pixel_size; - self->cam.focal_length = cam.focal_length; - self->cam.aperture_width = cam.aperture_width; - self->cam.aperture_height = cam.aperture_height; - self->cam.min_dist = cam.min_dist; - self->cam.max_dist = cam.max_dist; - self->cam.norm_fac = cam.norm_fac; - self->cam.principal_point_offset_x = cam.principal_point_offset_x; - self->cam.principal_point_offset_y = cam.principal_point_offset_y; - self->cam.film_border_left = cam.film_border_left; - self->cam.film_border_top = cam.film_border_top; -#ifdef PULSAR_TIMINGS_ENABLED - START_TIME(calc_signature); -#endif - LAUNCH_MAX_PARALLEL_1D( - calc_signature, - num_balls, - stream, - *self, - reinterpret_cast(vert_pos), - vert_col, - vert_rad, - num_balls); - CHECKLAUNCH(); -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(calc_signature); - START_TIME(sort); -#endif - SORT_ASCENDING_WS( - self->min_depth_d, - self->min_depth_sorted_d, - self->ids_d, - self->ids_sorted_d, - num_balls, - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - SORT_ASCENDING_WS( - self->min_depth_d, - self->min_depth_sorted_d, - self->ii_d, - self->ii_sorted_d, - num_balls, - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - SORT_ASCENDING_WS( - self->min_depth_d, - self->min_depth_sorted_d, - self->di_d, - self->di_sorted_d, - num_balls, - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(sort); - START_TIME(minmax); -#endif - IntersectInfo pixel_minmax; - pixel_minmax.min.x = MAX_USHORT; - pixel_minmax.min.y = MAX_USHORT; - pixel_minmax.max.x = 0; - pixel_minmax.max.y = 0; - REDUCE_WS( - self->ii_sorted_d, - self->min_max_pixels_d, - num_balls, - IntersectInfoMinMax(), - pixel_minmax, - self->workspace_d, - self->workspace_size, - stream); - COPY_DEV_HOST(&pixel_minmax, self->min_max_pixels_d, IntersectInfo, 1); - LOG_IF(INFO, PULSAR_LOG_RENDER) - << "Region with pixels to render: " << pixel_minmax.min.x << ":" - << pixel_minmax.max.x << " (x), " << pixel_minmax.min.y << ":" - << pixel_minmax.max.y << " (y)."; -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(minmax); - START_TIME(render); -#endif - MEMSET( - self->result_d, - 0, - float, - self->cam.film_width * self->cam.film_height * self->cam.n_channels, - stream); - MEMSET( - self->forw_info_d, - 0, - float, - self->cam.film_width * self->cam.film_height * (3 + 2 * self->n_track), - stream); - if (pixel_minmax.max.y > pixel_minmax.min.y && - pixel_minmax.max.x > pixel_minmax.min.x) { - PASSERT( - pixel_minmax.min.x >= static_cast(self->cam.film_border_left) && - pixel_minmax.min.x < - static_cast( - self->cam.film_border_left + self->cam.film_width) && - pixel_minmax.max.x <= - static_cast( - self->cam.film_border_left + self->cam.film_width) && - pixel_minmax.min.y >= static_cast(self->cam.film_border_top) && - pixel_minmax.min.y < - static_cast( - self->cam.film_border_top + self->cam.film_height) && - pixel_minmax.max.y <= - static_cast( - self->cam.film_border_top + 
self->cam.film_height)); - // Cut the image in 3x3 regions. - int y_step = RENDER_BLOCK_SIZE * - iDivCeil(pixel_minmax.max.y - pixel_minmax.min.y, - 3u * RENDER_BLOCK_SIZE); - int x_step = RENDER_BLOCK_SIZE * - iDivCeil(pixel_minmax.max.x - pixel_minmax.min.x, - 3u * RENDER_BLOCK_SIZE); - LOG_IF(INFO, PULSAR_LOG_RENDER) << "Using image slices of size " << x_step - << ", " << y_step << " (W, H)."; - for (int y_min = pixel_minmax.min.y; y_min < pixel_minmax.max.y; - y_min += y_step) { - for (int x_min = pixel_minmax.min.x; x_min < pixel_minmax.max.x; - x_min += x_step) { - // Create region selection. - LAUNCH_MAX_PARALLEL_1D( - create_selector, - num_balls, - stream, - self->ii_sorted_d, - num_balls, - x_min, - x_min + x_step, - y_min, - y_min + y_step, - self->region_flags_d); - CHECKLAUNCH(); - SELECT_FLAGS_WS( - self->region_flags_d, - self->ii_sorted_d, - self->ii_d, - self->num_selected_d, - num_balls, - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - SELECT_FLAGS_WS( - self->region_flags_d, - self->di_sorted_d, - self->di_d, - self->num_selected_d, - num_balls, - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - SELECT_FLAGS_WS( - self->region_flags_d, - self->ids_sorted_d, - self->ids_d, - self->num_selected_d, - num_balls, - self->workspace_d, - self->workspace_size, - stream); - CHECKLAUNCH(); - LAUNCH_PARALLEL_2D( - render, - x_step, - y_step, - RENDER_BLOCK_SIZE, - RENDER_BLOCK_SIZE, - stream, - self->num_selected_d, - self->ii_d, - self->di_d, - self->min_depth_d, - self->ids_d, - opacity_d, - self->cam, - gamma, - percent_allowed_difference, - max_n_hits, - bg_col_d, - mode, - x_min, - y_min, - x_step, - y_step, - self->result_d, - self->forw_info_d, - self->n_track); - CHECKLAUNCH(); - } - } - } - if (mode == 0) { - LAUNCH_MAX_PARALLEL_2D( - fill_bg, - static_cast(self->cam.film_width), - static_cast(self->cam.film_height), - stream, - *self, - self->cam, - bg_col_d, - gamma, - mode); - CHECKLAUNCH(); - } -#ifdef PULSAR_TIMINGS_ENABLED - STOP_TIME(render); - float time_ms; - // This blocks the result and prevents batch-processing from parallelizing. - GET_TIME(calc_signature, &time_ms); - std::cout << "Time for signature calculation: " << time_ms << " ms" - << std::endl; - GET_TIME(sort, &time_ms); - std::cout << "Time for sorting: " << time_ms << " ms" << std::endl; - GET_TIME(minmax, &time_ms); - std::cout << "Time for minmax pixel calculation: " << time_ms << " ms" - << std::endl; - GET_TIME(render, &time_ms); - std::cout << "Time for rendering: " << time_ms << " ms" << std::endl; -#endif - LOG_IF(INFO, PULSAR_LOG_RENDER) << "Forward pass complete."; -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.instantiate.h deleted file mode 100644 index 7f57bc8681b7c7f1356f3c3e134595ab2d1955f0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.forward.instantiate.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "./renderer.forward.device.h" - -namespace pulsar { -namespace Renderer { - -template void forward( - Renderer* self, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* bg_col_d, - const float* opacity_d, - const size_t& num_balls, - const uint& mode, - cudaStream_t stream); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.get_screen_area.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.get_screen_area.device.h deleted file mode 100644 index 1a85a1bd20cfa0773e395163871ea5a7a8b39347..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.get_screen_area.device.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_GET_SCREEN_AREA_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_GET_SCREEN_AREA_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" - -namespace pulsar { -namespace Renderer { - -/** - * Find the closest enclosing screen area rectangle in pixels that encloses a - * ball. - * - * The method returns the two x and the two y values of the boundaries. They - * are not ordered yet and you need to find min and max for the left/right and - * lower/upper boundary. - * - * The return values are floats and need to be rounded appropriately. - */ -INLINE DEVICE bool get_screen_area( - const float3& ball_center_cam, - const float3& ray_center_norm, - const float& vert_rad, - const CamInfo& cam, - const uint& idx, - /* Out variables. */ - float* x_1, - float* x_2, - float* y_1, - float* y_2) { - float cos_alpha = dot(cam.sensor_dir_z, ray_center_norm); - float2 o__c_, alpha, theta; - if (cos_alpha < EPS) { - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|ball not visible. cos_alpha: %.9f.\n", - idx, - cos_alpha); - // No intersection, ball won't be visible. - return false; - } - // Multiply the direction vector with the camera rotation matrix - // to have the optical axis being the canonical z vector (0, 0, 1). - // TODO: optimize. 
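- // For the perspective branch below: o__c_ is the distance from the eye to the
- // ball center projected into the x/z and y/z planes, alpha the signed angle
- // between that projection and the optical axis, and theta the angular radius
- // of the ball. The sensor-plane bounds are then tan(alpha +/- theta) *
- // focal_length, converted to pixel coordinates by the FMA calls at the end.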
- const float3 ball_center_cam_rot = rotate( - ball_center_cam, - cam.pixel_dir_x / length(cam.pixel_dir_x), - cam.pixel_dir_y / length(cam.pixel_dir_y), - cam.sensor_dir_z); - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|ball_center_cam_rot: %f, %f, %f.\n", - idx, - ball_center_cam.x, - ball_center_cam.y, - ball_center_cam.z); - const float pixel_size_norm_fac = FRCP(2.f * cam.half_pixel_size); - const float optical_offset_x = - (static_cast(cam.aperture_width) - 1.f) * .5f; - const float optical_offset_y = - (static_cast(cam.aperture_height) - 1.f) * .5f; - if (cam.orthogonal_projection) { - *x_1 = - FMA(ball_center_cam_rot.x - vert_rad, - pixel_size_norm_fac, - optical_offset_x); - *x_2 = - FMA(ball_center_cam_rot.x + vert_rad, - pixel_size_norm_fac, - optical_offset_x); - *y_1 = - FMA(ball_center_cam_rot.y - vert_rad, - pixel_size_norm_fac, - optical_offset_y); - *y_2 = - FMA(ball_center_cam_rot.y + vert_rad, - pixel_size_norm_fac, - optical_offset_y); - return true; - } else { - o__c_.x = FMAX( - FSQRT( - ball_center_cam_rot.x * ball_center_cam_rot.x + - ball_center_cam_rot.z * ball_center_cam_rot.z), - FEPS); - o__c_.y = FMAX( - FSQRT( - ball_center_cam_rot.y * ball_center_cam_rot.y + - ball_center_cam_rot.z * ball_center_cam_rot.z), - FEPS); - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|o__c_: %f, %f.\n", - idx, - o__c_.x, - o__c_.y); - alpha.x = sign_dir(ball_center_cam_rot.x) * - acos(FMIN(FMAX(ball_center_cam_rot.z / o__c_.x, -1.f), 1.f)); - alpha.y = -sign_dir(ball_center_cam_rot.y) * - acos(FMIN(FMAX(ball_center_cam_rot.z / o__c_.y, -1.f), 1.f)); - theta.x = asin(FMIN(FMAX(vert_rad / o__c_.x, -1.f), 1.f)); - theta.y = asin(FMIN(FMAX(vert_rad / o__c_.y, -1.f), 1.f)); - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|alpha.x: %f, alpha.y: %f, theta.x: %f, theta.y: %f.\n", - idx, - alpha.x, - alpha.y, - theta.x, - theta.y); - *x_1 = tan(alpha.x - theta.x) * cam.focal_length; - *x_2 = tan(alpha.x + theta.x) * cam.focal_length; - *y_1 = tan(alpha.y - theta.y) * cam.focal_length; - *y_2 = tan(alpha.y + theta.y) * cam.focal_length; - PULSAR_LOG_DEV( - PULSAR_LOG_CALC_SIGNATURE, - "signature %d|in sensor plane: x_1: %f, x_2: %f, y_1: %f, y_2: %f.\n", - idx, - *x_1, - *x_2, - *y_1, - *y_2); - *x_1 = FMA(*x_1, pixel_size_norm_fac, optical_offset_x); - *x_2 = FMA(*x_2, pixel_size_norm_fac, optical_offset_x); - *y_1 = FMA(*y_1, -pixel_size_norm_fac, optical_offset_y); - *y_2 = FMA(*y_2, -pixel_size_norm_fac, optical_offset_y); - return true; - } -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.h deleted file mode 100644 index d6755ee91887b8f6316563b03cee9c524a6f7315..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.h +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_H_ - -#include - -#include "../global.h" -#include "./camera.h" - -namespace pulsar { -namespace Renderer { - -//! Remember to order struct members from larger size to smaller size -//! to avoid padding (for more info, see for example here: -//! http://www.catb.org/esr/structure-packing/). 
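- // Illustrative example of the packing advice above (hypothetical structs,
- // typical 64-bit platform):
- //   struct Bad  { char a; int b; char c; };  // usually 12 bytes (padding after a and c)
- //   struct Good { int b; char a; char c; };  // usually 8 bytes
- // The static_asserts below verify the resulting sizes of the structs in this file.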
- -/** - * This is the information that's needed to do a fast screen point - * intersection with one of the balls. - * - * Aim to keep this below 8 bytes (256 bytes per cache-line / 32 threads in a - * warp = 8 bytes per thread). - */ -struct IntersectInfo { - ushort2 min; /** minimum x, y in pixel coordinates. */ - ushort2 max; /** maximum x, y in pixel coordinates. */ -}; -static_assert( - sizeof(IntersectInfo) == 8, - "The compiled size of `IntersectInfo` is wrong."); - -/** - * Reduction operation to find the limits of multiple IntersectInfo objects. - */ -struct IntersectInfoMinMax { - IHD IntersectInfo - operator()(const IntersectInfo& a, const IntersectInfo& b) const { - // Treat the special case of an invalid intersect info object or one for - // a ball out of bounds. - if (b.max.x == MAX_USHORT && b.min.x == MAX_USHORT && - b.max.y == MAX_USHORT && b.min.y == MAX_USHORT) { - return a; - } - if (a.max.x == MAX_USHORT && a.min.x == MAX_USHORT && - a.max.y == MAX_USHORT && a.min.y == MAX_USHORT) { - return b; - } - IntersectInfo result; - result.min.x = std::min(a.min.x, b.min.x); - result.min.y = std::min(a.min.y, b.min.y); - result.max.x = std::max(a.max.x, b.max.x); - result.max.y = std::max(a.max.y, b.max.y); - return result; - } -}; - -/** - * All information that's needed to draw a ball. - * - * It's necessary to keep this information in float (not half) format, - * because the loss in accuracy would be too high and lead to artifacts. - */ -struct DrawInfo { - float3 ray_center_norm; /** Ray to the ball center, normalized. */ - /** Ball color. - * - * This might be the full color in the case of n_channels <= 3. Otherwise, - * a pointer to the original 'color' data is stored in the following union. - */ - float first_color; - union { - float color[2]; - float* ptr; - } color_union; - float t_center; /** Distance from the camera to the ball center. */ - float radius; /** Ball radius. */ -}; -static_assert( - sizeof(DrawInfo) == 8 * 4, - "The compiled size of `DrawInfo` is wrong."); - -/** - * An object to collect all associated data with the renderer. - * - * The `_d` suffixed pointers point to memory 'on-device', potentially on the - * GPU. All other variables are expected to point to CPU memory. - */ -struct Renderer { - /** Dummy initializer to make sure all pointers are set to NULL to - * be safe for the device-specific 'construct' and 'destruct' methods. - */ - inline Renderer() { - max_num_balls = 0; - result_d = NULL; - min_depth_d = NULL; - min_depth_sorted_d = NULL; - ii_d = NULL; - ii_sorted_d = NULL; - ids_d = NULL; - ids_sorted_d = NULL; - workspace_d = NULL; - di_d = NULL; - di_sorted_d = NULL; - region_flags_d = NULL; - num_selected_d = NULL; - forw_info_d = NULL; - grad_pos_d = NULL; - grad_col_d = NULL; - grad_rad_d = NULL; - grad_cam_d = NULL; - grad_opy_d = NULL; - grad_cam_buf_d = NULL; - n_grad_contributions_d = NULL; - }; - /** The camera for this renderer. In world-coordinates. */ - CamInfo cam; - /** - * The maximum amount of balls the renderer can handle. Resources are - * pre-allocated to account for this size. Less than this amount of balls - * can be rendered, but not more. - */ - int max_num_balls; - /** The result buffer. */ - float* result_d; - /** Closest possible intersection depth per sphere w.r.t. the camera. */ - float* min_depth_d; - /** Closest possible intersection depth per sphere, ordered ascending. */ - float* min_depth_sorted_d; - /** The intersect infos per sphere. 
*/ - IntersectInfo* ii_d; - /** The intersect infos per sphere, ordered by their closest possible - * intersection depth (asc.). */ - IntersectInfo* ii_sorted_d; - /** Original sphere IDs. */ - int* ids_d; - /** Original sphere IDs, ordered by their closest possible intersection depth - * (asc.). */ - int* ids_sorted_d; - /** Workspace for CUB routines. */ - char* workspace_d; - /** Workspace size for CUB routines. */ - size_t workspace_size; - /** The draw information structures for each sphere. */ - DrawInfo* di_d; - /** The draw information structures sorted by closest possible intersection - * depth (asc.). */ - DrawInfo* di_sorted_d; - /** Region association buffer. */ - char* region_flags_d; - /** Num spheres in the current region. */ - size_t* num_selected_d; - /** Pointer to information from the forward pass. */ - float* forw_info_d; - /** Struct containing information about the min max pixels that contain - * rendered information in the image. */ - IntersectInfo* min_max_pixels_d; - /** Gradients w.r.t. position. */ - float3* grad_pos_d; - /** Gradients w.r.t. color. */ - float* grad_col_d; - /** Gradients w.r.t. radius. */ - float* grad_rad_d; - /** Gradients w.r.t. camera parameters. */ - float* grad_cam_d; - /** Gradients w.r.t. opacity. */ - float* grad_opy_d; - /** Camera gradient information by sphere. - * - * Here, every sphere's contribution to the camera gradients is stored. It is - * aggregated and written to grad_cam_d in a separate step. This avoids write - * conflicts when processing the spheres. - */ - CamGradInfo* grad_cam_buf_d; - /** Total of all gradient contributions for this image. */ - int* n_grad_contributions_d; - /** The number of spheres to track for backpropagation. */ - int n_track; -}; - -inline bool operator==(const Renderer& a, const Renderer& b) { - return a.cam == b.cam && a.max_num_balls == b.max_num_balls; -} - -/** - * Construct a renderer. - */ -template -void construct( - Renderer* self, - const size_t& max_num_balls, - const int& width, - const int& height, - const bool& orthogonal_projection, - const bool& right_handed_system, - const float& background_normalization_depth, - const uint& n_channels, - const uint& n_track); - -/** - * Destruct the renderer and free the associated memory. - */ -template -void destruct(Renderer* self); - -/** - * Create a selection of points inside a rectangle. - * - * This write boolen values into `region_flags_d', which can - * for example be used by a CUB function to extract the selection. - */ -template -GLOBAL void create_selector( - IntersectInfo const* const RESTRICT ii_sorted_d, - const uint num_balls, - const int min_x, - const int max_x, - const int min_y, - const int max_y, - /* Out variables. */ - char* RESTRICT region_flags_d); - -/** - * Calculate a signature for a ball. - * - * Populate the `ids_d`, `ii_d`, `di_d` and `min_depth_d` fields of the - * renderer. For spheres not visible in the image, sets the id field to -1, - * min_depth_d to MAX_FLOAT and the ii_d.min.x fields to MAX_USHORT. - */ -template -GLOBAL void calc_signature( - Renderer renderer, - float3 const* const RESTRICT vert_poss, - float const* const RESTRICT vert_cols, - float const* const RESTRICT vert_rads, - const uint num_balls); - -/** - * The block size for rendering. - * - * This should be as large as possible, but is limited due to the amount - * of variables we use and the memory required per thread. - */ -#define RENDER_BLOCK_SIZE 16 -/** - * The buffer size of spheres to be loaded and analyzed for relevance. 
- * - * This must be at least RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE so that - * for every iteration through the loading loop every thread could add a - * 'hit' to the buffer. - */ -#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE* RENDER_BLOCK_SIZE * 2 -/** - * The threshold after which the spheres that are in the render buffer - * are rendered and the buffer is flushed. - * - * Must be less than RENDER_BUFFER_SIZE. - */ -#define RENDER_BUFFER_LOAD_THRESH 16 * 4 - -/** - * The render function. - * - * Assumptions: - * * the focal length is appropriately chosen, - * * ray_dir_norm.z is > EPS. - * * to be completed... - */ -template -GLOBAL void render( - size_t const* const RESTRICT - num_balls, /** Number of balls relevant for this pass. */ - IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ - DrawInfo const* const RESTRICT di_d, /** Draw information. */ - float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */ - int const* const RESTRICT id_d, /** IDs. */ - float const* const RESTRICT op_d, /** Opacity. */ - const CamInfo cam_norm, /** Camera normalized with all vectors to be in the - * camera coordinate system. - */ - const float gamma, /** Transparency parameter. **/ - const float percent_allowed_difference, /** Maximum allowed - error in color. */ - const uint max_n_hits, - const float* bg_col_d, - const uint mode, - const int x_min, - const int y_min, - const int x_step, - const int y_step, - // Out variables. - float* const RESTRICT result_d, /** The result image. */ - float* const RESTRICT forw_info_d, /** Additional information needed for the - grad computation. */ - // Infrastructure. - const int n_track /** The number of spheres to track. */ -); - -/** - * Makes sure to paint background information. - * - * This is required as a separate post-processing step because certain - * pixels may not be processed during the forward pass if there is no - * possibility for a sphere to be present at their location. - */ -template -GLOBAL void fill_bg( - Renderer renderer, - const CamInfo norm, - float const* const bg_col_d, - const float gamma, - const uint mode); - -/** - * Rendering forward pass. - * - * Takes a renderer and sphere data as inputs and creates a rendering. - */ -template -void forward( - Renderer* self, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* bg_col_d, - const float* opacity_d, - const size_t& num_balls, - const uint& mode, - cudaStream_t stream); - -/** - * Normalize the camera gradients by the number of spheres that contributed. - */ -template -GLOBAL void norm_cam_gradients(Renderer renderer); - -/** - * Normalize the sphere gradients. - * - * We're assuming that the samples originate from a Monte Carlo - * sampling process and normalize by number and sphere area. - */ -template -GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls); - -#define GRAD_BLOCK_SIZE 16 -/** Calculate the gradients. - */ -template -GLOBAL void calc_gradients( - const CamInfo cam, /** Camera in world coordinates. */ - float const* const RESTRICT grad_im, /** The gradient image. */ - const float - gamma, /** The transparency parameter used in the forward pass. */ - float3 const* const RESTRICT vert_poss, /** Vertex position vector. */ - float const* const RESTRICT vert_cols, /** Vertex color vector. */ - float const* const RESTRICT vert_rads, /** Vertex radius vector. 
*/ - float const* const RESTRICT opacity, /** Vertex opacity. */ - const uint num_balls, /** Number of balls. */ - float const* const RESTRICT result_d, /** Result image. */ - float const* const RESTRICT forw_info_d, /** Forward pass info. */ - DrawInfo const* const RESTRICT di_d, /** Draw information. */ - IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ - // Mode switches. - const bool calc_grad_pos, - const bool calc_grad_col, - const bool calc_grad_rad, - const bool calc_grad_cam, - const bool calc_grad_opy, - // Out variables. - float* const RESTRICT grad_rad_d, /** Radius gradients. */ - float* const RESTRICT grad_col_d, /** Color gradients. */ - float3* const RESTRICT grad_pos_d, /** Position gradients. */ - CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */ - float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */ - int* const RESTRICT - grad_contributed_d, /** Gradient contribution counter. */ - // Infrastructure. - const int n_track, - const uint offs_x = 0, - const uint offs_y = 0); - -/** - * A full backward pass. - * - * Creates the gradients for the given gradient_image and the spheres. - */ -template -void backward( - Renderer* self, - const float* grad_im, - const float* image, - const float* forw_info, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* vert_opy, - const size_t& num_balls, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - cudaStream_t stream); - -/** - * A debug backward pass. - * - * This is a function to debug the gradient calculation. It calculates the - * gradients for exactly one pixel (set with pos_x and pos_y) without averaging. - * - * *Uses only the first sphere for camera gradient calculation!* - */ -template -void backward_dbg( - Renderer* self, - const float* grad_im, - const float* image, - const float* forw_info, - const float* vert_pos, - const float* vert_col, - const float* vert_rad, - const CamInfo& cam, - const float& gamma, - float percent_allowed_difference, - const uint& max_n_hits, - const float* vert_opy, - const size_t& num_balls, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - const uint& pos_x, - const uint& pos_y, - cudaStream_t stream); - -template -void nn( - const float* ref_ptr, - const float* tar_ptr, - const uint& k, - const uint& d, - const uint& n, - float* dist_ptr, - int32_t* inds_ptr, - cudaStream_t stream); - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.device.h deleted file mode 100644 index e1dfd55d0b1363c1d8d38709460e00a75efeef5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.device.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_NORM_CAM_GRADIENTS_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_NORM_CAM_GRADIENTS_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -/** - * Normalize the camera gradients by the number of spheres that contributed. - */ -template -GLOBAL void norm_cam_gradients(Renderer renderer) { - GET_PARALLEL_IDX_1D(idx, 1); - CamGradInfo* cgi = reinterpret_cast(renderer.grad_cam_d); - *cgi = *cgi * FRCP(static_cast(*renderer.n_grad_contributions_d)); - END_PARALLEL_NORET(); -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.instantiate.h deleted file mode 100644 index 98e05a67e470237a9328d7a441e7b700a7ce675d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_cam_gradients.instantiate.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./renderer.norm_cam_gradients.device.h" - -namespace pulsar { -namespace Renderer { - -template GLOBAL void norm_cam_gradients(Renderer renderer); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h deleted file mode 100644 index 37e0eb00a5179911216a5d2827feb83ade487755..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_NORM_SPHERE_GRADIENTS_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_NORM_SPHERE_GRADIENTS_H_ - -#include "../global.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.h" - -namespace pulsar { -namespace Renderer { - -/** - * Normalize the sphere gradients. - * - * We're assuming that the samples originate from a Monte Carlo - * sampling process and normalize by number and sphere area. - */ -template -GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls) { - GET_PARALLEL_IDX_1D(idx, num_balls); - float norm_fac = 0.f; - IntersectInfo ii; - if (renderer.ids_sorted_d[idx] > 0) { - ii = renderer.ii_d[idx]; - // Normalize the sphere gradients as averages. - // This avoids the case that there are small spheres in a scene with still - // un-converged colors whereas the big spheres already converged, just - // because their integrated learning rate is 'higher'. 
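- // FRCP is used as a reciprocal here: ids_sorted_d[idx] (apparently reused at
- // this point as a per-sphere count of gradient contributions) gives
- // norm_fac = 1 / count, so the multiplications below turn the accumulated
- // gradient sums into per-contribution averages.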
- norm_fac = FRCP(static_cast(renderer.ids_sorted_d[idx])); - } - PULSAR_LOG_DEV_NODE( - PULSAR_LOG_NORMALIZE, - "ids_sorted_d[idx]: %d, norm_fac: %.9f.\n", - renderer.ids_sorted_d[idx], - norm_fac); - renderer.grad_rad_d[idx] *= norm_fac; - for (uint c_idx = 0; c_idx < renderer.cam.n_channels; ++c_idx) { - renderer.grad_col_d[idx * renderer.cam.n_channels + c_idx] *= norm_fac; - } - renderer.grad_pos_d[idx] *= norm_fac; - renderer.grad_opy_d[idx] *= norm_fac; - - if (renderer.ids_sorted_d[idx] > 0) { - // For the camera, we need to be more correct and have the gradients - // be proportional to the area they cover in the image. - // This leads to a formulation very much like in monte carlo integration: - norm_fac = FRCP(static_cast(renderer.ids_sorted_d[idx])) * - (static_cast(ii.max.x) - static_cast(ii.min.x)) * - (static_cast(ii.max.y) - static_cast(ii.min.y)) * - 1e-3f; // for better numerics. - } - renderer.grad_cam_buf_d[idx].cam_pos *= norm_fac; - renderer.grad_cam_buf_d[idx].pixel_0_0_center *= norm_fac; - renderer.grad_cam_buf_d[idx].pixel_dir_x *= norm_fac; - renderer.grad_cam_buf_d[idx].pixel_dir_y *= norm_fac; - // The sphere only contributes to the camera gradients if it is - // large enough in screen space. - if (renderer.ids_sorted_d[idx] > 0 && ii.max.x >= ii.min.x + 3 && - ii.max.y >= ii.min.y + 3) - renderer.ids_sorted_d[idx] = 1; - END_PARALLEL_NORET(); -}; - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.instantiate.h deleted file mode 100644 index bedcf81611cb20b2b404776f477cb3fe174608d2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.instantiate.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./renderer.norm_sphere_gradients.device.h" - -namespace pulsar { -namespace Renderer { - -template GLOBAL void norm_sphere_gradients( - Renderer renderer, - const int num_balls); - -} // namespace Renderer -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.device.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.device.h deleted file mode 100644 index 66a62c31be9ad5e6106bc24ce23dd60a901329f1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.device.h +++ /dev/null @@ -1,416 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_DEVICE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_DEVICE_H_ - -#include "../global.h" -#include "./camera.device.h" -#include "./commands.h" -#include "./math.h" -#include "./renderer.h" - -#include "./closest_sphere_tracker.device.h" -#include "./renderer.draw.device.h" - -namespace pulsar { -namespace Renderer { - -template -GLOBAL void render( - size_t const* const RESTRICT - num_balls, /** Number of balls relevant for this pass. */ - IntersectInfo const* const RESTRICT ii_d, /** Intersect information. 
*/ - DrawInfo const* const RESTRICT di_d, /** Draw information. */ - float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */ - int const* const RESTRICT ids_d, /** IDs. */ - float const* const RESTRICT op_d, /** Opacity. */ - const CamInfo cam_norm, /** Camera normalized with all vectors to be in the - * camera coordinate system. - */ - const float gamma, /** Transparency parameter. **/ - const float percent_allowed_difference, /** Maximum allowed - error in color. */ - const uint max_n_hits, - const float* bg_col, - const uint mode, - const int x_min, - const int y_min, - const int x_step, - const int y_step, - // Out variables. - float* const RESTRICT result_d, /** The result image. */ - float* const RESTRICT forw_info_d, /** Additional information needed for the - grad computation. */ - const int n_track /** The number of spheres to track for backprop. */ -) { - // Do not early stop threads in this block here. They can all contribute to - // the scanning process, we just have to prevent from writing their result. - GET_PARALLEL_IDS_2D(offs_x, offs_y, x_step, y_step); - // Variable declarations and const initializations. - const float ln_pad_over_1minuspad = - FLN(percent_allowed_difference / (1.f - percent_allowed_difference)); - /** A facility to track the closest spheres to the camera - (in preparation for gradient calculation). */ - ClosestSphereTracker tracker(n_track); - const uint coord_x = x_min + offs_x; /** Ray coordinate x. */ - const uint coord_y = y_min + offs_y; /** Ray coordinate y. */ - float3 ray_dir_norm; /** Ray cast through the pixel, normalized. */ - float2 projected_ray; /** Ray intersection with the sensor. */ - if (cam_norm.orthogonal_projection) { - ray_dir_norm = cam_norm.sensor_dir_z; - projected_ray.x = static_cast(coord_x); - projected_ray.y = static_cast(coord_y); - } else { - ray_dir_norm = normalize( - cam_norm.pixel_0_0_center + coord_x * cam_norm.pixel_dir_x + - coord_y * cam_norm.pixel_dir_y); - // This is a reasonable assumption for normal focal lengths and image sizes. - PASSERT(FABS(ray_dir_norm.z) > FEPS); - projected_ray.x = ray_dir_norm.x / ray_dir_norm.z * cam_norm.focal_length; - projected_ray.y = ray_dir_norm.y / ray_dir_norm.z * cam_norm.focal_length; - } - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|ray_dir_norm: %.9f, %.9f, %.9f. projected_ray: %.9f, %.9f.\n", - ray_dir_norm.x, - ray_dir_norm.y, - ray_dir_norm.z, - projected_ray.x, - projected_ray.y); - // Set up shared infrastructure. - /** This entire thread block. */ - cg::thread_block thread_block = cg::this_thread_block(); - /** The collaborators within a warp. */ - cg::coalesced_group thread_warp = cg::coalesced_threads(); - /** The number of loaded balls in the load buffer di_l. */ - SHARED uint n_loaded; - /** Draw information buffer. */ - SHARED DrawInfo di_l[RENDER_BUFFER_SIZE]; - /** The original sphere id of each loaded sphere. */ - SHARED uint sphere_id_l[RENDER_BUFFER_SIZE]; - /** The number of pixels in this block that are done. */ - SHARED int n_pixels_done; - /** Whether loading of balls is completed. */ - SHARED bool loading_done; - /** The number of balls loaded overall (just for statistics). */ - SHARED int n_balls_loaded; - /** The area this thread block covers. */ - SHARED IntersectInfo block_area; - if (thread_block.thread_rank() == 0) { - // Initialize the shared variables. 
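- // Only thread rank 0 writes these shared values; the thread_block.sync()
- // further down publishes them to the rest of the block before the loading
- // loop starts.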
- n_loaded = 0; - block_area.min.x = static_cast(coord_x); - block_area.max.x = static_cast(IMIN( - coord_x + blockDim.x, cam_norm.film_border_left + cam_norm.film_width)); - block_area.min.y = static_cast(coord_y); - block_area.max.y = static_cast(IMIN( - coord_y + blockDim.y, cam_norm.film_border_top + cam_norm.film_height)); - n_pixels_done = 0; - loading_done = false; - n_balls_loaded = 0; - } - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|block_area.min: %d, %d. block_area.max: %d, %d.\n", - block_area.min.x, - block_area.min.y, - block_area.max.x, - block_area.max.y); - // Initialization of the pixel with the background color. - /** - * The result of this very pixel. - * the offset calculation might overflow if this thread is out of - * bounds of the film. However, in this case result is not - * accessed, so this is fine. - */ - float* result = result_d + - (coord_y - cam_norm.film_border_top) * cam_norm.film_width * - cam_norm.n_channels + - (coord_x - cam_norm.film_border_left) * cam_norm.n_channels; - if (coord_x >= cam_norm.film_border_left && - coord_x < cam_norm.film_border_left + cam_norm.film_width && - coord_y >= cam_norm.film_border_top && - coord_y < cam_norm.film_border_top + cam_norm.film_height) { - // Initialize the result. - if (mode == 0u) { - for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) - result[c_id] = bg_col[c_id]; - } else { - result[0] = 0.f; - } - } - /** Normalization denominator. */ - float sm_d = 1.f; - /** Normalization tracker for stable softmax. The maximum observed value. */ - float sm_m = cam_norm.background_normalization_depth / gamma; - /** Whether this pixel has had all information needed for drawing. */ - bool done = - (coord_x < cam_norm.film_border_left || - coord_x >= cam_norm.film_border_left + cam_norm.film_width || - coord_y < cam_norm.film_border_top || - coord_y >= cam_norm.film_border_top + cam_norm.film_height); - /** The depth threshold for a new point to have at least - * `percent_allowed_difference` influence on the result color. All points that - * are further away than this are ignored. - */ - float depth_threshold = done ? -1.f : MAX_FLOAT; - /** The closest intersection possible of a ball that was hit by this pixel - * ray. */ - float max_closest_possible_intersection_hit = -1.f; - bool hit; /** Whether a sphere was hit. */ - float intersection_depth; /** The intersection_depth for a sphere at this - pixel. */ - float closest_possible_intersection; /** The closest possible intersection - for this sphere. */ - float max_closest_possible_intersection; - // Sync up threads so that everyone is similarly initialized. - thread_block.sync(); - //! Coalesced loading and intersection analysis of balls. - for (uint ball_idx = thread_block.thread_rank(); - ball_idx < iDivCeil(static_cast(*num_balls), thread_block.size()) * - thread_block.size() && - !loading_done && n_pixels_done < thread_block.size(); - ball_idx += thread_block.size()) { - if (ball_idx < static_cast(*num_balls)) { // Account for overflow. 
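- // Each thread tests one candidate sphere: if its pixel bounding rectangle
- // overlaps this block's area, its draw info is appended to the shared buffer
- // di_l, to be rasterized by the whole block once the buffer fills past
- // RENDER_BUFFER_LOAD_THRESH.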
- const IntersectInfo& ii = ii_d[ball_idx]; - hit = (ii.min.x <= block_area.max.x) && (ii.max.x > block_area.min.x) && - (ii.min.y <= block_area.max.y) && (ii.max.y > block_area.min.y); - if (hit) { - uint write_idx = ATOMICADD_B(&n_loaded, 1u); - di_l[write_idx] = di_d[ball_idx]; - sphere_id_l[write_idx] = static_cast(ids_d[ball_idx]); - PULSAR_LOG_DEV_PIXB( - PULSAR_LOG_RENDER_PIX, - "render|found intersection with sphere %u.\n", - sphere_id_l[write_idx]); - } - if (ii.min.x == MAX_USHORT) - // This is an invalid sphere (out of image). These spheres have - // maximum depth. Since we ordered the spheres by earliest possible - // intersection depth we re certain that there will no other sphere - // that is relevant after this one. - loading_done = true; - } - // Reset n_pixels_done. - n_pixels_done = 0; - thread_block.sync(); // Make sure n_loaded is updated. - if (n_loaded > RENDER_BUFFER_LOAD_THRESH) { - // The load buffer is full enough. Draw. - if (thread_block.thread_rank() == 0) - n_balls_loaded += n_loaded; - max_closest_possible_intersection = 0.f; - // This excludes threads outside of the image boundary. Also, it reduces - // block artifacts. - if (!done) { - for (uint draw_idx = 0; draw_idx < n_loaded; ++draw_idx) { - intersection_depth = 0.f; - if (cam_norm.orthogonal_projection) { - // The closest possible intersection is the distance to the camera - // plane. - closest_possible_intersection = min_depth_d[sphere_id_l[draw_idx]]; - } else { - closest_possible_intersection = - di_l[draw_idx].t_center - di_l[draw_idx].radius; - } - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|drawing sphere %u (depth: %f, " - "closest possible intersection: %f).\n", - sphere_id_l[draw_idx], - di_l[draw_idx].t_center, - closest_possible_intersection); - hit = draw( - di_l[draw_idx], // Sphere to draw. - op_d == NULL ? 1.f : op_d[sphere_id_l[draw_idx]], // Opacity. - cam_norm, // Cam. - gamma, // Gamma. - ray_dir_norm, // Ray direction. - projected_ray, // Ray intersection with the image. - // Mode switches. - true, // Draw. - false, - false, - false, - false, - false, // No gradients. - // Position info. - coord_x, - coord_y, - sphere_id_l[draw_idx], - // Optional in variables. - NULL, // intersect information. - NULL, // ray_dir. - NULL, // norm_ray_dir. - NULL, // grad_pix. - &ln_pad_over_1minuspad, - // in/out variables - &sm_d, - &sm_m, - result, - // Optional out. - &depth_threshold, - &intersection_depth, - NULL, - NULL, - NULL, - NULL, - NULL // gradients. - ); - if (hit) { - max_closest_possible_intersection_hit = FMAX( - max_closest_possible_intersection_hit, - closest_possible_intersection); - tracker.track( - sphere_id_l[draw_idx], intersection_depth, coord_x, coord_y); - } - max_closest_possible_intersection = FMAX( - max_closest_possible_intersection, closest_possible_intersection); - } - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|max_closest_possible_intersection: %f, " - "depth_threshold: %f.\n", - max_closest_possible_intersection, - depth_threshold); - } - done = done || - (percent_allowed_difference > 0.f && - max_closest_possible_intersection > depth_threshold) || - tracker.get_n_hits() >= max_n_hits; - uint warp_done = thread_warp.ballot(done); - if (thread_warp.thread_rank() == 0) - ATOMICADD_B(&n_pixels_done, POPC(warp_done)); - // This sync is necessary to keep n_loaded until all threads are done with - // painting. 
- thread_block.sync(); - n_loaded = 0; - } - thread_block.sync(); - } - if (thread_block.thread_rank() == 0) - n_balls_loaded += n_loaded; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|loaded %d balls in total.\n", - n_balls_loaded); - if (!done) { - for (uint draw_idx = 0; draw_idx < n_loaded; ++draw_idx) { - intersection_depth = 0.f; - if (cam_norm.orthogonal_projection) { - // The closest possible intersection is the distance to the camera - // plane. - closest_possible_intersection = min_depth_d[sphere_id_l[draw_idx]]; - } else { - closest_possible_intersection = - di_l[draw_idx].t_center - di_l[draw_idx].radius; - } - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|drawing sphere %u (depth: %f, " - "closest possible intersection: %f).\n", - sphere_id_l[draw_idx], - di_l[draw_idx].t_center, - closest_possible_intersection); - hit = draw( - di_l[draw_idx], // Sphere to draw. - op_d == NULL ? 1.f : op_d[sphere_id_l[draw_idx]], // Opacity. - cam_norm, // Cam. - gamma, // Gamma. - ray_dir_norm, // Ray direction. - projected_ray, // Ray intersection with the image. - // Mode switches. - true, // Draw. - false, - false, - false, - false, - false, // No gradients. - // Logging info. - coord_x, - coord_y, - sphere_id_l[draw_idx], - // Optional in variables. - NULL, // intersect information. - NULL, // ray_dir. - NULL, // norm_ray_dir. - NULL, // grad_pix. - &ln_pad_over_1minuspad, - // in/out variables - &sm_d, - &sm_m, - result, - // Optional out. - &depth_threshold, - &intersection_depth, - NULL, - NULL, - NULL, - NULL, - NULL // gradients. - ); - if (hit) { - max_closest_possible_intersection_hit = FMAX( - max_closest_possible_intersection_hit, - closest_possible_intersection); - tracker.track( - sphere_id_l[draw_idx], intersection_depth, coord_x, coord_y); - } - } - } - if (coord_x < cam_norm.film_border_left || - coord_y < cam_norm.film_border_top || - coord_x >= cam_norm.film_border_left + cam_norm.film_width || - coord_y >= cam_norm.film_border_top + cam_norm.film_height) { - RETURN_PARALLEL(); - } - if (mode == 1u) { - // The subtractions, for example coord_y - cam_norm.film_border_left, are - // safe even though both components are uints. We checked their relation - // just above. - result_d - [(coord_y - cam_norm.film_border_top) * cam_norm.film_width * - cam_norm.n_channels + - (coord_x - cam_norm.film_border_left) * cam_norm.n_channels] = - static_cast(tracker.get_n_hits()); - } else { - float sm_d_normfac = FRCP(FMAX(sm_d, FEPS)); - for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) - result[c_id] *= sm_d_normfac; - int write_loc = (coord_y - cam_norm.film_border_top) * cam_norm.film_width * - (3 + 2 * n_track) + - (coord_x - cam_norm.film_border_left) * (3 + 2 * n_track); - forw_info_d[write_loc] = sm_m; - forw_info_d[write_loc + 1] = sm_d; - forw_info_d[write_loc + 2] = max_closest_possible_intersection_hit; - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|writing the %d most important ball infos.\n", - IMIN(n_track, tracker.get_n_hits())); - for (int i = 0; i < n_track; ++i) { - int sphere_id = tracker.get_closest_sphere_id(i); - IASF(sphere_id, forw_info_d[write_loc + 3 + i * 2]); - forw_info_d[write_loc + 3 + i * 2 + 1] = - tracker.get_closest_sphere_depth(i) == MAX_FLOAT - ? 
-1.f - : tracker.get_closest_sphere_depth(i); - PULSAR_LOG_DEV_PIX( - PULSAR_LOG_RENDER_PIX, - "render|writing %d most important: id: %d, normalized depth: %f.\n", - i, - tracker.get_closest_sphere_id(i), - tracker.get_closest_sphere_depth(i)); - } - } - END_PARALLEL_2D(); -} - -} // namespace Renderer -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.instantiate.h b/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.instantiate.h deleted file mode 100644 index 9c1f326e63b8b4860137d9f0d0f440896adb2a88..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/include/renderer.render.instantiate.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_INSTANTIATE_H_ -#define PULSAR_NATIVE_INCLUDE_RENDERER_RENDER_INSTANTIATE_H_ - -#include "./renderer.render.device.h" - -namespace pulsar { -namespace Renderer { -template GLOBAL void render( - size_t const* const RESTRICT - num_balls, /** Number of balls relevant for this pass. */ - IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ - DrawInfo const* const RESTRICT di_d, /** Draw information. */ - float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */ - int const* const RESTRICT id_d, /** IDs. */ - float const* const RESTRICT op_d, /** Opacity. */ - const CamInfo cam_norm, /** Camera normalized with all vectors to be in the - * camera coordinate system. - */ - const float gamma, /** Transparency parameter. **/ - const float percent_allowed_difference, /** Maximum allowed - error in color. */ - const uint max_n_hits, - const float* bg_col_d, - const uint mode, - const int x_min, - const int y_min, - const int x_step, - const int y_step, - // Out variables. - float* const RESTRICT result_d, /** The result image. */ - float* const RESTRICT forw_info_d, /** Additional information needed for the - grad computation. */ - const int n_track /** The number of spheres to track for backprop. */ -); -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/logging.h b/pytorch3d/pytorch3d/csrc/pulsar/logging.h deleted file mode 100644 index 63d472257671287156ccf77531c6897beff1fcd2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/logging.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_LOGGING_H_ -#define PULSAR_LOGGING_H_ - -// #define PULSAR_LOGGING_ENABLED -/** - * Enable detailed per-operation timings. - * - * This timing scheme is not appropriate to measure batched calculations. - * Use `PULSAR_TIMINGS_BATCHED_ENABLED` for that. - */ -// #define PULSAR_TIMINGS_ENABLED -/** - * Time batched operations. - */ -// #define PULSAR_TIMINGS_BATCHED_ENABLED -#if defined(PULSAR_TIMINGS_BATCHED_ENABLED) && defined(PULSAR_TIMINGS_ENABLED) -#pragma message("Pulsar|batched and unbatched timings enabled. This will not") -#pragma message("Pulsar|create meaningful results.") -#endif - -#ifdef PULSAR_LOGGING_ENABLED - -// Control logging. -// 0: INFO, 1: WARNING, 2: ERROR, 3: FATAL (Abort after logging). 
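- // Usage sketch for the per-pixel logging macros defined below (illustrative):
- //   PULSAR_LOG_DEV_PIX(PULSAR_LOG_RENDER_PIX, "depth: %f.\n", depth);
- // prints only for the pixel selected via PULSAR_LOG_RENDER_PIX_X/_Y while
- // PULSAR_LOG_RENDER_PIX is enabled, or for every pixel when
- // PULSAR_LOG_RENDER_PIX_ALL is set.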
-#define CAFFE2_LOG_THRESHOLD 0 -#define PULSAR_LOG_INIT false -#define PULSAR_LOG_FORWARD false -#define PULSAR_LOG_CALC_SIGNATURE false -#define PULSAR_LOG_RENDER false -#define PULSAR_LOG_RENDER_PIX false -#define PULSAR_LOG_RENDER_PIX_X 428 -#define PULSAR_LOG_RENDER_PIX_Y 669 -#define PULSAR_LOG_RENDER_PIX_ALL false -#define PULSAR_LOG_TRACKER_PIX false -#define PULSAR_LOG_TRACKER_PIX_X 428 -#define PULSAR_LOG_TRACKER_PIX_Y 669 -#define PULSAR_LOG_TRACKER_PIX_ALL false -#define PULSAR_LOG_DRAW_PIX false -#define PULSAR_LOG_DRAW_PIX_X 428 -#define PULSAR_LOG_DRAW_PIX_Y 669 -#define PULSAR_LOG_DRAW_PIX_ALL false -#define PULSAR_LOG_BACKWARD false -#define PULSAR_LOG_GRAD false -#define PULSAR_LOG_GRAD_X 509 -#define PULSAR_LOG_GRAD_Y 489 -#define PULSAR_LOG_GRAD_ALL false -#define PULSAR_LOG_NORMALIZE false -#define PULSAR_LOG_NORMALIZE_X 0 -#define PULSAR_LOG_NORMALIZE_ALL false - -#define PULSAR_LOG_DEV(ID, ...) \ - if ((ID)) { \ - printf(__VA_ARGS__); \ - } -#define PULSAR_LOG_DEV_APIX(ID, MSG, ...) \ - if ((ID) && (film_coord_x == (ID##_X) && film_coord_y == (ID##_Y)) || \ - ID##_ALL) { \ - printf( \ - "%u %u (ap %u %u)|" MSG, \ - film_coord_x, \ - film_coord_y, \ - ap_coord_x, \ - ap_coord_y, \ - __VA_ARGS__); \ - } -#define PULSAR_LOG_DEV_PIX(ID, MSG, ...) \ - if ((ID) && (coord_x == (ID##_X) && coord_y == (ID##_Y)) || ID##_ALL) { \ - printf("%u %u|" MSG, coord_x, coord_y, __VA_ARGS__); \ - } -#ifdef __CUDACC__ -#define PULSAR_LOG_DEV_PIXB(ID, MSG, ...) \ - if ((ID) && static_cast(block_area.min.x) <= (ID##_X) && \ - static_cast(block_area.max.x) > (ID##_X) && \ - static_cast(block_area.min.y) <= (ID##_Y) && \ - static_cast(block_area.max.y) > (ID##_Y)) { \ - printf("%u %u|" MSG, coord_x, coord_y, __VA_ARGS__); \ - } -#else -#define PULSAR_LOG_DEV_PIXB(ID, MSG, ...) \ - if ((ID) && coord_x == (ID##_X) && coord_y == (ID##_Y)) { \ - printf("%u %u|" MSG, coord_x, coord_y, __VA_ARGS__); \ - } -#endif -#define PULSAR_LOG_DEV_NODE(ID, MSG, ...) \ - if ((ID) && idx == (ID##_X) || (ID##_ALL)) { \ - printf("%u|" MSG, idx, __VA_ARGS__); \ - } - -#else - -#define CAFFE2_LOG_THRESHOLD 2 - -#define PULSAR_LOG_RENDER false -#define PULSAR_LOG_INIT false -#define PULSAR_LOG_FORWARD false -#define PULSAR_LOG_BACKWARD false -#define PULSAR_LOG_TRACKER_PIX false - -#define PULSAR_LOG_DEV(...) -#define PULSAR_LOG_DEV_APIX(...) -#define PULSAR_LOG_DEV_PIX(...) -#define PULSAR_LOG_DEV_PIXB(...) -#define PULSAR_LOG_DEV_NODE(...) - -#endif - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.cpp deleted file mode 100644 index c3794e7edf90f4af50632ea91bc131bd87fd751f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "./camera.h" -#include "../include/math.h" - -namespace pulsar { -namespace pytorch { - -CamInfo cam_info_from_params( - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& principal_point_offset, - const float& focal_length, - const uint& width, - const uint& height, - const float& min_dist, - const float& max_dist, - const bool& right_handed) { - CamInfo res; - fill_cam_vecs( - cam_pos.detach().cpu(), - pixel_0_0_center.detach().cpu(), - pixel_vec_x.detach().cpu(), - pixel_vec_y.detach().cpu(), - principal_point_offset.detach().cpu(), - right_handed, - &res); - res.half_pixel_size = 0.5f * length(res.pixel_dir_x); - if (length(res.pixel_dir_y) * 0.5f - res.half_pixel_size > EPS) { - throw std::runtime_error("Pixel sizes must agree in x and y direction!"); - } - res.focal_length = focal_length; - res.aperture_width = - width + 2u * static_cast(abs(res.principal_point_offset_x)); - res.aperture_height = - height + 2u * static_cast(abs(res.principal_point_offset_y)); - res.pixel_0_0_center -= - res.pixel_dir_x * static_cast(abs(res.principal_point_offset_x)); - res.pixel_0_0_center -= - res.pixel_dir_y * static_cast(abs(res.principal_point_offset_y)); - res.film_width = width; - res.film_height = height; - res.film_border_left = - static_cast(std::max(0, 2 * res.principal_point_offset_x)); - res.film_border_top = - static_cast(std::max(0, 2 * res.principal_point_offset_y)); - LOG_IF(INFO, PULSAR_LOG_INIT) - << "Aperture width, height: " << res.aperture_width << ", " - << res.aperture_height; - LOG_IF(INFO, PULSAR_LOG_INIT) - << "Film width, height: " << res.film_width << ", " << res.film_height; - LOG_IF(INFO, PULSAR_LOG_INIT) - << "Film border left, top: " << res.film_border_left << ", " - << res.film_border_top; - res.min_dist = min_dist; - res.max_dist = max_dist; - res.norm_fac = 1.f / (max_dist - min_dist); - return res; -}; - -} // namespace pytorch -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.h deleted file mode 100644 index 9ecd95353ad76efd2760a4a634493917fda7b468..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/camera.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#ifndef PULSAR_NATIVE_CAMERA_H_ -#define PULSAR_NATIVE_CAMERA_H_ - -#include -#include "../global.h" - -#include "../include/camera.h" - -namespace pulsar { -namespace pytorch { - -inline void fill_cam_vecs( - const torch::Tensor& pos_vec, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_dir_x, - const torch::Tensor& pixel_dir_y, - const torch::Tensor& principal_point_offset, - const bool& right_handed, - CamInfo* res) { - res->eye.x = pos_vec.data_ptr()[0]; - res->eye.y = pos_vec.data_ptr()[1]; - res->eye.z = pos_vec.data_ptr()[2]; - res->pixel_0_0_center.x = pixel_0_0_center.data_ptr()[0]; - res->pixel_0_0_center.y = pixel_0_0_center.data_ptr()[1]; - res->pixel_0_0_center.z = pixel_0_0_center.data_ptr()[2]; - res->pixel_dir_x.x = pixel_dir_x.data_ptr()[0]; - res->pixel_dir_x.y = pixel_dir_x.data_ptr()[1]; - res->pixel_dir_x.z = pixel_dir_x.data_ptr()[2]; - res->pixel_dir_y.x = pixel_dir_y.data_ptr()[0]; - res->pixel_dir_y.y = pixel_dir_y.data_ptr()[1]; - res->pixel_dir_y.z = pixel_dir_y.data_ptr()[2]; - auto sensor_dir_z = pixel_dir_y.cross(pixel_dir_x, -1); - sensor_dir_z /= sensor_dir_z.norm(); - if (right_handed) { - sensor_dir_z *= -1.f; - } - res->sensor_dir_z.x = sensor_dir_z.data_ptr()[0]; - res->sensor_dir_z.y = sensor_dir_z.data_ptr()[1]; - res->sensor_dir_z.z = sensor_dir_z.data_ptr()[2]; - res->principal_point_offset_x = principal_point_offset.data_ptr()[0]; - res->principal_point_offset_y = principal_point_offset.data_ptr()[1]; -} - -CamInfo cam_info_from_params( - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& principal_point_offset, - const float& focal_length, - const uint& width, - const uint& height, - const float& min_dist, - const float& max_dist, - const bool& right_handed); - -} // namespace pytorch -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.cpp deleted file mode 100644 index 4349aea796cfea0a63a5f76f7669816993fe3f2b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.cpp +++ /dev/null @@ -1,1599 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./renderer.h" -#include "../include/commands.h" -#include "./camera.h" -#include "./util.h" - -#include -#ifdef WITH_CUDA -#include -#include -#endif - -#ifndef TORCH_CHECK_ARG -// torch <= 1.10 -#define TORCH_CHECK_ARG(cond, argN, ...) 
\ - TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__) -#endif - -namespace PRE = ::pulsar::Renderer; - -namespace pulsar { -namespace pytorch { - -Renderer::Renderer( - const unsigned int& width, - const unsigned int& height, - const unsigned int& max_n_balls, - const bool& orthogonal_projection, - const bool& right_handed_system, - const float& background_normalization_depth, - const uint& n_channels, - const uint& n_track) { - LOG_IF(INFO, PULSAR_LOG_INIT) << "Initializing renderer."; - TORCH_CHECK_ARG(width > 0, 1, "image width must be > 0!"); - TORCH_CHECK_ARG(height > 0, 2, "image height must be > 0!"); - TORCH_CHECK_ARG(max_n_balls > 0, 3, "max_n_balls must be > 0!"); - TORCH_CHECK_ARG( - background_normalization_depth > 0.f && - background_normalization_depth < 1.f, - 5, - "background_normalization_depth must be in ]0., 1.["); - TORCH_CHECK_ARG(n_channels > 0, 6, "n_channels must be > 0"); - TORCH_CHECK_ARG( - n_track > 0 && n_track <= MAX_GRAD_SPHERES, - 7, - ("n_track must be > 0 and <" + std::to_string(MAX_GRAD_SPHERES) + - ". Is " + std::to_string(n_track) + ".") - .c_str()); - LOG_IF(INFO, PULSAR_LOG_INIT) - << "Image width: " << width << ", height: " << height; - this->renderer_vec.emplace_back(); - this->device_type = c10::DeviceType::CPU; - this->device_index = -1; - PRE::construct( - this->renderer_vec.data(), - max_n_balls, - width, - height, - orthogonal_projection, - right_handed_system, - background_normalization_depth, - n_channels, - n_track); - this->device_tracker = torch::zeros(1); -}; - -Renderer::~Renderer() { - if (this->device_type == c10::DeviceType::CUDA) { -// Can't happen in the case that not compiled with CUDA. -#ifdef WITH_CUDA - at::cuda::CUDAGuard device_guard(this->device_tracker.device()); - for (auto nrend : this->renderer_vec) { - PRE::destruct(&nrend); - } -#endif - } else { - for (auto nrend : this->renderer_vec) { - PRE::destruct(&nrend); - } - } -} - -bool Renderer::operator==(const Renderer& rhs) const { - LOG_IF(INFO, PULSAR_LOG_INIT) << "Equality check."; - bool renderer_agrees = (this->renderer_vec[0] == rhs.renderer_vec[0]); - LOG_IF(INFO, PULSAR_LOG_INIT) << " Renderer agrees: " << renderer_agrees; - bool device_agrees = - (this->device_tracker.device() == rhs.device_tracker.device()); - LOG_IF(INFO, PULSAR_LOG_INIT) << " Device agrees: " << device_agrees; - return (renderer_agrees && device_agrees); -}; - -void Renderer::ensure_on_device(torch::Device device, bool /*non_blocking*/) { - TORCH_CHECK_ARG( - device.type() == c10::DeviceType::CUDA || - device.type() == c10::DeviceType::CPU, - 1, - "Only CPU and CUDA device types are supported."); - if (device.type() != this->device_type || - device.index() != this->device_index) { -#ifdef WITH_CUDA - LOG_IF(INFO, PULSAR_LOG_INIT) - << "Transferring render buffers between devices."; - int prev_active; - cudaGetDevice(&prev_active); - if (this->device_type == c10::DeviceType::CUDA) { - LOG_IF(INFO, PULSAR_LOG_INIT) << " Destructing on CUDA."; - cudaSetDevice(this->device_index); - for (auto& nrend : this->renderer_vec) { - PRE::destruct(&nrend); - } - } else { - LOG_IF(INFO, PULSAR_LOG_INIT) << " Destructing on CPU."; - for (auto& nrend : this->renderer_vec) { - PRE::destruct(&nrend); - } - } - if (device.type() == c10::DeviceType::CUDA) { - LOG_IF(INFO, PULSAR_LOG_INIT) << " Constructing on CUDA."; - cudaSetDevice(device.index()); - for (auto& nrend : this->renderer_vec) { - PRE::construct( - &nrend, - this->renderer_vec[0].max_num_balls, - this->renderer_vec[0].cam.film_width, - 
this->renderer_vec[0].cam.film_height, - this->renderer_vec[0].cam.orthogonal_projection, - this->renderer_vec[0].cam.right_handed, - this->renderer_vec[0].cam.background_normalization_depth, - this->renderer_vec[0].cam.n_channels, - this->n_track()); - } - } else { - LOG_IF(INFO, PULSAR_LOG_INIT) << " Constructing on CPU."; - for (auto& nrend : this->renderer_vec) { - PRE::construct( - &nrend, - this->renderer_vec[0].max_num_balls, - this->renderer_vec[0].cam.film_width, - this->renderer_vec[0].cam.film_height, - this->renderer_vec[0].cam.orthogonal_projection, - this->renderer_vec[0].cam.right_handed, - this->renderer_vec[0].cam.background_normalization_depth, - this->renderer_vec[0].cam.n_channels, - this->n_track()); - } - } - cudaSetDevice(prev_active); - this->device_type = device.type(); - this->device_index = device.index(); -#else - throw std::runtime_error( - "pulsar was built without CUDA " - "but a device move to a CUDA device was initiated."); -#endif - } -}; - -void Renderer::ensure_n_renderers_gte(const size_t& batch_size) { - if (this->renderer_vec.size() < batch_size) { - ptrdiff_t diff = batch_size - this->renderer_vec.size(); - LOG_IF(INFO, PULSAR_LOG_INIT) - << "Increasing render buffers by " << diff - << " to account for batch size " << batch_size; - for (ptrdiff_t i = 0; i < diff; ++i) { - this->renderer_vec.emplace_back(); - if (this->device_type == c10::DeviceType::CUDA) { -#ifdef WITH_CUDA - PRE::construct( - &this->renderer_vec[this->renderer_vec.size() - 1], - this->max_num_balls(), - this->width(), - this->height(), - this->renderer_vec[0].cam.orthogonal_projection, - this->renderer_vec[0].cam.right_handed, - this->renderer_vec[0].cam.background_normalization_depth, - this->renderer_vec[0].cam.n_channels, - this->n_track()); -#endif - } else { - PRE::construct( - &this->renderer_vec[this->renderer_vec.size() - 1], - this->max_num_balls(), - this->width(), - this->height(), - this->renderer_vec[0].cam.orthogonal_projection, - this->renderer_vec[0].cam.right_handed, - this->renderer_vec[0].cam.background_normalization_depth, - this->renderer_vec[0].cam.n_channels, - this->n_track()); - } - } - } -} - -std::tuple Renderer::arg_check( - const torch::Tensor& vert_pos, - const torch::Tensor& vert_col, - const torch::Tensor& vert_radii, - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& focal_length, - const torch::Tensor& principal_point_offsets, - const float& gamma, - const float& max_depth, - float& min_depth, - const c10::optional& bg_col, - const c10::optional& opacity, - const float& percent_allowed_difference, - const uint& max_n_hits, - const uint& mode) { - LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) << "Arg check."; - size_t batch_size = 1; - size_t n_points; - bool batch_processing = false; - if (vert_pos.ndimension() == 3) { - // Check all parameters adhere batch size. 
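The batched branch below pins down the argument shapes. As a rough reference, this sketch (not part of the original file; B, N and C are placeholder sizes for batch, point count and channel count) shows inputs whose dimensions would satisfy these checks:

    const int64_t B = 2, N = 1000, C = 3;
    torch::Tensor vert_pos = torch::zeros({B, N, 3});        // 3D position per point
    torch::Tensor vert_col = torch::zeros({B, N, C});        // C channels per point
    torch::Tensor vert_radii = torch::full({B, N}, 0.02f);   // one radius per point
    torch::Tensor cam_pos = torch::zeros({B, 3});            // camera center per item
    torch::Tensor pixel_0_0_center = torch::zeros({B, 3});
    torch::Tensor pixel_vec_x = torch::zeros({B, 3});
    torch::Tensor pixel_vec_y = torch::zeros({B, 3});
    torch::Tensor focal_length = torch::zeros({B});          // one scalar per item
    torch::Tensor principal_point_offsets =
        torch::zeros({B, 2}, torch::kInt32);                 // int32 x/y offsets
    torch::Tensor opacity = torch::zeros({B, N});            // optional, per point
    // Values are placeholders; the content checks later in arg_check additionally
    // require e.g. radii > FEPS and a positive focal length for perspective cameras.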
- batch_processing = true; - batch_size = vert_pos.size(0); - TORCH_CHECK_ARG( - vert_col.ndimension() == 3 && - vert_col.size(0) == static_cast(batch_size), - 2, - "vert_col needs to have batch size."); - TORCH_CHECK_ARG( - vert_radii.ndimension() == 2 && - vert_radii.size(0) == static_cast(batch_size), - 3, - "vert_radii must be specified per batch."); - TORCH_CHECK_ARG( - cam_pos.ndimension() == 2 && - cam_pos.size(0) == static_cast(batch_size), - 4, - "cam_pos must be specified per batch and have the correct batch size."); - TORCH_CHECK_ARG( - pixel_0_0_center.ndimension() == 2 && - pixel_0_0_center.size(0) == static_cast(batch_size), - 5, - "pixel_0_0_center must be specified per batch."); - TORCH_CHECK_ARG( - pixel_vec_x.ndimension() == 2 && - pixel_vec_x.size(0) == static_cast(batch_size), - 6, - "pixel_vec_x must be specified per batch."); - TORCH_CHECK_ARG( - pixel_vec_y.ndimension() == 2 && - pixel_vec_y.size(0) == static_cast(batch_size), - 7, - "pixel_vec_y must be specified per batch."); - TORCH_CHECK_ARG( - focal_length.ndimension() == 1 && - focal_length.size(0) == static_cast(batch_size), - 8, - "focal_length must be specified per batch."); - TORCH_CHECK_ARG( - principal_point_offsets.ndimension() == 2 && - principal_point_offsets.size(0) == static_cast(batch_size), - 9, - "principal_point_offsets must be specified per batch."); - if (opacity.has_value()) { - TORCH_CHECK_ARG( - opacity.value().ndimension() == 2 && - opacity.value().size(0) == static_cast(batch_size), - 13, - "Opacity needs to be specified batch-wise."); - } - // Check all parameters are for a matching number of points. - n_points = vert_pos.size(1); - TORCH_CHECK_ARG( - vert_col.size(1) == static_cast(n_points), - 2, - ("The number of points for vertex positions (" + - std::to_string(n_points) + ") and vertex colors (" + - std::to_string(vert_col.size(1)) + ") doesn't agree.") - .c_str()); - TORCH_CHECK_ARG( - vert_radii.size(1) == static_cast(n_points), - 3, - ("The number of points for vertex positions (" + - std::to_string(n_points) + ") and vertex radii (" + - std::to_string(vert_col.size(1)) + ") doesn't agree.") - .c_str()); - if (opacity.has_value()) { - TORCH_CHECK_ARG( - opacity.value().size(1) == static_cast(n_points), - 13, - "Opacity needs to be specified per point."); - } - // Check all parameters have the correct last dimension size. 
- TORCH_CHECK_ARG( - vert_pos.size(2) == 3, - 1, - ("Vertex positions must be 3D (have shape " + - std::to_string(vert_pos.size(2)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - vert_col.size(2) == this->renderer_vec[0].cam.n_channels, - 2, - ("Vertex colors must have the right number of channels (have shape " + - std::to_string(vert_col.size(2)) + ", need " + - std::to_string(this->renderer_vec[0].cam.n_channels) + ")!") - .c_str()); - TORCH_CHECK_ARG( - cam_pos.size(1) == 3, - 4, - ("Camera position must be 3D (has shape " + - std::to_string(cam_pos.size(1)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - pixel_0_0_center.size(1) == 3, - 5, - ("pixel_0_0_center must be 3D (has shape " + - std::to_string(pixel_0_0_center.size(1)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - pixel_vec_x.size(1) == 3, - 6, - ("pixel_vec_x must be 3D (has shape " + - std::to_string(pixel_vec_x.size(1)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - pixel_vec_y.size(1) == 3, - 7, - ("pixel_vec_y must be 3D (has shape " + - std::to_string(pixel_vec_y.size(1)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - principal_point_offsets.size(1) == 2, - 9, - "principal_point_offsets must contain x and y offsets."); - // Ensure enough renderers are available for the batch. - ensure_n_renderers_gte(batch_size); - } else { - // Check all parameters are of correct dimension. - TORCH_CHECK_ARG( - vert_col.ndimension() == 2, 2, "vert_col needs to have dimension 2."); - TORCH_CHECK_ARG( - vert_radii.ndimension() == 1, 3, "vert_radii must have dimension 1."); - TORCH_CHECK_ARG( - cam_pos.ndimension() == 1, 4, "cam_pos must have dimension 1."); - TORCH_CHECK_ARG( - pixel_0_0_center.ndimension() == 1, - 5, - "pixel_0_0_center must have dimension 1."); - TORCH_CHECK_ARG( - pixel_vec_x.ndimension() == 1, 6, "pixel_vec_x must have dimension 1."); - TORCH_CHECK_ARG( - pixel_vec_y.ndimension() == 1, 7, "pixel_vec_y must have dimension 1."); - TORCH_CHECK_ARG( - focal_length.ndimension() == 0, - 8, - "focal_length must have dimension 0."); - TORCH_CHECK_ARG( - principal_point_offsets.ndimension() == 1, - 9, - "principal_point_offsets must have dimension 1."); - if (opacity.has_value()) { - TORCH_CHECK_ARG( - opacity.value().ndimension() == 1, - 13, - "Opacity needs to be specified per sample."); - } - // Check each. - n_points = vert_pos.size(0); - TORCH_CHECK_ARG( - vert_col.size(0) == static_cast(n_points), - 2, - ("The number of points for vertex positions (" + - std::to_string(n_points) + ") and vertex colors (" + - std::to_string(vert_col.size(0)) + ") doesn't agree.") - .c_str()); - TORCH_CHECK_ARG( - vert_radii.size(0) == static_cast(n_points), - 3, - ("The number of points for vertex positions (" + - std::to_string(n_points) + ") and vertex radii (" + - std::to_string(vert_col.size(0)) + ") doesn't agree.") - .c_str()); - if (opacity.has_value()) { - TORCH_CHECK_ARG( - opacity.value().size(0) == static_cast(n_points), - 12, - "Opacity needs to be specified per point."); - } - // Check all parameters have the correct last dimension size. 
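For the single-example branch the same arguments simply lose their leading batch dimension. A shape-only sketch (placeholder sizes, not part of the original file):

    const int64_t N = 1000, C = 3;
    torch::Tensor vert_pos = torch::zeros({N, 3});
    torch::Tensor vert_col = torch::zeros({N, C});
    torch::Tensor vert_radii = torch::full({N}, 0.02f);
    torch::Tensor cam_pos = torch::zeros({3});
    torch::Tensor pixel_0_0_center = torch::zeros({3});
    torch::Tensor pixel_vec_x = torch::zeros({3});
    torch::Tensor pixel_vec_y = torch::zeros({3});
    torch::Tensor focal_length = torch::tensor(30.f);        // 0-dim scalar tensor
    torch::Tensor principal_point_offsets = torch::zeros({2}, torch::kInt32);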
- TORCH_CHECK_ARG( - vert_pos.size(1) == 3, - 1, - ("Vertex positions must be 3D (have shape " + - std::to_string(vert_pos.size(1)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - vert_col.size(1) == this->renderer_vec[0].cam.n_channels, - 2, - ("Vertex colors must have the right number of channels (have shape " + - std::to_string(vert_col.size(1)) + ", need " + - std::to_string(this->renderer_vec[0].cam.n_channels) + ")!") - .c_str()); - TORCH_CHECK_ARG( - cam_pos.size(0) == 3, - 4, - ("Camera position must be 3D (has shape " + - std::to_string(cam_pos.size(0)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - pixel_0_0_center.size(0) == 3, - 5, - ("pixel_0_0_center must be 3D (has shape " + - std::to_string(pixel_0_0_center.size(0)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - pixel_vec_x.size(0) == 3, - 6, - ("pixel_vec_x must be 3D (has shape " + - std::to_string(pixel_vec_x.size(0)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - pixel_vec_y.size(0) == 3, - 7, - ("pixel_vec_y must be 3D (has shape " + - std::to_string(pixel_vec_y.size(0)) + ")!") - .c_str()); - TORCH_CHECK_ARG( - principal_point_offsets.size(0) == 2, - 9, - "principal_point_offsets must have x and y component."); - } - // Check device placement. - auto dev = torch::device_of(vert_pos).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 1, - ("Vertex positions must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(vert_col).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 2, - ("Vertex colors must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(vert_radii).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 3, - ("Vertex radii must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(cam_pos).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 4, - ("Camera position must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(pixel_0_0_center).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 5, - ("pixel_0_0_center must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(pixel_vec_x).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 6, - ("pixel_vec_x must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! 
Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(pixel_vec_y).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 7, - ("pixel_vec_y must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(principal_point_offsets).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 9, - ("principal_point_offsets must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - if (opacity.has_value()) { - dev = torch::device_of(opacity.value()).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 13, - ("opacity must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Is stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - } - // Type checks. - TORCH_CHECK_ARG( - vert_pos.scalar_type() == c10::kFloat, 1, "pulsar requires float types."); - TORCH_CHECK_ARG( - vert_col.scalar_type() == c10::kFloat, 2, "pulsar requires float types."); - TORCH_CHECK_ARG( - vert_radii.scalar_type() == c10::kFloat, - 3, - "pulsar requires float types."); - TORCH_CHECK_ARG( - cam_pos.scalar_type() == c10::kFloat, 4, "pulsar requires float types."); - TORCH_CHECK_ARG( - pixel_0_0_center.scalar_type() == c10::kFloat, - 5, - "pulsar requires float types."); - TORCH_CHECK_ARG( - pixel_vec_x.scalar_type() == c10::kFloat, - 6, - "pulsar requires float types."); - TORCH_CHECK_ARG( - pixel_vec_y.scalar_type() == c10::kFloat, - 7, - "pulsar requires float types."); - TORCH_CHECK_ARG( - focal_length.scalar_type() == c10::kFloat, - 8, - "pulsar requires float types."); - TORCH_CHECK_ARG( - // Unfortunately, the PyTorch interface is inconsistent for - // Int32: in Python, there exists an explicit int32 type, in - // C++ this is currently `c10::kInt`. - principal_point_offsets.scalar_type() == c10::kInt, - 9, - "principal_point_offsets must be provided as int32."); - if (opacity.has_value()) { - TORCH_CHECK_ARG( - opacity.value().scalar_type() == c10::kFloat, - 13, - "opacity must be a float type."); - } - // Content checks. 
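The value checks below all use the same idiom: a per-element predicate is reduced with .all() and pulled to the host as a single bool, with .min()/.max() feeding the error message. A minimal standalone sketch of that pattern (illustrative values only):

    torch::Tensor radii = torch::full({4}, 0.02f);
    bool all_positive = (radii > 1e-8f).all().item<bool>();   // true for these values
    float smallest = radii.min().item<float>();               // reported if the check fails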
- TORCH_CHECK_ARG( - (vert_radii > FEPS).all().item(), - 3, - ("Vertex radii must be > FEPS (min is " + - std::to_string(vert_radii.min().item()) + ").") - .c_str()); - if (this->orthogonal()) { - TORCH_CHECK_ARG( - (focal_length == 0.f).all().item(), - 8, - ("for an orthogonal projection focal length must be zero (abs max: " + - std::to_string(focal_length.abs().max().item()) + ").") - .c_str()); - } else { - TORCH_CHECK_ARG( - (focal_length > FEPS).all().item(), - 8, - ("for a perspective projection focal length must be > FEPS (min " + - std::to_string(focal_length.min().item()) + ").") - .c_str()); - } - TORCH_CHECK_ARG( - gamma <= 1.f && gamma >= 1E-5f, - 10, - ("gamma must be in [1E-5, 1] (" + std::to_string(gamma) + ").").c_str()); - if (min_depth == 0.f) { - min_depth = focal_length.max().item() + 2.f * FEPS; - } - TORCH_CHECK_ARG( - min_depth > focal_length.max().item(), - 12, - ("min_depth must be > focal_length (" + std::to_string(min_depth) + - " vs. " + std::to_string(focal_length.max().item()) + ").") - .c_str()); - TORCH_CHECK_ARG( - max_depth > min_depth + FEPS, - 11, - ("max_depth must be > min_depth + FEPS (" + std::to_string(max_depth) + - " vs. " + std::to_string(min_depth + FEPS) + ").") - .c_str()); - TORCH_CHECK_ARG( - percent_allowed_difference >= 0.f && percent_allowed_difference < 1.f, - 14, - ("percent_allowed_difference must be in [0., 1.[ (" + - std::to_string(percent_allowed_difference) + ").") - .c_str()); - TORCH_CHECK_ARG(max_n_hits > 0, 14, "max_n_hits must be > 0!"); - TORCH_CHECK_ARG(mode < 2, 15, "mode must be in {0, 1}."); - torch::Tensor real_bg_col; - if (bg_col.has_value()) { - TORCH_CHECK_ARG( - bg_col.value().device().type() == this->device_type && - bg_col.value().device().index() == this->device_index, - 13, - "bg_col must be stored on the renderer device!"); - TORCH_CHECK_ARG( - bg_col.value().ndimension() == 1 && - bg_col.value().size(0) == renderer_vec[0].cam.n_channels, - 13, - "bg_col must have the same number of channels as the image,)."); - real_bg_col = bg_col.value(); - } else { - real_bg_col = torch::ones( - {renderer_vec[0].cam.n_channels}, - c10::Device(this->device_type, this->device_index)) - .to(c10::kFloat); - } - if (opacity.has_value()) { - TORCH_CHECK_ARG( - (opacity.value() >= 0.f).all().item(), - 13, - "opacity must be >= 0."); - TORCH_CHECK_ARG( - (opacity.value() <= 1.f).all().item(), - 13, - "opacity must be <= 1."); - } - LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) - << " batch_size: " << batch_size; - LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) - << " n_points: " << n_points; - LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) - << " batch_processing: " << batch_processing; - return std::tuple( - batch_size, n_points, batch_processing, real_bg_col); -} - -std::tuple Renderer::forward( - const torch::Tensor& vert_pos, - const torch::Tensor& vert_col, - const torch::Tensor& vert_radii, - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& focal_length, - const torch::Tensor& principal_point_offsets, - const float& gamma, - const float& max_depth, - float min_depth, - const c10::optional& bg_col, - const c10::optional& opacity, - const float& percent_allowed_difference, - const uint& max_n_hits, - const uint& mode) { - // Parameter checks. 
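Next to the forward signature, a caller-side sketch may help; every concrete value here is hypothetical, and the point and camera tensors are assumed to be laid out as in the shape sketches inside arg_check above:

    pulsar::pytorch::Renderer renderer(
        /*width=*/640, /*height=*/480, /*max_n_balls=*/10000,
        /*orthogonal_projection=*/false, /*right_handed_system=*/true,
        /*background_normalization_depth=*/0.1f, /*n_channels=*/3, /*n_track=*/5);
    torch::Tensor image, forw_info;
    std::tie(image, forw_info) = renderer.forward(
        vert_pos, vert_col, vert_radii,
        cam_pos, pixel_0_0_center, pixel_vec_x, pixel_vec_y,
        focal_length, principal_point_offsets,
        /*gamma=*/0.1f, /*max_depth=*/45.f, /*min_depth=*/0.f,
        /*bg_col=*/c10::nullopt, /*opacity=*/c10::nullopt,
        /*percent_allowed_difference=*/0.01f, /*max_n_hits=*/4096u, /*mode=*/0u);
    // `image` comes back as (height, width, n_channels); `forw_info` is the
    // buffer later consumed by backward().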
- this->ensure_on_device(this->device_tracker.device()); - size_t batch_size; - size_t n_points; - bool batch_processing; - torch::Tensor real_bg_col; - std::tie(batch_size, n_points, batch_processing, real_bg_col) = - this->arg_check( - vert_pos, - vert_col, - vert_radii, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - gamma, - max_depth, - min_depth, - bg_col, - opacity, - percent_allowed_difference, - max_n_hits, - mode); - LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Extracting camera objects..."; - // Create the camera information. - std::vector cam_infos(batch_size); - if (batch_processing) { - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - cam_infos[batch_i] = cam_info_from_params( - cam_pos[batch_i], - pixel_0_0_center[batch_i], - pixel_vec_x[batch_i], - pixel_vec_y[batch_i], - principal_point_offsets[batch_i], - focal_length[batch_i].item(), - this->renderer_vec[0].cam.film_width, - this->renderer_vec[0].cam.film_height, - min_depth, - max_depth, - this->renderer_vec[0].cam.right_handed); - } - } else { - cam_infos[0] = cam_info_from_params( - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - principal_point_offsets, - focal_length.item(), - this->renderer_vec[0].cam.film_width, - this->renderer_vec[0].cam.film_height, - min_depth, - max_depth, - this->renderer_vec[0].cam.right_handed); - } - LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Processing..."; - // Let's go! - // Contiguous version of opacity, if available. We need to create this object - // in scope to keep it alive. - torch::Tensor opacity_contiguous; - float const* opacity_ptr = nullptr; - if (opacity.has_value()) { - opacity_contiguous = opacity.value().contiguous(); - opacity_ptr = opacity_contiguous.data_ptr(); - } - if (this->device_type == c10::DeviceType::CUDA) { -// No else check necessary - if not compiled with CUDA -// we can't even reach this code (the renderer can't be -// moved to a CUDA device). -#ifdef WITH_CUDA - int prev_active; - cudaGetDevice(&prev_active); - cudaSetDevice(this->device_index); -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - START_TIME_CU(batch_forward); -#endif - if (batch_processing) { - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - // These calls are non-blocking and just kick off the computations. 
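The opacity handling just above spells out the lifetime rule this block relies on: a contiguous copy has to stay alive in a named tensor so its storage outlives the launch that reads the raw pointer. A small standalone sketch of that pattern:

    torch::Tensor maybe_strided = torch::rand({8, 8}).t();   // non-contiguous view
    torch::Tensor dense = maybe_strided.contiguous();        // named => storage kept alive
    const float* ptr = dense.data_ptr<float>();              // valid while `dense` is in scope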
- PRE::forward( - &this->renderer_vec[batch_i], - vert_pos[batch_i].contiguous().data_ptr(), - vert_col[batch_i].contiguous().data_ptr(), - vert_radii[batch_i].contiguous().data_ptr(), - cam_infos[batch_i], - gamma, - percent_allowed_difference, - max_n_hits, - real_bg_col.contiguous().data_ptr(), - opacity_ptr, - n_points, - mode, - at::cuda::getCurrentCUDAStream()); - } - } else { - PRE::forward( - this->renderer_vec.data(), - vert_pos.contiguous().data_ptr(), - vert_col.contiguous().data_ptr(), - vert_radii.contiguous().data_ptr(), - cam_infos[0], - gamma, - percent_allowed_difference, - max_n_hits, - real_bg_col.contiguous().data_ptr(), - opacity_ptr, - n_points, - mode, - at::cuda::getCurrentCUDAStream()); - } -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - STOP_TIME_CU(batch_forward); - float time_ms; - GET_TIME_CU(batch_forward, &time_ms); - std::cout << "Forward render batched time per example: " - << time_ms / static_cast(batch_size) << "ms" << std::endl; -#endif - cudaSetDevice(prev_active); -#endif - } else { -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - START_TIME(batch_forward); -#endif - if (batch_processing) { - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - // These calls are non-blocking and just kick off the computations. - PRE::forward( - &this->renderer_vec[batch_i], - vert_pos[batch_i].contiguous().data_ptr(), - vert_col[batch_i].contiguous().data_ptr(), - vert_radii[batch_i].contiguous().data_ptr(), - cam_infos[batch_i], - gamma, - percent_allowed_difference, - max_n_hits, - real_bg_col.contiguous().data_ptr(), - opacity_ptr, - n_points, - mode, - nullptr); - } - } else { - PRE::forward( - this->renderer_vec.data(), - vert_pos.contiguous().data_ptr(), - vert_col.contiguous().data_ptr(), - vert_radii.contiguous().data_ptr(), - cam_infos[0], - gamma, - percent_allowed_difference, - max_n_hits, - real_bg_col.contiguous().data_ptr(), - opacity_ptr, - n_points, - mode, - nullptr); - } -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - STOP_TIME(batch_forward); - float time_ms; - GET_TIME(batch_forward, &time_ms); - std::cout << "Forward render batched time per example: " - << time_ms / static_cast(batch_size) << "ms" << std::endl; -#endif - } - LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Extracting results..."; - // Create the results. - std::vector results(batch_size); - std::vector forw_infos(batch_size); - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - results[batch_i] = from_blob( - this->renderer_vec[batch_i].result_d, - {this->renderer_vec[0].cam.film_height, - this->renderer_vec[0].cam.film_width, - this->renderer_vec[0].cam.n_channels}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - if (mode == 1) - results[batch_i] = results[batch_i].slice(2, 0, 1, 1); - forw_infos[batch_i] = from_blob( - this->renderer_vec[batch_i].forw_info_d, - {this->renderer_vec[0].cam.film_height, - this->renderer_vec[0].cam.film_width, - 3 + 2 * this->n_track()}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? 
(cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Forward render complete."; - if (batch_processing) { - return std::tuple( - torch::stack(results), torch::stack(forw_infos)); - } else { - return std::tuple(results[0], forw_infos[0]); - } -}; - -std::tuple< - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional> -Renderer::backward( - const torch::Tensor& grad_im, - const torch::Tensor& image, - const torch::Tensor& forw_info, - const torch::Tensor& vert_pos, - const torch::Tensor& vert_col, - const torch::Tensor& vert_radii, - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& focal_length, - const torch::Tensor& principal_point_offsets, - const float& gamma, - const float& max_depth, - float min_depth, - const c10::optional& bg_col, - const c10::optional& opacity, - const float& percent_allowed_difference, - const uint& max_n_hits, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - const at::optional>& dbg_pos) { - this->ensure_on_device(this->device_tracker.device()); - size_t batch_size; - size_t n_points; - bool batch_processing; - torch::Tensor real_bg_col; - std::tie(batch_size, n_points, batch_processing, real_bg_col) = - this->arg_check( - vert_pos, - vert_col, - vert_radii, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - gamma, - max_depth, - min_depth, - bg_col, - opacity, - percent_allowed_difference, - max_n_hits, - mode); - // Additional checks for the gradient computation. 
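The checks below constrain the gradient inputs to match the forward output. A sketch of batched shapes that would pass the dimension checks (H, W, C mirror the renderer's film size and channel count, K is n_track; all numbers are placeholders):

    const int64_t B = 2, H = 480, W = 640, C = 3, K = 5;
    torch::Tensor grad_im   = torch::zeros({B, H, W, C});          // d(loss)/d(image)
    torch::Tensor image     = torch::zeros({B, H, W, C});          // forward result
    torch::Tensor forw_info = torch::zeros({B, H, W, 3 + 2 * K});  // forward info buffer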
- TORCH_CHECK_ARG( - (grad_im.ndimension() == 3 + batch_processing && - static_cast(grad_im.size(0 + batch_processing)) == - this->height() && - static_cast(grad_im.size(1 + batch_processing)) == this->width() && - static_cast(grad_im.size(2 + batch_processing)) == - this->renderer_vec[0].cam.n_channels), - 1, - "The gradient image size is not correct."); - TORCH_CHECK_ARG( - (image.ndimension() == 3 + batch_processing && - static_cast(image.size(0 + batch_processing)) == this->height() && - static_cast(image.size(1 + batch_processing)) == this->width() && - static_cast(image.size(2 + batch_processing)) == - this->renderer_vec[0].cam.n_channels), - 2, - "The result image size is not correct."); - TORCH_CHECK_ARG( - grad_im.scalar_type() == c10::kFloat, - 1, - "The gradient image must be of float type."); - TORCH_CHECK_ARG( - image.scalar_type() == c10::kFloat, - 2, - "The image must be of float type."); - if (dif_opy) { - TORCH_CHECK_ARG( - opacity.has_value(), 13, "dif_opy set requires opacity values."); - } - if (batch_processing) { - TORCH_CHECK_ARG( - grad_im.size(0) == static_cast(batch_size), - 1, - "Gradient image batch size must agree."); - TORCH_CHECK_ARG( - image.size(0) == static_cast(batch_size), - 2, - "Image batch size must agree."); - TORCH_CHECK_ARG( - forw_info.size(0) == static_cast(batch_size), - 3, - "forward info must have batch size."); - } - TORCH_CHECK_ARG( - (forw_info.ndimension() == 3 + batch_processing && - static_cast(forw_info.size(0 + batch_processing)) == - this->height() && - static_cast(forw_info.size(1 + batch_processing)) == - this->width() && - static_cast(forw_info.size(2 + batch_processing)) == - 3 + 2 * this->n_track()), - 3, - "The forward info image size is not correct."); - TORCH_CHECK_ARG( - forw_info.scalar_type() == c10::kFloat, - 3, - "The forward info must be of float type."); - // Check device. - auto dev = torch::device_of(grad_im).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 1, - ("grad_im must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(image).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 2, - ("image must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - dev = torch::device_of(forw_info).value(); - TORCH_CHECK_ARG( - dev.type() == this->device_type && dev.index() == this->device_index, - 3, - ("forw_info must be stored on device " + - c10::DeviceTypeName(this->device_type) + ", index " + - std::to_string(this->device_index) + "! Are stored on " + - c10::DeviceTypeName(dev.type()) + ", index " + - std::to_string(dev.index()) + ".") - .c_str()); - if (dbg_pos.has_value()) { - TORCH_CHECK_ARG( - dbg_pos.value().first < this->width() && - dbg_pos.value().second < this->height(), - 23, - "The debug position must be within image bounds."); - } - // Prepare the return value. 
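The return value assembled below is a tuple of eight optional tensors; judging from how it is filled further down, the slots are position, color, radius, four camera-related gradients, and opacity. A caller-side sketch of unpacking it (hypothetical; assumes `grads` holds the tuple returned by backward):

    if (std::get<0>(grads).has_value()) {
      torch::Tensor grad_pos = std::get<0>(grads).value();   // (N, 3) position gradients
    }
    if (std::get<7>(grads).has_value()) {
      torch::Tensor grad_opy = std::get<7>(grads).value();   // (N,) opacity gradients
    }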
- std::tuple< - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional> - ret; - if (mode == 1 || (!dif_pos && !dif_col && !dif_rad && !dif_cam && !dif_opy)) { - return ret; - } - // Create the camera information. - std::vector cam_infos(batch_size); - if (batch_processing) { - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - cam_infos[batch_i] = cam_info_from_params( - cam_pos[batch_i], - pixel_0_0_center[batch_i], - pixel_vec_x[batch_i], - pixel_vec_y[batch_i], - principal_point_offsets[batch_i], - focal_length[batch_i].item(), - this->renderer_vec[0].cam.film_width, - this->renderer_vec[0].cam.film_height, - min_depth, - max_depth, - this->renderer_vec[0].cam.right_handed); - } - } else { - cam_infos[0] = cam_info_from_params( - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - principal_point_offsets, - focal_length.item(), - this->renderer_vec[0].cam.film_width, - this->renderer_vec[0].cam.film_height, - min_depth, - max_depth, - this->renderer_vec[0].cam.right_handed); - } - // Let's go! - // Contiguous version of opacity, if available. We need to create this object - // in scope to keep it alive. - torch::Tensor opacity_contiguous; - float const* opacity_ptr = nullptr; - if (opacity.has_value()) { - opacity_contiguous = opacity.value().contiguous(); - opacity_ptr = opacity_contiguous.data_ptr(); - } - if (this->device_type == c10::DeviceType::CUDA) { -// No else check necessary - it's not possible to move -// the renderer to a CUDA device if not built with CUDA. -#ifdef WITH_CUDA - int prev_active; - cudaGetDevice(&prev_active); - cudaSetDevice(this->device_index); -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - START_TIME_CU(batch_backward); -#endif - if (batch_processing) { - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - // These calls are non-blocking and just kick off the computations. 
- if (dbg_pos.has_value()) { - PRE::backward_dbg( - &this->renderer_vec[batch_i], - grad_im[batch_i].contiguous().data_ptr(), - image[batch_i].contiguous().data_ptr(), - forw_info[batch_i].contiguous().data_ptr(), - vert_pos[batch_i].contiguous().data_ptr(), - vert_col[batch_i].contiguous().data_ptr(), - vert_radii[batch_i].contiguous().data_ptr(), - cam_infos[batch_i], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - dbg_pos.value().first, - dbg_pos.value().second, - at::cuda::getCurrentCUDAStream()); - } else { - PRE::backward( - &this->renderer_vec[batch_i], - grad_im[batch_i].contiguous().data_ptr(), - image[batch_i].contiguous().data_ptr(), - forw_info[batch_i].contiguous().data_ptr(), - vert_pos[batch_i].contiguous().data_ptr(), - vert_col[batch_i].contiguous().data_ptr(), - vert_radii[batch_i].contiguous().data_ptr(), - cam_infos[batch_i], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - at::cuda::getCurrentCUDAStream()); - } - } - } else { - if (dbg_pos.has_value()) { - PRE::backward_dbg( - this->renderer_vec.data(), - grad_im.contiguous().data_ptr(), - image.contiguous().data_ptr(), - forw_info.contiguous().data_ptr(), - vert_pos.contiguous().data_ptr(), - vert_col.contiguous().data_ptr(), - vert_radii.contiguous().data_ptr(), - cam_infos[0], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - dbg_pos.value().first, - dbg_pos.value().second, - at::cuda::getCurrentCUDAStream()); - } else { - PRE::backward( - this->renderer_vec.data(), - grad_im.contiguous().data_ptr(), - image.contiguous().data_ptr(), - forw_info.contiguous().data_ptr(), - vert_pos.contiguous().data_ptr(), - vert_col.contiguous().data_ptr(), - vert_radii.contiguous().data_ptr(), - cam_infos[0], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - at::cuda::getCurrentCUDAStream()); - } - } - cudaSetDevice(prev_active); -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - STOP_TIME_CU(batch_backward); - float time_ms; - GET_TIME_CU(batch_backward, &time_ms); - std::cout << "Backward render batched time per example: " - << time_ms / static_cast(batch_size) << "ms" << std::endl; -#endif -#endif // WITH_CUDA - } else { -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - START_TIME(batch_backward); -#endif - if (batch_processing) { - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - // These calls are non-blocking and just kick off the computations. 
- if (dbg_pos.has_value()) { - PRE::backward_dbg( - &this->renderer_vec[batch_i], - grad_im[batch_i].contiguous().data_ptr(), - image[batch_i].contiguous().data_ptr(), - forw_info[batch_i].contiguous().data_ptr(), - vert_pos[batch_i].contiguous().data_ptr(), - vert_col[batch_i].contiguous().data_ptr(), - vert_radii[batch_i].contiguous().data_ptr(), - cam_infos[batch_i], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - dbg_pos.value().first, - dbg_pos.value().second, - nullptr); - } else { - PRE::backward( - &this->renderer_vec[batch_i], - grad_im[batch_i].contiguous().data_ptr(), - image[batch_i].contiguous().data_ptr(), - forw_info[batch_i].contiguous().data_ptr(), - vert_pos[batch_i].contiguous().data_ptr(), - vert_col[batch_i].contiguous().data_ptr(), - vert_radii[batch_i].contiguous().data_ptr(), - cam_infos[batch_i], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - nullptr); - } - } - } else { - if (dbg_pos.has_value()) { - PRE::backward_dbg( - this->renderer_vec.data(), - grad_im.contiguous().data_ptr(), - image.contiguous().data_ptr(), - forw_info.contiguous().data_ptr(), - vert_pos.contiguous().data_ptr(), - vert_col.contiguous().data_ptr(), - vert_radii.contiguous().data_ptr(), - cam_infos[0], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - dbg_pos.value().first, - dbg_pos.value().second, - nullptr); - } else { - PRE::backward( - this->renderer_vec.data(), - grad_im.contiguous().data_ptr(), - image.contiguous().data_ptr(), - forw_info.contiguous().data_ptr(), - vert_pos.contiguous().data_ptr(), - vert_col.contiguous().data_ptr(), - vert_radii.contiguous().data_ptr(), - cam_infos[0], - gamma, - percent_allowed_difference, - max_n_hits, - opacity_ptr, - n_points, - mode, - dif_pos, - dif_col, - dif_rad, - dif_cam, - dif_opy, - nullptr); - } - } -#ifdef PULSAR_TIMINGS_BATCHED_ENABLED - STOP_TIME(batch_backward); - float time_ms; - GET_TIME(batch_backward, &time_ms); - std::cout << "Backward render batched time per example: " - << time_ms / static_cast(batch_size) << "ms" << std::endl; -#endif - } - if (dif_pos) { - if (batch_processing) { - std::vector results(batch_size); - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - results[batch_i] = from_blob( - reinterpret_cast(this->renderer_vec[batch_i].grad_pos_d), - {static_cast(n_points), 3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - std::get<0>(ret) = torch::stack(results); - } else { - std::get<0>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_pos_d), - {static_cast(n_points), 3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? 
(cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - } - if (dif_col) { - if (batch_processing) { - std::vector results(batch_size); - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - results[batch_i] = from_blob( - reinterpret_cast(this->renderer_vec[batch_i].grad_col_d), - {static_cast(n_points), - this->renderer_vec[0].cam.n_channels}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - std::get<1>(ret) = torch::stack(results); - } else { - std::get<1>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_col_d), - {static_cast(n_points), - this->renderer_vec[0].cam.n_channels}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - } - if (dif_rad) { - if (batch_processing) { - std::vector results(batch_size); - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - results[batch_i] = from_blob( - reinterpret_cast(this->renderer_vec[batch_i].grad_rad_d), - {static_cast(n_points)}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - std::get<2>(ret) = torch::stack(results); - } else { - std::get<2>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_rad_d), - {static_cast(n_points)}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - } - if (dif_cam) { - if (batch_processing) { - std::vector res_p1(batch_size); - std::vector res_p2(batch_size); - std::vector res_p3(batch_size); - std::vector res_p4(batch_size); - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - res_p1[batch_i] = from_blob( - reinterpret_cast(this->renderer_vec[batch_i].grad_cam_d), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - res_p2[batch_i] = from_blob( - reinterpret_cast( - this->renderer_vec[batch_i].grad_cam_d + 3), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - res_p3[batch_i] = from_blob( - reinterpret_cast( - this->renderer_vec[batch_i].grad_cam_d + 6), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - res_p4[batch_i] = from_blob( - reinterpret_cast( - this->renderer_vec[batch_i].grad_cam_d + 9), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? 
(cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - std::get<3>(ret) = torch::stack(res_p1); - std::get<4>(ret) = torch::stack(res_p2); - std::get<5>(ret) = torch::stack(res_p3); - std::get<6>(ret) = torch::stack(res_p4); - } else { - std::get<3>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_cam_d), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - std::get<4>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_cam_d + 3), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - std::get<5>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_cam_d + 6), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - std::get<6>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_cam_d + 9), - {3}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - } - if (dif_opy) { - if (batch_processing) { - std::vector results(batch_size); - for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) { - results[batch_i] = from_blob( - reinterpret_cast(this->renderer_vec[batch_i].grad_opy_d), - {static_cast(n_points)}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - std::get<7>(ret) = torch::stack(results); - } else { - std::get<7>(ret) = from_blob( - reinterpret_cast(this->renderer_vec[0].grad_opy_d), - {static_cast(n_points)}, - this->device_type, - this->device_index, - torch::kFloat, - this->device_type == c10::DeviceType::CUDA -#ifdef WITH_CUDA - ? at::cuda::getCurrentCUDAStream() -#else - ? (cudaStream_t) nullptr -#endif - : (cudaStream_t) nullptr); - } - } - return ret; -}; - -} // namespace pytorch -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.h deleted file mode 100644 index 2525ca3f3dd9036320401b2a0059a2d5b6b864a4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/renderer.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_PYTORCH_RENDERER_H_ -#define PULSAR_NATIVE_PYTORCH_RENDERER_H_ - -#include "../global.h" -#include "../include/renderer.h" - -namespace pulsar { -namespace pytorch { - -struct Renderer { - public: - /** - * Pytorch Pulsar differentiable rendering module. 
- */ - explicit Renderer( - const unsigned int& width, - const unsigned int& height, - const uint& max_n_balls, - const bool& orthogonal_projection, - const bool& right_handed_system, - const float& background_normalization_depth, - const uint& n_channels, - const uint& n_track); - ~Renderer(); - - std::tuple forward( - const torch::Tensor& vert_pos, - const torch::Tensor& vert_col, - const torch::Tensor& vert_radii, - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& focal_length, - const torch::Tensor& principal_point_offsets, - const float& gamma, - const float& max_depth, - float min_depth, - const c10::optional& bg_col, - const c10::optional& opacity, - const float& percent_allowed_difference, - const uint& max_n_hits, - const uint& mode); - - std::tuple< - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional, - at::optional> - backward( - const torch::Tensor& grad_im, - const torch::Tensor& image, - const torch::Tensor& forw_info, - const torch::Tensor& vert_pos, - const torch::Tensor& vert_col, - const torch::Tensor& vert_radii, - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& focal_length, - const torch::Tensor& principal_point_offsets, - const float& gamma, - const float& max_depth, - float min_depth, - const c10::optional& bg_col, - const c10::optional& opacity, - const float& percent_allowed_difference, - const uint& max_n_hits, - const uint& mode, - const bool& dif_pos, - const bool& dif_col, - const bool& dif_rad, - const bool& dif_cam, - const bool& dif_opy, - const at::optional>& dbg_pos); - - // Infrastructure. - /** - * Ensure that the renderer is placed on this device. - * Is nearly a no-op if the device is correct. - */ - void ensure_on_device(torch::Device device, bool non_blocking = false); - - /** - * Ensure that at least n renderers are available. - */ - void ensure_n_renderers_gte(const size_t& batch_size); - - /** - * Check the parameters. - */ - std::tuple arg_check( - const torch::Tensor& vert_pos, - const torch::Tensor& vert_col, - const torch::Tensor& vert_radii, - const torch::Tensor& cam_pos, - const torch::Tensor& pixel_0_0_center, - const torch::Tensor& pixel_vec_x, - const torch::Tensor& pixel_vec_y, - const torch::Tensor& focal_length, - const torch::Tensor& principal_point_offsets, - const float& gamma, - const float& max_depth, - float& min_depth, - const c10::optional& bg_col, - const c10::optional& opacity, - const float& percent_allowed_difference, - const uint& max_n_hits, - const uint& mode); - - bool operator==(const Renderer& rhs) const; - inline friend std::ostream& operator<<( - std::ostream& stream, - const Renderer& self) { - stream << "pulsar::Renderer["; - // Device info. 
- stream << self.device_type; - if (self.device_index != -1) - stream << ", ID " << self.device_index; - stream << "]"; - return stream; - } - - inline uint width() const { - return this->renderer_vec[0].cam.film_width; - } - inline uint height() const { - return this->renderer_vec[0].cam.film_height; - } - inline int max_num_balls() const { - return this->renderer_vec[0].max_num_balls; - } - inline bool orthogonal() const { - return this->renderer_vec[0].cam.orthogonal_projection; - } - inline bool right_handed() const { - return this->renderer_vec[0].cam.right_handed; - } - inline uint n_track() const { - return static_cast(this->renderer_vec[0].n_track); - } - - /** A tensor that is registered as a buffer with this Module to track its - * device placement. Unfortunately, pytorch doesn't offer tracking Module - * device placement in a better way as of now. - */ - torch::Tensor device_tracker; - - protected: - /** The device type for this renderer. */ - c10::DeviceType device_type; - /** The device index for this renderer. */ - c10::DeviceIndex device_index; - /** Pointer to the underlying pulsar renderers. */ - std::vector renderer_vec; -}; - -} // namespace pytorch -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp deleted file mode 100644 index 1dd41ed4e5ef40d2c78b3d71fb5dff7cdaa4e6c0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifdef WITH_CUDA -#include -#include -#endif -#include - -#include "./tensor_util.h" - -namespace pulsar { -namespace pytorch { - -torch::Tensor sphere_ids_from_result_info_nograd( - const torch::Tensor& forw_info) { - torch::Tensor result = torch::zeros( - {forw_info.size(0), - forw_info.size(1), - forw_info.size(2), - (forw_info.size(3) - 3) / 2}, - torch::TensorOptions().device(forw_info.device()).dtype(torch::kInt32)); - // Get the relevant slice, contiguous. - torch::Tensor tmp = - forw_info - .slice( - /*dim=*/3, /*start=*/3, /*end=*/forw_info.size(3), /*step=*/2) - .contiguous(); - if (forw_info.device().type() == c10::DeviceType::CUDA) { -#ifdef WITH_CUDA - cudaMemcpyAsync( - result.data_ptr(), - tmp.data_ptr(), - sizeof(uint32_t) * tmp.size(0) * tmp.size(1) * tmp.size(2) * - tmp.size(3), - cudaMemcpyDeviceToDevice, - at::cuda::getCurrentCUDAStream()); -#else - throw std::runtime_error( - "Copy on CUDA device initiated but built " - "without CUDA support."); -#endif - } else { - memcpy( - result.data_ptr(), - tmp.data_ptr(), - sizeof(uint32_t) * tmp.size(0) * tmp.size(1) * tmp.size(2) * - tmp.size(3)); - } - // `tmp` is freed after this, the memory might get reallocated. However, - // only kernels in the same stream should ever be able to write to this - // memory, which are executed only after the memcpy is complete. That's - // why we can just continue. 
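For reference, a standalone CPU sketch of the same extraction: the sphere ids occupy every second channel starting at index 3 of the forward-info buffer and are raw 32-bit patterns stored in float memory (shapes are placeholders, not part of the original file):

    #include <cstring>
    #include <vector>
    torch::Tensor info = torch::zeros({1, 4, 4, 3 + 2 * 5});       // (N, H, W, 3 + 2*K)
    torch::Tensor ids_f = info.slice(/*dim=*/3, /*start=*/3,
                                     /*end=*/info.size(3), /*step=*/2)
                              .contiguous();                        // (N, H, W, K)
    std::vector<int32_t> ids(ids_f.numel());
    std::memcpy(ids.data(), ids_f.data_ptr<float>(),
                sizeof(int32_t) * ids.size());                      // reinterpret the bits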
- return result; -} - -} // namespace pytorch -} // namespace pulsar diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.h deleted file mode 100644 index 9f1d677cbfd4377f27224e05abc66085a06aa60c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/tensor_util.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_PYTORCH_TENSOR_UTIL_H_ -#define PULSAR_NATIVE_PYTORCH_TENSOR_UTIL_H_ - -#include - -namespace pulsar { -namespace pytorch { - -torch::Tensor sphere_ids_from_result_info_nograd( - const torch::Tensor& forw_info); - -} -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.cpp b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.cpp deleted file mode 100644 index 7d25b6e8504c765b816e7793419e9de63a7719dd..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifdef WITH_CUDA -#include - -namespace pulsar { -namespace pytorch { - -void cudaDevToDev( - void* trg, - const void* src, - const int& size, - const cudaStream_t& stream) { - cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToDevice, stream); -} - -void cudaDevToHost( - void* trg, - const void* src, - const int& size, - const cudaStream_t& stream) { - cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToHost, stream); -} - -} // namespace pytorch -} // namespace pulsar -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.h b/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.h deleted file mode 100644 index be3dc80defbb78c6e65722a1dda5d70e288e73c7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/pytorch/util.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#ifndef PULSAR_NATIVE_PYTORCH_UTIL_H_ -#define PULSAR_NATIVE_PYTORCH_UTIL_H_ - -#include -#include "../global.h" - -namespace pulsar { -namespace pytorch { - -void cudaDevToDev( - void* trg, - const void* src, - const int& size, - const cudaStream_t& stream); -void cudaDevToHost( - void* trg, - const void* src, - const int& size, - const cudaStream_t& stream); - -/** - * This method takes a memory pointer and wraps it into a pytorch tensor. - * - * This is preferred over `torch::from_blob`, since that requires a CUDA - * managed pointer. However, working with these for high performance - * operations is slower. Most of the rendering operations should stay - * local to the respective GPU anyways, so unmanaged pointers are - * preferred. 
- */ -template -torch::Tensor from_blob( - const T* ptr, - const torch::IntArrayRef& shape, - const c10::DeviceType& device_type, - const c10::DeviceIndex& device_index, - const torch::Dtype& dtype, - const cudaStream_t& stream) { - torch::Tensor ret = torch::zeros( - shape, torch::device({device_type, device_index}).dtype(dtype)); - const int num_elements = - std::accumulate(shape.begin(), shape.end(), 1, std::multiplies{}); - if (device_type == c10::DeviceType::CUDA) { -#ifdef WITH_CUDA - cudaDevToDev( - ret.data_ptr(), - static_cast(ptr), - sizeof(T) * num_elements, - stream); -#else - throw std::runtime_error( - "Initiating devToDev copy on a build without CUDA."); -#endif - // TODO: check for synchronization. - } else { - memcpy(ret.data_ptr(), ptr, sizeof(T) * num_elements); - } - return ret; -}; - -} // namespace pytorch -} // namespace pulsar - -#endif diff --git a/pytorch3d/pytorch3d/csrc/pulsar/warnings.cpp b/pytorch3d/pytorch3d/csrc/pulsar/warnings.cpp deleted file mode 100644 index 54615ac1392db7788c643e93a40b4824b59ec102..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/pulsar/warnings.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "./global.h" -#include "./logging.h" - -/** - * A compilation unit to provide warnings about the code and avoid - * repeated messages. - */ -#ifdef PULSAR_ASSERTIONS -#pragma message("WARNING: assertions are enabled in Pulsar.") -#endif -#ifdef PULSAR_LOGGING_ENABLED -#pragma message("WARNING: logging is enabled in Pulsar.") -#endif diff --git a/pytorch3d/pytorch3d/csrc/rasterize_coarse/bitmask.cuh b/pytorch3d/pytorch3d/csrc/rasterize_coarse/bitmask.cuh deleted file mode 100644 index 6ffcac87caa13f37a5ccb12b565d33450bc035c2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_coarse/bitmask.cuh +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#define BINMASK_H - -// A BitMask represents a bool array of shape (H, W, N). We pack values into -// the bits of unsigned ints; a single unsigned int has B = 32 bits, so to hold -// all values we use H * W * (N / B) = H * W * D values. We want to store -// BitMasks in shared memory, so we assume that the memory has already been -// allocated for it elsewhere. -class BitMask { - public: - __device__ BitMask(unsigned int* data, int H, int W, int N) - : data(data), H(H), W(W), B(8 * sizeof(unsigned int)), D(N / B) { - // TODO: check if the data is null. 
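The indexing used by this class packs bit (y, x, d) into D = N / B unsigned ints per (y, x) cell, with B = 32 bits per word. A non-atomic, host-side analogue of the same arithmetic, purely for illustration:

    inline void set_bit_host(unsigned int* data, int W, int D, int y, int x, int d) {
      const int word = y * W * D + x * D + d / 32;   // which 32-bit word holds the bit
      data[word] |= 1u << (d % 32);                  // which bit inside that word
    }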
- N = ceilf(N % 32); // take ceil incase N % 32 != 0 - block_clear(); // clear the data - } - - // Use all threads in the current block to clear all bits of this BitMask - __device__ void block_clear() { - for (int i = threadIdx.x; i < H * W * D; i += blockDim.x) { - data[i] = 0; - } - __syncthreads(); - } - - __device__ int _get_elem_idx(int y, int x, int d) { - return y * W * D + x * D + d / B; - } - - __device__ int _get_bit_idx(int d) { - return d % B; - } - - // Turn on a single bit (y, x, d) - __device__ void set(int y, int x, int d) { - int elem_idx = _get_elem_idx(y, x, d); - int bit_idx = _get_bit_idx(d); - const unsigned int mask = 1U << bit_idx; - atomicOr(data + elem_idx, mask); - } - - // Turn off a single bit (y, x, d) - __device__ void unset(int y, int x, int d) { - int elem_idx = _get_elem_idx(y, x, d); - int bit_idx = _get_bit_idx(d); - const unsigned int mask = ~(1U << bit_idx); - atomicAnd(data + elem_idx, mask); - } - - // Check whether the bit (y, x, d) is on or off - __device__ bool get(int y, int x, int d) { - int elem_idx = _get_elem_idx(y, x, d); - int bit_idx = _get_bit_idx(d); - return (data[elem_idx] >> bit_idx) & 1U; - } - - // Compute the number of bits set in the row (y, x, :) - __device__ int count(int y, int x) { - int total = 0; - for (int i = 0; i < D; ++i) { - int elem_idx = y * W * D + x * D + i; - unsigned int elem = data[elem_idx]; - total += __popc(elem); - } - return total; - } - - private: - unsigned int* data; - int H, W, B, D; -}; diff --git a/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu b/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu deleted file mode 100644 index bb6acaf1224262f3615dc5750e474f83fa8325c6..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include "rasterize_coarse/bitmask.cuh" -#include "rasterize_points/rasterization_utils.cuh" -#include "utils/float_math.cuh" -#include "utils/geometry_utils.cuh" // For kEpsilon -- gross - -__global__ void TriangleBoundingBoxKernel( - const float* face_verts, // (F, 3, 3) - const int F, - const float blur_radius, - float* bboxes, // (4, F) - bool* skip_face) { // (F,) - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int num_threads = blockDim.x * gridDim.x; - const float sqrt_radius = sqrt(blur_radius); - for (int f = tid; f < F; f += num_threads) { - const float v0x = face_verts[f * 9 + 0 * 3 + 0]; - const float v0y = face_verts[f * 9 + 0 * 3 + 1]; - const float v0z = face_verts[f * 9 + 0 * 3 + 2]; - const float v1x = face_verts[f * 9 + 1 * 3 + 0]; - const float v1y = face_verts[f * 9 + 1 * 3 + 1]; - const float v1z = face_verts[f * 9 + 1 * 3 + 2]; - const float v2x = face_verts[f * 9 + 2 * 3 + 0]; - const float v2y = face_verts[f * 9 + 2 * 3 + 1]; - const float v2z = face_verts[f * 9 + 2 * 3 + 2]; - const float xmin = FloatMin3(v0x, v1x, v2x) - sqrt_radius; - const float xmax = FloatMax3(v0x, v1x, v2x) + sqrt_radius; - const float ymin = FloatMin3(v0y, v1y, v2y) - sqrt_radius; - const float ymax = FloatMax3(v0y, v1y, v2y) + sqrt_radius; - const float zmin = FloatMin3(v0z, v1z, v2z); - const bool skip = zmin < kEpsilon; - bboxes[0 * F + f] = xmin; - bboxes[1 * F + f] = xmax; - bboxes[2 * F + f] = ymin; - bboxes[3 * F + f] = ymax; - skip_face[f] = skip; - } -} - -__global__ void PointBoundingBoxKernel( - const float* points, // (P, 3) - const float* radius, // (P,) - const int P, - float* bboxes, // (4, P) - bool* skip_points) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int num_threads = blockDim.x * gridDim.x; - for (int p = tid; p < P; p += num_threads) { - const float x = points[p * 3 + 0]; - const float y = points[p * 3 + 1]; - const float z = points[p * 3 + 2]; - const float r = radius[p]; - // TODO: change to kEpsilon to match triangles? - const bool skip = z < 0; - bboxes[0 * P + p] = x - r; - bboxes[1 * P + p] = x + r; - bboxes[2 * P + p] = y - r; - bboxes[3 * P + p] = y + r; - skip_points[p] = skip; - } -} - -__global__ void RasterizeCoarseCudaKernel( - const float* bboxes, // (4, E) (xmin, xmax, ymin, ymax) - const bool* should_skip, // (E,) - const int64_t* elem_first_idxs, - const int64_t* elems_per_batch, - const int N, - const int E, - const int H, - const int W, - const int bin_size, - const int chunk_size, - const int max_elem_per_bin, - int* elems_per_bin, - int* bin_elems) { - extern __shared__ char sbuf[]; - const int M = max_elem_per_bin; - // Integer divide round up - const int num_bins_x = 1 + (W - 1) / bin_size; - const int num_bins_y = 1 + (H - 1) / bin_size; - - // NDC range depends on the ratio of W/H - // The shorter side from (H, W) is given an NDC range of 2.0 and - // the other side is scaled by the ratio of H:W. - const float NDC_x_half_range = NonSquareNdcRange(W, H) / 2.0f; - const float NDC_y_half_range = NonSquareNdcRange(H, W) / 2.0f; - - // Size of half a pixel in NDC units is the NDC half range - // divided by the corresponding image dimension - const float half_pix_x = NDC_x_half_range / W; - const float half_pix_y = NDC_y_half_range / H; - - // This is a boolean array of shape (num_bins_y, num_bins_x, chunk_size) - // stored in shared memory that will track whether each elem in the chunk - // falls into each bin of the image. 
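// A minimal host-side sketch of the bit-packing scheme described above,
// assuming 32-bit words and D = ceil(N / 32) words per (y, x) row. The name
// HostBitMask is only for illustration; the device-side BitMask used just
// below additionally lives in shared memory and relies on
// atomicOr/atomicAnd/__popc.
#include <bitset>
#include <vector>

class HostBitMask {
 public:
  HostBitMask(int H, int W, int N)
      : H(H), W(W), B(32), D((N + 31) / 32), data(H * W * ((N + 31) / 32), 0u) {}

  // Turn on the bit at (y, x, d).
  void set(int y, int x, int d) {
    data[y * W * D + x * D + d / B] |= 1u << (d % B);
  }

  // Read the bit at (y, x, d).
  bool get(int y, int x, int d) const {
    return (data[y * W * D + x * D + d / B] >> (d % B)) & 1u;
  }

  // Count the bits set in the row (y, x, :), one popcount per packed word.
  int count(int y, int x) const {
    int total = 0;
    for (int i = 0; i < D; ++i) {
      total += static_cast<int>(std::bitset<32>(data[y * W * D + x * D + i]).count());
    }
    return total;
  }

 private:
  int H, W, B, D;
  std::vector<unsigned int> data;
};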
- BitMask binmask((unsigned int*)sbuf, num_bins_y, num_bins_x, chunk_size); - - // Have each block handle a chunk of elements - const int chunks_per_batch = 1 + (E - 1) / chunk_size; - const int num_chunks = N * chunks_per_batch; - - for (int chunk = blockIdx.x; chunk < num_chunks; chunk += gridDim.x) { - const int batch_idx = chunk / chunks_per_batch; // batch index - const int chunk_idx = chunk % chunks_per_batch; - const int elem_chunk_start_idx = chunk_idx * chunk_size; - - binmask.block_clear(); - const int64_t elem_start_idx = elem_first_idxs[batch_idx]; - const int64_t elem_stop_idx = elem_start_idx + elems_per_batch[batch_idx]; - - // Have each thread handle a different face within the chunk - for (int e = threadIdx.x; e < chunk_size; e += blockDim.x) { - const int e_idx = elem_chunk_start_idx + e; - - // Check that we are still within the same element of the batch - if (e_idx >= elem_stop_idx || e_idx < elem_start_idx) { - continue; - } - - if (should_skip[e_idx]) { - continue; - } - const float xmin = bboxes[0 * E + e_idx]; - const float xmax = bboxes[1 * E + e_idx]; - const float ymin = bboxes[2 * E + e_idx]; - const float ymax = bboxes[3 * E + e_idx]; - - // Brute-force search over all bins; TODO(T54294966) something smarter. - for (int by = 0; by < num_bins_y; ++by) { - // Y coordinate of the top and bottom of the bin. - // PixToNdc gives the location of the center of each pixel, so we - // need to add/subtract a half pixel to get the true extent of the bin. - // Reverse ordering of Y axis so that +Y is upwards in the image. - const float bin_y_min = - PixToNonSquareNdc(by * bin_size, H, W) - half_pix_y; - const float bin_y_max = - PixToNonSquareNdc((by + 1) * bin_size - 1, H, W) + half_pix_y; - const bool y_overlap = (ymin <= bin_y_max) && (bin_y_min < ymax); - - for (int bx = 0; bx < num_bins_x; ++bx) { - // X coordinate of the left and right of the bin. - // Reverse ordering of x axis so that +X is left. - const float bin_x_max = - PixToNonSquareNdc((bx + 1) * bin_size - 1, W, H) + half_pix_x; - const float bin_x_min = - PixToNonSquareNdc(bx * bin_size, W, H) - half_pix_x; - - const bool x_overlap = (xmin <= bin_x_max) && (bin_x_min < xmax); - if (y_overlap && x_overlap) { - binmask.set(by, bx, e); - } - } - } - } - __syncthreads(); - // Now we have processed every elem in the current chunk. We need to - // count the number of elems in each bin so we can write the indices - // out to global memory. We have each thread handle a different bin. - for (int byx = threadIdx.x; byx < num_bins_y * num_bins_x; - byx += blockDim.x) { - const int by = byx / num_bins_x; - const int bx = byx % num_bins_x; - const int count = binmask.count(by, bx); - const int elems_per_bin_idx = - batch_idx * num_bins_y * num_bins_x + by * num_bins_x + bx; - - // This atomically increments the (global) number of elems found - // in the current bin, and gets the previous value of the counter; - // this effectively allocates space in the bin_faces array for the - // elems in the current chunk that fall into this bin. - const int start = atomicAdd(elems_per_bin + elems_per_bin_idx, count); - if (start + count > M) { - // The number of elems in this bin is so big that they won't fit. - // We print a warning using CUDA's printf. This may be invisible - // to notebook users, but apparent to others. It would be nice to - // also have a Python-friendly warning, but it is not obvious - // how to do this without slowing down the normal case. 
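// The loop above treats atomicAdd as an allocation cursor: each thread adds
// the count it found for its bin and gets back the previous total, which is
// the offset at which it may write its indices; if offset + count exceeds
// the bin capacity, the bin has overflowed and the warning string below is
// printed. A host-side sketch of that pattern, with std::atomic standing in
// for atomicAdd; the helper name ReserveBinSlots is only for illustration.
#include <atomic>

// Returns the reserved write offset, or -1 if a bin of capacity max_per_bin
// cannot hold count more entries.
inline int ReserveBinSlots(std::atomic<int>& bin_counter, int count, int max_per_bin) {
  const int start = bin_counter.fetch_add(count);
  return (start + count > max_per_bin) ? -1 : start;
}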
- const char* warning = - "Bin size was too small in the coarse rasterization phase. " - "This caused an overflow, meaning output may be incomplete. " - "To solve, " - "try increasing max_faces_per_bin / max_points_per_bin, " - "decreasing bin_size, " - "or setting bin_size to 0 to use the naive rasterization."; - printf(warning); - continue; - } - - // Now loop over the binmask and write the active bits for this bin - // out to bin_faces. - int next_idx = batch_idx * num_bins_y * num_bins_x * M + - by * num_bins_x * M + bx * M + start; - for (int e = 0; e < chunk_size; ++e) { - if (binmask.get(by, bx, e)) { - // TODO(T54296346) find the correct method for handling errors in - // CUDA. Throw an error if num_faces_per_bin > max_faces_per_bin. - // Either decrease bin size or increase max_faces_per_bin - bin_elems[next_idx] = elem_chunk_start_idx + e; - next_idx++; - } - } - } - __syncthreads(); - } -} - -at::Tensor RasterizeCoarseCuda( - const at::Tensor& bboxes, - const at::Tensor& should_skip, - const at::Tensor& elem_first_idxs, - const at::Tensor& elems_per_batch, - const std::tuple image_size, - const int bin_size, - const int max_elems_per_bin) { - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(bboxes.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - - const int E = bboxes.size(1); - const int N = elems_per_batch.size(0); - const int M = max_elems_per_bin; - - // Integer divide round up - const int num_bins_y = 1 + (H - 1) / bin_size; - const int num_bins_x = 1 + (W - 1) / bin_size; - - if (num_bins_y >= kMaxItemsPerBin || num_bins_x >= kMaxItemsPerBin) { - std::stringstream ss; - ss << "In RasterizeCoarseCuda got num_bins_y: " << num_bins_y - << ", num_bins_x: " << num_bins_x << ", " - << "; that's too many!"; - AT_ERROR(ss.str()); - } - auto opts = elems_per_batch.options().dtype(at::kInt); - at::Tensor elems_per_bin = at::zeros({N, num_bins_y, num_bins_x}, opts); - at::Tensor bin_elems = at::full({N, num_bins_y, num_bins_x, M}, -1, opts); - - if (bin_elems.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return bin_elems; - } - - const int chunk_size = 512; - const size_t shared_size = num_bins_y * num_bins_x * chunk_size / 8; - const size_t blocks = 64; - const size_t threads = 512; - - RasterizeCoarseCudaKernel<<>>( - bboxes.contiguous().data_ptr(), - should_skip.contiguous().data_ptr(), - elem_first_idxs.contiguous().data_ptr(), - elems_per_batch.contiguous().data_ptr(), - N, - E, - H, - W, - bin_size, - chunk_size, - M, - elems_per_bin.data_ptr(), - bin_elems.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - return bin_elems; -} - -at::Tensor RasterizeMeshesCoarseCuda( - const at::Tensor& face_verts, - const at::Tensor& mesh_to_face_first_idx, - const at::Tensor& num_faces_per_mesh, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int max_faces_per_bin) { - TORCH_CHECK( - face_verts.ndimension() == 3 && face_verts.size(1) == 3 && - face_verts.size(2) == 3, - "face_verts must have dimensions (num_faces, 3, 3)"); - - // Check inputs are on the same device - at::TensorArg face_verts_t{face_verts, "face_verts", 1}, - mesh_to_face_first_idx_t{ - mesh_to_face_first_idx, "mesh_to_face_first_idx", 2}, - num_faces_per_mesh_t{num_faces_per_mesh, "num_faces_per_mesh", 3}; - at::CheckedFrom c = "RasterizeMeshesCoarseCuda"; - at::checkAllSameGPU( - c, {face_verts_t, 
mesh_to_face_first_idx_t, num_faces_per_mesh_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(face_verts.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - // Allocate tensors for bboxes and should_skip - const int F = face_verts.size(0); - auto float_opts = face_verts.options().dtype(at::kFloat); - auto bool_opts = face_verts.options().dtype(at::kBool); - at::Tensor bboxes = at::empty({4, F}, float_opts); - at::Tensor should_skip = at::empty({F}, bool_opts); - - // Launch kernel to compute triangle bboxes - const size_t blocks = 128; - const size_t threads = 256; - TriangleBoundingBoxKernel<<>>( - face_verts.contiguous().data_ptr(), - F, - blur_radius, - bboxes.contiguous().data_ptr(), - should_skip.contiguous().data_ptr()); - AT_CUDA_CHECK(cudaGetLastError()); - - return RasterizeCoarseCuda( - bboxes, - should_skip, - mesh_to_face_first_idx, - num_faces_per_mesh, - image_size, - bin_size, - max_faces_per_bin); -} - -at::Tensor RasterizePointsCoarseCuda( - const at::Tensor& points, // (P, 3) - const at::Tensor& cloud_to_packed_first_idx, // (N,) - const at::Tensor& num_points_per_cloud, // (N,) - const std::tuple image_size, - const at::Tensor& radius, - const int bin_size, - const int max_points_per_bin) { - TORCH_CHECK( - points.ndimension() == 2 && points.size(1) == 3, - "points must have dimensions (num_points, 3)"); - - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, - cloud_to_packed_first_idx_t{ - cloud_to_packed_first_idx, "cloud_to_packed_first_idx", 2}, - num_points_per_cloud_t{num_points_per_cloud, "num_points_per_cloud", 3}; - at::CheckedFrom c = "RasterizePointsCoarseCuda"; - at::checkAllSameGPU( - c, {points_t, cloud_to_packed_first_idx_t, num_points_per_cloud_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - // Allocate tensors for bboxes and should_skip - const int P = points.size(0); - auto float_opts = points.options().dtype(at::kFloat); - auto bool_opts = points.options().dtype(at::kBool); - at::Tensor bboxes = at::empty({4, P}, float_opts); - at::Tensor should_skip = at::empty({P}, bool_opts); - - // Launch kernel to compute point bboxes - const size_t blocks = 128; - const size_t threads = 256; - PointBoundingBoxKernel<<>>( - points.contiguous().data_ptr(), - radius.contiguous().data_ptr(), - P, - bboxes.contiguous().data_ptr(), - should_skip.contiguous().data_ptr()); - AT_CUDA_CHECK(cudaGetLastError()); - - return RasterizeCoarseCuda( - bboxes, - should_skip, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - bin_size, - max_points_per_bin); -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.h b/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.h deleted file mode 100644 index 858407cb66b2a252f1b2b223f2adaa2ce8074543..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#pragma once - -#include -#include - -// Arguments are the same as RasterizeMeshesCoarse from -// rasterize_meshes/rasterize_meshes.h -#ifdef WITH_CUDA -torch::Tensor RasterizeMeshesCoarseCuda( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int max_faces_per_bin); -#endif - -// Arguments are the same as RasterizePointsCoarse from -// rasterize_points/rasterize_points.h -#ifdef WITH_CUDA -torch::Tensor RasterizePointsCoarseCuda( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int bin_size, - const int max_points_per_bin); -#endif diff --git a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu deleted file mode 100644 index 21ff7e504d9478eec865c3f12e3740313ce3bf88..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu +++ /dev/null @@ -1,823 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "rasterize_points/rasterization_utils.cuh" -#include "utils/float_math.cuh" -#include "utils/geometry_utils.cuh" - -namespace { -// A structure for holding details about a pixel. -struct Pixel { - float z; - int64_t idx; // idx of face - float dist; // abs distance of pixel to face - float3 bary; -}; - -__device__ bool operator<(const Pixel& a, const Pixel& b) { - return a.z < b.z || (a.z == b.z && a.idx < b.idx); -} - -// Get the xyz coordinates of the three vertices for the face given by the -// index face_idx into face_verts. -__device__ thrust::tuple GetSingleFaceVerts( - const float* face_verts, - int face_idx) { - const float x0 = face_verts[face_idx * 9 + 0]; - const float y0 = face_verts[face_idx * 9 + 1]; - const float z0 = face_verts[face_idx * 9 + 2]; - const float x1 = face_verts[face_idx * 9 + 3]; - const float y1 = face_verts[face_idx * 9 + 4]; - const float z1 = face_verts[face_idx * 9 + 5]; - const float x2 = face_verts[face_idx * 9 + 6]; - const float y2 = face_verts[face_idx * 9 + 7]; - const float z2 = face_verts[face_idx * 9 + 8]; - - const float3 v0xyz = make_float3(x0, y0, z0); - const float3 v1xyz = make_float3(x1, y1, z1); - const float3 v2xyz = make_float3(x2, y2, z2); - - return thrust::make_tuple(v0xyz, v1xyz, v2xyz); -} - -// Get the min/max x/y/z values for the face given by vertices v0, v1, v2. -__device__ thrust::tuple -GetFaceBoundingBox(float3 v0, float3 v1, float3 v2) { - const float xmin = FloatMin3(v0.x, v1.x, v2.x); - const float ymin = FloatMin3(v0.y, v1.y, v2.y); - const float zmin = FloatMin3(v0.z, v1.z, v2.z); - const float xmax = FloatMax3(v0.x, v1.x, v2.x); - const float ymax = FloatMax3(v0.y, v1.y, v2.y); - const float zmax = FloatMax3(v0.z, v1.z, v2.z); - - return thrust::make_tuple( - make_float2(xmin, xmax), - make_float2(ymin, ymax), - make_float2(zmin, zmax)); -} - -// Check if the point (px, py) lies outside the face bounding box face_bbox. -// Return true if the point is outside. 
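// A host-side sketch of the rejection test that CheckPointOutsideBoundingBox
// below performs: grow the face's xy bounding box by the blur radius and
// reject faces with any vertex at or behind the image plane. Vec2/Vec3, the
// function name, and the kEps default are illustrative stand-ins for CUDA's
// float2/float3 and the kEpsilon constant from geometry_utils.cuh.
#include <algorithm>

struct Vec3 { float x, y, z; };
struct Vec2 { float x, y; };

inline bool PointOutsideExpandedBBox(
    const Vec3& v0, const Vec3& v1, const Vec3& v2,
    float blur_radius, const Vec2& pxy, float kEps = 1e-8f) {
  const float xmin = std::min({v0.x, v1.x, v2.x}) - blur_radius;
  const float xmax = std::max({v0.x, v1.x, v2.x}) + blur_radius;
  const float ymin = std::min({v0.y, v1.y, v2.y}) - blur_radius;
  const float ymax = std::max({v0.y, v1.y, v2.y}) + blur_radius;
  // Faces with a vertex too close to (or behind) the camera should be
  // clipped before rasterization; they are skipped here as well.
  const bool z_invalid = std::min({v0.z, v1.z, v2.z}) < kEps;
  return pxy.x < xmin || pxy.x > xmax || pxy.y < ymin || pxy.y > ymax || z_invalid;
}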
-__device__ bool CheckPointOutsideBoundingBox( - float3 v0, - float3 v1, - float3 v2, - float blur_radius, - float2 pxy) { - const auto bbox = GetFaceBoundingBox(v0, v1, v2); - const float2 xlims = thrust::get<0>(bbox); - const float2 ylims = thrust::get<1>(bbox); - const float2 zlims = thrust::get<2>(bbox); - - const float x_min = xlims.x - blur_radius; - const float y_min = ylims.x - blur_radius; - const float x_max = xlims.y + blur_radius; - const float y_max = ylims.y + blur_radius; - - // Faces with at least one vertex behind the camera won't render correctly - // and should be removed or clipped before calling the rasterizer - const bool z_invalid = zlims.x < kEpsilon; - - // Check if the current point is oustside the triangle bounding box. - return ( - pxy.x > x_max || pxy.x < x_min || pxy.y > y_max || pxy.y < y_min || - z_invalid); -} - -// This function checks if a pixel given by xy location pxy lies within the -// face with index face_idx in face_verts. One of the inputs is a list (q) -// which contains Pixel structs with the indices of the faces which intersect -// with this pixel sorted by closest z distance. If the point pxy lies in the -// face, the list (q) is updated and re-orderered in place. In addition -// the auxiliary variables q_size, q_max_z and q_max_idx are also modified. -// This code is shared between RasterizeMeshesNaiveCudaKernel and -// RasterizeMeshesFineCudaKernel. -template -__device__ void CheckPixelInsideFace( - const float* face_verts, // (F, 3, 3) - const int64_t* clipped_faces_neighbor_idx, // (F,) - const int face_idx, - int& q_size, - float& q_max_z, - int& q_max_idx, - FaceQ& q, - const float blur_radius, - const float2 pxy, // Coordinates of the pixel - const int K, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - const auto v012 = GetSingleFaceVerts(face_verts, face_idx); - const float3 v0 = thrust::get<0>(v012); - const float3 v1 = thrust::get<1>(v012); - const float3 v2 = thrust::get<2>(v012); - - // Only need xy for barycentric coordinates and distance calculations. - const float2 v0xy = make_float2(v0.x, v0.y); - const float2 v1xy = make_float2(v1.x, v1.y); - const float2 v2xy = make_float2(v2.x, v2.y); - - // Perform checks and skip if: - // 1. the face is behind the camera - // 2. the face is facing away from the camera - // 3. the face has very small face area - // 4. the pixel is outside the face bbox - const float zmax = FloatMax3(v0.z, v1.z, v2.z); - const bool outside_bbox = CheckPointOutsideBoundingBox( - v0, v1, v2, sqrt(blur_radius), pxy); // use sqrt of blur for bbox - const float face_area = EdgeFunctionForward(v0xy, v1xy, v2xy); - // Check if the face is visible to the camera. - const bool back_face = face_area < 0.0; - const bool zero_face_area = - (face_area <= kEpsilon && face_area >= -1.0f * kEpsilon); - - if (zmax < 0 || cull_backfaces && back_face || outside_bbox || - zero_face_area) { - return; - } - - // Calculate barycentric coords and euclidean dist to triangle. - const float3 p_bary0 = BarycentricCoordsForward(pxy, v0xy, v1xy, v2xy); - const float3 p_bary = !perspective_correct - ? p_bary0 - : BarycentricPerspectiveCorrectionForward(p_bary0, v0.z, v1.z, v2.z); - const float3 p_bary_clip = - !clip_barycentric_coords ? p_bary : BarycentricClipForward(p_bary); - - const float pz = - p_bary_clip.x * v0.z + p_bary_clip.y * v1.z + p_bary_clip.z * v2.z; - - if (pz < 0) { - return; // Face is behind the image plane. 
- } - - // Get abs squared distance - const float dist = PointTriangleDistanceForward(pxy, v0xy, v1xy, v2xy); - - // Use the unclipped bary coordinates to determine if the point is inside the - // face. - const bool inside = p_bary.x > 0.0f && p_bary.y > 0.0f && p_bary.z > 0.0f; - const float signed_dist = inside ? -dist : dist; - // Check if pixel is outside blur region - if (!inside && dist >= blur_radius) { - return; - } - - // Handle the case where a face (f) partially behind the image plane is - // clipped to a quadrilateral and then split into two faces (t1, t2). In this - // case we: - // 1. Find the index of the neighboring face (e.g. for t1 need index of t2) - // 2. Check if the neighboring face (t2) is already in the top K faces - // 3. If yes, compare the distance of the pixel to t1 with the distance to t2. - // 4. If dist_t1 < dist_t2, overwrite the values for t2 in the top K faces. - const int neighbor_idx = clipped_faces_neighbor_idx[face_idx]; - int neighbor_idx_top_k = -1; - - // Check if neighboring face is already in the top K. - // -1 is the fill value in clipped_faces_neighbor_idx - if (neighbor_idx != -1) { - // Only need to loop until q_size. - for (int i = 0; i < q_size; i++) { - if (q[i].idx == neighbor_idx) { - neighbor_idx_top_k = i; - break; - } - } - } - // If neighbor idx is not -1 then it is in the top K struct. - if (neighbor_idx_top_k != -1) { - // If dist of current face is less than neighbor then overwrite the - // neighbor face values in the top K struct. - float neighbor_dist = abs(q[neighbor_idx_top_k].dist); - if (dist < neighbor_dist) { - // Overwrite the neighbor face values - q[neighbor_idx_top_k] = {pz, face_idx, signed_dist, p_bary_clip}; - - // If pz > q_max then overwrite the max values and index of the max. - // q_size stays the same. - if (pz > q_max_z) { - q_max_z = pz; - q_max_idx = neighbor_idx_top_k; - } - } - } else { - // Handle as a normal face - if (q_size < K) { - // Just insert it. - q[q_size] = {pz, face_idx, signed_dist, p_bary_clip}; - if (pz > q_max_z) { - q_max_z = pz; - q_max_idx = q_size; - } - q_size++; - } else if (pz < q_max_z) { - // Overwrite the old max, and find the new max. - q[q_max_idx] = {pz, face_idx, signed_dist, p_bary_clip}; - q_max_z = pz; - for (int i = 0; i < K; i++) { - if (q[i].z > q_max_z) { - q_max_z = q[i].z; - q_max_idx = i; - } - } - } - } -} - -} // namespace - -// **************************************************************************** -// * NAIVE RASTERIZATION * -// **************************************************************************** -__global__ void RasterizeMeshesNaiveCudaKernel( - const float* face_verts, - const int64_t* mesh_to_face_first_idx, - const int64_t* num_faces_per_mesh, - const int64_t* clipped_faces_neighbor_idx, - const float blur_radius, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces, - const int N, - const int H, - const int W, - const int K, - int64_t* face_idxs, - float* zbuf, - float* pix_dists, - float* bary) { - // Simple version: One thread per output pixel - int num_threads = gridDim.x * blockDim.x; - int tid = blockDim.x * blockIdx.x + threadIdx.x; - - for (int i = tid; i < N * H * W; i += num_threads) { - // Convert linear index to 3D index - const int n = i / (H * W); // batch index. - const int pix_idx = i % (H * W); - - // Reverse ordering of X and Y axes - const int yi = H - 1 - pix_idx / W; - const int xi = W - 1 - pix_idx % W; - - // screen coordinates to ndc coordinates of pixel. 
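// The conversion named in the comment above is done by PixToNonSquareNdc just
// below. A host-side sketch of that mapping, derived from the half-pixel
// logic in the coarse kernel: the shorter image side spans an NDC range of
// 2.0, the longer side is scaled by the aspect ratio, and pixel i maps to the
// NDC coordinate of its center. The *Host names are illustrative
// reimplementations, not the utilities from rasterization_utils.cuh.
inline float NonSquareNdcRangeHost(int S1, int S2) {
  // S1 is the image dimension being converted, S2 is the other dimension.
  return S1 > S2 ? 2.0f * S1 / S2 : 2.0f;
}

inline float PixToNonSquareNdcHost(int i, int S1, int S2) {
  const float range = NonSquareNdcRangeHost(S1, S2);
  const float offset = range / 2.0f;
  // Left edge of the range, plus i pixel widths, plus half a pixel width,
  // where one pixel width in NDC is range / S1.
  return -offset + (range * i + offset) / S1;
}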
- const float xf = PixToNonSquareNdc(xi, W, H); - const float yf = PixToNonSquareNdc(yi, H, W); - const float2 pxy = make_float2(xf, yf); - - // For keeping track of the K closest points we want a data structure - // that (1) gives O(1) access to the closest point for easy comparisons, - // and (2) allows insertion of new elements. In the CPU version we use - // std::priority_queue; then (2) is O(log K). We can't use STL - // containers in CUDA; we could roll our own max heap in an array, but - // that would likely have a lot of warp divergence so we do something - // simpler instead: keep the elements in an unsorted array, but keep - // track of the max value and the index of the max value. Then (1) is - // still O(1) time, while (2) is O(K) with a clean loop. Since K <= 8 - // this should be fast enough for our purposes. - Pixel q[kMaxPointsPerPixel]; - int q_size = 0; - float q_max_z = -1000; - int q_max_idx = -1; - - // Using the batch index of the thread get the start and stop - // indices for the faces. - const int64_t face_start_idx = mesh_to_face_first_idx[n]; - const int64_t face_stop_idx = face_start_idx + num_faces_per_mesh[n]; - - // Loop through the faces in the mesh. - for (int f = face_start_idx; f < face_stop_idx; ++f) { - // Check if the pixel pxy is inside the face bounding box and if it is, - // update q, q_size, q_max_z and q_max_idx in place. - - CheckPixelInsideFace( - face_verts, - clipped_faces_neighbor_idx, - f, - q_size, - q_max_z, - q_max_idx, - q, - blur_radius, - pxy, - K, - perspective_correct, - clip_barycentric_coords, - cull_backfaces); - } - - // TODO: make sorting an option as only top k is needed, not sorted values. - BubbleSort(q, q_size); - int idx = n * H * W * K + pix_idx * K; - - for (int k = 0; k < q_size; ++k) { - face_idxs[idx + k] = q[k].idx; - zbuf[idx + k] = q[k].z; - pix_dists[idx + k] = q[k].dist; - bary[(idx + k) * 3 + 0] = q[k].bary.x; - bary[(idx + k) * 3 + 1] = q[k].bary.y; - bary[(idx + k) * 3 + 2] = q[k].bary.z; - } - } -} - -std::tuple -RasterizeMeshesNaiveCuda( - const at::Tensor& face_verts, - const at::Tensor& mesh_to_faces_packed_first_idx, - const at::Tensor& num_faces_per_mesh, - const at::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int num_closest, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - TORCH_CHECK( - face_verts.ndimension() == 3 && face_verts.size(1) == 3 && - face_verts.size(2) == 3, - "face_verts must have dimensions (num_faces, 3, 3)"); - - TORCH_CHECK( - num_faces_per_mesh.size(0) == mesh_to_faces_packed_first_idx.size(0), - "num_faces_per_mesh must have save size first dimension as mesh_to_faces_packed_first_idx"); - - TORCH_CHECK( - clipped_faces_neighbor_idx.size(0) == face_verts.size(0), - "clipped_faces_neighbor_idx must have save size first dimension as face_verts"); - - if (num_closest > kMaxPointsPerPixel) { - std::stringstream ss; - ss << "Must have points_per_pixel <= " << kMaxPointsPerPixel; - AT_ERROR(ss.str()); - } - - // Check inputs are on the same device - at::TensorArg face_verts_t{face_verts, "face_verts", 1}, - mesh_to_faces_packed_first_idx_t{ - mesh_to_faces_packed_first_idx, "mesh_to_faces_packed_first_idx", 2}, - num_faces_per_mesh_t{num_faces_per_mesh, "num_faces_per_mesh", 3}, - clipped_faces_neighbor_idx_t{ - clipped_faces_neighbor_idx, "clipped_faces_neighbor_idx", 4}; - at::CheckedFrom c = "RasterizeMeshesNaiveCuda"; - at::checkAllSameGPU( - c, - {face_verts_t, - 
mesh_to_faces_packed_first_idx_t, - num_faces_per_mesh_t, - clipped_faces_neighbor_idx_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(face_verts.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int N = num_faces_per_mesh.size(0); // batch size. - const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - const int K = num_closest; - - auto long_opts = num_faces_per_mesh.options().dtype(at::kLong); - auto float_opts = face_verts.options().dtype(at::kFloat); - - at::Tensor face_idxs = at::full({N, H, W, K}, -1, long_opts); - at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts); - at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts); - at::Tensor bary = at::full({N, H, W, K, 3}, -1, float_opts); - - if (face_idxs.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(face_idxs, zbuf, bary, pix_dists); - } - - const size_t blocks = 1024; - const size_t threads = 64; - - RasterizeMeshesNaiveCudaKernel<<>>( - face_verts.contiguous().data_ptr(), - mesh_to_faces_packed_first_idx.contiguous().data_ptr(), - num_faces_per_mesh.contiguous().data_ptr(), - clipped_faces_neighbor_idx.contiguous().data_ptr(), - blur_radius, - perspective_correct, - clip_barycentric_coords, - cull_backfaces, - N, - H, - W, - K, - face_idxs.data_ptr(), - zbuf.data_ptr(), - pix_dists.data_ptr(), - bary.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(face_idxs, zbuf, bary, pix_dists); -} - -// **************************************************************************** -// * BACKWARD PASS * -// **************************************************************************** -// TODO: benchmark parallelizing over faces_verts instead of over pixels. -__global__ void RasterizeMeshesBackwardCudaKernel( - const float* face_verts, // (F, 3, 3) - const int64_t* pix_to_face, // (N, H, W, K) - const bool perspective_correct, - const bool clip_barycentric_coords, - const int N, - const int H, - const int W, - const int K, - const float* grad_zbuf, // (N, H, W, K) - const float* grad_bary, // (N, H, W, K, 3) - const float* grad_dists, // (N, H, W, K) - float* grad_face_verts) { // (F, 3, 3) - - // Parallelize over each pixel in images of - // size H * W, for each image in the batch of size N. - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int t_i = tid; t_i < N * H * W; t_i += num_threads) { - // Convert linear index to 3D index - const int n = t_i / (H * W); // batch index. - const int pix_idx = t_i % (H * W); - - // Reverse ordering of X and Y axes. - const int yi = H - 1 - pix_idx / W; - const int xi = W - 1 - pix_idx % W; - - const float xf = PixToNonSquareNdc(xi, W, H); - const float yf = PixToNonSquareNdc(yi, H, W); - const float2 pxy = make_float2(xf, yf); - - // Loop over all the faces for this pixel. - for (int k = 0; k < K; k++) { - // Index into (N, H, W, K, :) grad tensors - // pixel index + top k index - int i = n * H * W * K + pix_idx * K + k; - - const int f = pix_to_face[i]; - if (f < 0) { - continue; // padded face. - } - // Get xyz coordinates of the three face vertices. - const auto v012 = GetSingleFaceVerts(face_verts, f); - const float3 v0 = thrust::get<0>(v012); - const float3 v1 = thrust::get<1>(v012); - const float3 v2 = thrust::get<2>(v012); - - // Only neex xy for barycentric coordinate and distance calculations. 
- const float2 v0xy = make_float2(v0.x, v0.y); - const float2 v1xy = make_float2(v1.x, v1.y); - const float2 v2xy = make_float2(v2.x, v2.y); - - // Get upstream gradients for the face. - const float grad_dist_upstream = grad_dists[i]; - const float grad_zbuf_upstream = grad_zbuf[i]; - const float grad_bary_upstream_w0 = grad_bary[i * 3 + 0]; - const float grad_bary_upstream_w1 = grad_bary[i * 3 + 1]; - const float grad_bary_upstream_w2 = grad_bary[i * 3 + 2]; - const float3 grad_bary_upstream = make_float3( - grad_bary_upstream_w0, grad_bary_upstream_w1, grad_bary_upstream_w2); - - const float3 b_w = BarycentricCoordsForward(pxy, v0xy, v1xy, v2xy); - const float3 b_pp = !perspective_correct - ? b_w - : BarycentricPerspectiveCorrectionForward(b_w, v0.z, v1.z, v2.z); - - const float3 b_w_clip = - !clip_barycentric_coords ? b_pp : BarycentricClipForward(b_pp); - - const bool inside = b_pp.x > 0.0f && b_pp.y > 0.0f && b_pp.z > 0.0f; - const float sign = inside ? -1.0f : 1.0f; - - auto grad_dist_f = PointTriangleDistanceBackward( - pxy, v0xy, v1xy, v2xy, sign * grad_dist_upstream); - const float2 ddist_d_v0 = thrust::get<1>(grad_dist_f); - const float2 ddist_d_v1 = thrust::get<2>(grad_dist_f); - const float2 ddist_d_v2 = thrust::get<3>(grad_dist_f); - - // Upstream gradient for barycentric coords from zbuf calculation: - // zbuf = bary_w0 * z0 + bary_w1 * z1 + bary_w2 * z2 - // Therefore - // d_zbuf/d_bary_w0 = z0 - // d_zbuf/d_bary_w1 = z1 - // d_zbuf/d_bary_w2 = z2 - const float3 d_zbuf_d_bwclip = make_float3(v0.z, v1.z, v2.z); - - // Total upstream barycentric gradients are the sum of - // external upstream gradients and contribution from zbuf. - const float3 grad_bary_f_sum = - (grad_bary_upstream + grad_zbuf_upstream * d_zbuf_d_bwclip); - - float3 grad_bary0 = grad_bary_f_sum; - - if (clip_barycentric_coords) { - grad_bary0 = BarycentricClipBackward(b_w, grad_bary_f_sum); - } - - float dz0_persp = 0.0f, dz1_persp = 0.0f, dz2_persp = 0.0f; - if (perspective_correct) { - auto perspective_grads = BarycentricPerspectiveCorrectionBackward( - b_w, v0.z, v1.z, v2.z, grad_bary0); - grad_bary0 = thrust::get<0>(perspective_grads); - dz0_persp = thrust::get<1>(perspective_grads); - dz1_persp = thrust::get<2>(perspective_grads); - dz2_persp = thrust::get<3>(perspective_grads); - } - - auto grad_bary_f = - BarycentricCoordsBackward(pxy, v0xy, v1xy, v2xy, grad_bary0); - const float2 dbary_d_v0 = thrust::get<1>(grad_bary_f); - const float2 dbary_d_v1 = thrust::get<2>(grad_bary_f); - const float2 dbary_d_v2 = thrust::get<3>(grad_bary_f); - - atomicAdd(grad_face_verts + f * 9 + 0, dbary_d_v0.x + ddist_d_v0.x); - atomicAdd(grad_face_verts + f * 9 + 1, dbary_d_v0.y + ddist_d_v0.y); - atomicAdd( - grad_face_verts + f * 9 + 2, - grad_zbuf_upstream * b_w_clip.x + dz0_persp); - atomicAdd(grad_face_verts + f * 9 + 3, dbary_d_v1.x + ddist_d_v1.x); - atomicAdd(grad_face_verts + f * 9 + 4, dbary_d_v1.y + ddist_d_v1.y); - atomicAdd( - grad_face_verts + f * 9 + 5, - grad_zbuf_upstream * b_w_clip.y + dz1_persp); - atomicAdd(grad_face_verts + f * 9 + 6, dbary_d_v2.x + ddist_d_v2.x); - atomicAdd(grad_face_verts + f * 9 + 7, dbary_d_v2.y + ddist_d_v2.y); - atomicAdd( - grad_face_verts + f * 9 + 8, - grad_zbuf_upstream * b_w_clip.z + dz2_persp); - } - } -} - -at::Tensor RasterizeMeshesBackwardCuda( - const at::Tensor& face_verts, // (F, 3, 3) - const at::Tensor& pix_to_face, // (N, H, W, K) - const at::Tensor& grad_zbuf, // (N, H, W, K) - const at::Tensor& grad_bary, // (N, H, W, K, 3) - const at::Tensor& grad_dists, // 
(N, H, W, K) - const bool perspective_correct, - const bool clip_barycentric_coords) { - // Check inputs are on the same device - at::TensorArg face_verts_t{face_verts, "face_verts", 1}, - pix_to_face_t{pix_to_face, "pix_to_face", 2}, - grad_zbuf_t{grad_zbuf, "grad_zbuf", 3}, - grad_bary_t{grad_bary, "grad_bary", 4}, - grad_dists_t{grad_dists, "grad_dists", 5}; - at::CheckedFrom c = "RasterizeMeshesBackwardCuda"; - at::checkAllSameGPU( - c, {face_verts_t, pix_to_face_t, grad_zbuf_t, grad_bary_t, grad_dists_t}); - at::checkAllSameType( - c, {face_verts_t, grad_zbuf_t, grad_bary_t, grad_dists_t}); - - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("RasterizeMeshesBackwardCuda"); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(face_verts.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int F = face_verts.size(0); - const int N = pix_to_face.size(0); - const int H = pix_to_face.size(1); - const int W = pix_to_face.size(2); - const int K = pix_to_face.size(3); - - at::Tensor grad_face_verts = at::zeros({F, 3, 3}, face_verts.options()); - - if (grad_face_verts.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_face_verts; - } - - const size_t blocks = 1024; - const size_t threads = 64; - - RasterizeMeshesBackwardCudaKernel<<>>( - face_verts.contiguous().data_ptr(), - pix_to_face.contiguous().data_ptr(), - perspective_correct, - clip_barycentric_coords, - N, - H, - W, - K, - grad_zbuf.contiguous().data_ptr(), - grad_bary.contiguous().data_ptr(), - grad_dists.contiguous().data_ptr(), - grad_face_verts.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - return grad_face_verts; -} - -// **************************************************************************** -// * FINE RASTERIZATION * -// **************************************************************************** -__global__ void RasterizeMeshesFineCudaKernel( - const float* face_verts, // (F, 3, 3) - const int32_t* bin_faces, // (N, BH, BW, T) - const int64_t* clipped_faces_neighbor_idx, // (F,) - const float blur_radius, - const int bin_size, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces, - const int N, - const int BH, - const int BW, - const int M, - const int H, - const int W, - const int K, - int64_t* face_idxs, // (N, H, W, K) - float* zbuf, // (N, H, W, K) - float* pix_dists, // (N, H, W, K) - float* bary // (N, H, W, K, 3) -) { - // This can be more than H * W if H or W are not divisible by bin_size. - int num_pixels = N * BH * BW * bin_size * bin_size; - int num_threads = gridDim.x * blockDim.x; - int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int pid = tid; pid < num_pixels; pid += num_threads) { - // Convert linear index into bin and pixel indices. We make the within - // block pixel ids move the fastest, so that adjacent threads will fall - // into the same bin; this should give them coalesced memory reads when - // they read from faces and bin_faces. 
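// A host-side sketch of the index decomposition the fine kernel performs just
// below: the linear pixel id is split into (batch, bin_y, bin_x,
// pixel-within-bin), with the within-bin pixel varying fastest so that
// consecutive thread ids land in the same bin and read bin_faces with
// coalesced accesses. FinePixelIndex and DecomposeFinePixelId are
// illustrative names, not part of the original kernel.
struct FinePixelIndex {
  int n;   // batch index
  int by;  // bin index y
  int bx;  // bin index x
  int yi;  // pixel y in the full image
  int xi;  // pixel x in the full image
};

inline FinePixelIndex DecomposeFinePixelId(int pid, int BH, int BW, int bin_size) {
  FinePixelIndex out;
  int i = pid;
  out.n = i / (BH * BW * bin_size * bin_size);
  i %= BH * BW * bin_size * bin_size;
  out.by = i / (BW * bin_size * bin_size);
  i %= BW * bin_size * bin_size;
  out.bx = i / (bin_size * bin_size);
  i %= bin_size * bin_size;
  out.yi = i / bin_size + out.by * bin_size;
  out.xi = i % bin_size + out.bx * bin_size;
  return out;
}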
- int i = pid;
- const int n = i / (BH * BW * bin_size * bin_size);
- i %= BH * BW * bin_size * bin_size;
- // bin index y
- const int by = i / (BW * bin_size * bin_size);
- i %= BW * bin_size * bin_size;
- // bin index x
- const int bx = i / (bin_size * bin_size);
- // pixel within the bin
- i %= bin_size * bin_size;
-
- // Pixel x, y indices
- const int yi = i / bin_size + by * bin_size;
- const int xi = i % bin_size + bx * bin_size;
-
- if (yi >= H || xi >= W)
- continue;
-
- const float xf = PixToNonSquareNdc(xi, W, H);
- const float yf = PixToNonSquareNdc(yi, H, W);
-
- const float2 pxy = make_float2(xf, yf);
-
- // This part looks like the naive rasterization kernel, except we use
- // bin_faces to only look at a subset of faces already known to fall
- // in this bin. TODO abstract out this logic into some data structure
- // that is shared by both kernels?
- Pixel q[kMaxPointsPerPixel];
- int q_size = 0;
- float q_max_z = -1000;
- int q_max_idx = -1;
-
- for (int m = 0; m < M; m++) {
- const int f = bin_faces[n * BH * BW * M + by * BW * M + bx * M + m];
- if (f < 0) {
- continue; // bin_faces uses -1 as a sentinel value.
- }
- // Check if the pixel pxy is inside the face bounding box and if it is,
- // update q, q_size, q_max_z and q_max_idx in place.
- CheckPixelInsideFace(
- face_verts,
- clipped_faces_neighbor_idx,
- f,
- q_size,
- q_max_z,
- q_max_idx,
- q,
- blur_radius,
- pxy,
- K,
- perspective_correct,
- clip_barycentric_coords,
- cull_backfaces);
- }
-
- // Now we've looked at all the faces for this bin, so we can write
- // output for the current pixel.
- // TODO: make sorting an option as only top k is needed, not sorted values.
- BubbleSort(q, q_size);
-
- // Reverse ordering of the X and Y axis so that
- // in the image +Y is pointing up and +X is pointing left.
- const int yidx = H - 1 - yi; - const int xidx = W - 1 - xi; - - const int pix_idx = n * H * W * K + yidx * W * K + xidx * K; - for (int k = 0; k < q_size; k++) { - face_idxs[pix_idx + k] = q[k].idx; - zbuf[pix_idx + k] = q[k].z; - pix_dists[pix_idx + k] = q[k].dist; - bary[(pix_idx + k) * 3 + 0] = q[k].bary.x; - bary[(pix_idx + k) * 3 + 1] = q[k].bary.y; - bary[(pix_idx + k) * 3 + 2] = q[k].bary.z; - } - } -} - -std::tuple -RasterizeMeshesFineCuda( - const at::Tensor& face_verts, - const at::Tensor& bin_faces, - const at::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int faces_per_pixel, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - TORCH_CHECK( - face_verts.ndimension() == 3 && face_verts.size(1) == 3 && - face_verts.size(2) == 3, - "face_verts must have dimensions (num_faces, 3, 3)"); - TORCH_CHECK(bin_faces.ndimension() == 4, "bin_faces must have 4 dimensions"); - TORCH_CHECK( - clipped_faces_neighbor_idx.size(0) == face_verts.size(0), - "clipped_faces_neighbor_idx must have the same first dimension as face_verts"); - - // Check inputs are on the same device - at::TensorArg face_verts_t{face_verts, "face_verts", 1}, - bin_faces_t{bin_faces, "bin_faces", 2}, - clipped_faces_neighbor_idx_t{ - clipped_faces_neighbor_idx, "clipped_faces_neighbor_idx", 3}; - at::CheckedFrom c = "RasterizeMeshesFineCuda"; - at::checkAllSameGPU( - c, {face_verts_t, bin_faces_t, clipped_faces_neighbor_idx_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(face_verts.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - // bin_faces shape (N, BH, BW, M) - const int N = bin_faces.size(0); - const int BH = bin_faces.size(1); - const int BW = bin_faces.size(2); - const int M = bin_faces.size(3); - const int K = faces_per_pixel; - - const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - - if (K > kMaxPointsPerPixel) { - AT_ERROR("Must have num_closest <= 150"); - } - auto long_opts = bin_faces.options().dtype(at::kLong); - auto float_opts = face_verts.options().dtype(at::kFloat); - - at::Tensor face_idxs = at::full({N, H, W, K}, -1, long_opts); - at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts); - at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts); - at::Tensor bary = at::full({N, H, W, K, 3}, -1, float_opts); - - if (face_idxs.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(face_idxs, zbuf, bary, pix_dists); - } - - const size_t blocks = 1024; - const size_t threads = 64; - - RasterizeMeshesFineCudaKernel<<>>( - face_verts.contiguous().data_ptr(), - bin_faces.contiguous().data_ptr(), - clipped_faces_neighbor_idx.contiguous().data_ptr(), - blur_radius, - bin_size, - perspective_correct, - clip_barycentric_coords, - cull_backfaces, - N, - BH, - BW, - M, - H, - W, - K, - face_idxs.data_ptr(), - zbuf.data_ptr(), - pix_dists.data_ptr(), - bary.data_ptr()); - - return std::make_tuple(face_idxs, zbuf, bary, pix_dists); -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h deleted file mode 100644 index 584aa0238ad86434567716d5c77f212a394b1d84..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h +++ /dev/null @@ -1,549 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. 
- * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include -#include "rasterize_coarse/rasterize_coarse.h" -#include "utils/pytorch3d_cutils.h" - -// **************************************************************************** -// * FORWARD PASS * -// **************************************************************************** - -std::tuple -RasterizeMeshesNaiveCpu( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int faces_per_pixel, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces); - -#ifdef WITH_CUDA -std::tuple -RasterizeMeshesNaiveCuda( - const at::Tensor& face_verts, - const at::Tensor& mesh_to_face_first_idx, - const at::Tensor& num_faces_per_mesh, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int num_closest, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces); -#endif -// Forward pass for rasterizing a batch of meshes. -// -// Args: -// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for -// faces in all the meshes in the batch. Concretely, -// face_verts[f, i] = [x, y, z] gives the coordinates for the -// ith vertex of the fth face. These vertices are expected to be -// in NDC coordinates in the range [-1, 1]. -// mesh_to_face_first_idx: LongTensor of shape (N) giving the index in -// faces_verts of the first face in each mesh in -// the batch where N is the batch size. -// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces -// for each mesh in the batch. -// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the -// index of the neighboring face for each face which was clipped to a -// quadrilateral and then divided into two triangles. -// e.g. for a face f partially behind the image plane which is split into -// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx -// Faces which are not clipped and subdivided are set to -1. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// blur_radius: float distance in NDC coordinates uses to expand the face -// bounding boxes for the rasterization. Set to 0.0 if no blur -// is required. -// faces_per_pixel: the number of closeset faces to rasterize per pixel. -// perspective_correct: Whether to apply perspective correction when -// computing barycentric coordinates. If this is True, -// then this function returns world-space barycentric -// coordinates for each pixel; if this is False then -// this function instead returns screen-space -// barycentric coordinates for each pixel. -// clip_barycentric_coords: Whether, after any perspective correction -// is applied but before the depth is calculated (e.g. for -// z clipping), to "correct" a location outside the face (i.e. with -// a negative barycentric coordinate) to a position on the edge of the -// face. -// cull_backfaces: Bool, Whether to only rasterize mesh faces which are -// visible to the camera. 
This assumes that vertices of -// front-facing triangles are ordered in an anti-clockwise -// fashion, and triangles that face away from the camera are -// in a clockwise order relative to the current view -// direction. NOTE: This will only work if the mesh faces are -// consistently defined with counter-clockwise ordering when -// viewed from the outside. -// -// Returns: -// A 4 element tuple of: -// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of -// each of the closest faces to the pixel in the rasterized -// image, or -1 for pixels that are not covered by any face. -// zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of -// the closest faces for each pixel. -// barycentric_coords: float tensor of shape (N, H, W, K, 3) giving -// barycentric coordinates of the pixel with respect to -// each of the closest faces along the z axis, padded -// with -1 for pixels hit by fewer than -// faces_per_pixel faces. -// dists: float tensor of shape (N, H, W, K) giving the euclidean distance -// in the (NDC) x/y plane between each pixel and its K closest -// faces along the z axis padded with -1 for pixels hit by fewer than -// faces_per_pixel faces. -inline std::tuple -RasterizeMeshesNaive( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int faces_per_pixel, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - // TODO: Better type checking. - if (face_verts.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(face_verts); - CHECK_CUDA(mesh_to_face_first_idx); - CHECK_CUDA(num_faces_per_mesh); - return RasterizeMeshesNaiveCuda( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - clipped_faces_neighbor_idx, - image_size, - blur_radius, - faces_per_pixel, - perspective_correct, - clip_barycentric_coords, - cull_backfaces); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return RasterizeMeshesNaiveCpu( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - clipped_faces_neighbor_idx, - image_size, - blur_radius, - faces_per_pixel, - perspective_correct, - clip_barycentric_coords, - cull_backfaces); - } -} - -// **************************************************************************** -// * BACKWARD PASS * -// **************************************************************************** - -torch::Tensor RasterizeMeshesBackwardCpu( - const torch::Tensor& face_verts, - const torch::Tensor& pix_to_face, - const torch::Tensor& grad_zbuf, - const torch::Tensor& grad_bary, - const torch::Tensor& grad_dists, - const bool perspective_correct, - const bool clip_barycentric_coords); - -#ifdef WITH_CUDA -torch::Tensor RasterizeMeshesBackwardCuda( - const torch::Tensor& face_verts, - const torch::Tensor& pix_to_face, - const torch::Tensor& grad_zbuf, - const torch::Tensor& grad_bary, - const torch::Tensor& grad_dists, - const bool perspective_correct, - const bool clip_barycentric_coords); -#endif - -// Args: -// face_verts: float32 Tensor of shape (F, 3, 3) (from forward pass) giving -// (packed) vertex positions for faces in all the meshes in -// the batch. -// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of -// each of the closest faces to the pixel in the rasterized -// image, or -1 for pixels that are not covered by any face. 
-// grad_zbuf: Tensor of shape (N, H, W, K) giving upstream gradients -// d(loss)/d(zbuf) of the zbuf tensor from the forward pass. -// grad_bary: Tensor of shape (N, H, W, K, 3) giving upstream gradients -// d(loss)/d(bary) of the barycentric_coords tensor returned by -// the forward pass. -// grad_dists: Tensor of shape (N, H, W, K) giving upstream gradients -// d(loss)/d(dists) of the dists tensor from the forward pass. -// perspective_correct: Whether to apply perspective correction when -// computing barycentric coordinates. If this is True, -// then this function returns world-space barycentric -// coordinates for each pixel; if this is False then -// this function instead returns screen-space -// barycentric coordinates for each pixel. -// clip_barycentric_coords: Whether, after any perspective correction -// is applied but before the depth is calculated (e.g. for -// z clipping), to "correct" a location outside the face (i.e. with -// a negative barycentric coordinate) to a position on the edge of the -// face. -// -// Returns: -// grad_face_verts: float32 Tensor of shape (F, 3, 3) giving downstream -// gradients for the face vertices. -torch::Tensor RasterizeMeshesBackward( - const torch::Tensor& face_verts, - const torch::Tensor& pix_to_face, - const torch::Tensor& grad_zbuf, - const torch::Tensor& grad_bary, - const torch::Tensor& grad_dists, - const bool perspective_correct, - const bool clip_barycentric_coords) { - if (face_verts.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(face_verts); - CHECK_CUDA(pix_to_face); - CHECK_CUDA(grad_zbuf); - CHECK_CUDA(grad_bary); - CHECK_CUDA(grad_dists); - return RasterizeMeshesBackwardCuda( - face_verts, - pix_to_face, - grad_zbuf, - grad_bary, - grad_dists, - perspective_correct, - clip_barycentric_coords); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return RasterizeMeshesBackwardCpu( - face_verts, - pix_to_face, - grad_zbuf, - grad_bary, - grad_dists, - perspective_correct, - clip_barycentric_coords); - } -} - -// **************************************************************************** -// * COARSE RASTERIZATION * -// **************************************************************************** - -// RasterizeMeshesCoarseCuda in rasterize_coarse/rasterize_coarse.h - -torch::Tensor RasterizeMeshesCoarseCpu( - const torch::Tensor& face_verts, - const at::Tensor& mesh_to_face_first_idx, - const at::Tensor& num_faces_per_mesh, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int max_faces_per_bin); - -// Args: -// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for -// faces in all the meshes in the batch. Concretely, -// face_verts[f, i] = [x, y, z] gives the coordinates for the -// ith vertex of the fth face. These vertices are expected to be -// in NDC coordinates in the range [-1, 1]. -// mesh_to_face_first_idx: LongTensor of shape (N) giving the index in -// faces_verts of the first face in each mesh in -// the batch where N is the batch size. -// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces -// for each mesh in the batch. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// blur_radius: float distance in NDC coordinates uses to expand the face -// bounding boxes for the rasterization. Set to 0.0 if no blur -// is required. -// bin_size: Size of each bin within the image (in pixels) -// max_faces_per_bin: Maximum number of faces to count in each bin. 
-// -// Returns: -// bin_face_idxs: Tensor of shape (N, num_bins, num_bins, K) giving the -// indices of faces that fall into each bin. - -torch::Tensor RasterizeMeshesCoarse( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int max_faces_per_bin) { - if (face_verts.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(face_verts); - CHECK_CUDA(mesh_to_face_first_idx); - CHECK_CUDA(num_faces_per_mesh); - return RasterizeMeshesCoarseCuda( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - image_size, - blur_radius, - bin_size, - max_faces_per_bin); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return RasterizeMeshesCoarseCpu( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - image_size, - blur_radius, - bin_size, - max_faces_per_bin); - } -} - -// **************************************************************************** -// * FINE RASTERIZATION * -// **************************************************************************** - -#ifdef WITH_CUDA -std::tuple -RasterizeMeshesFineCuda( - const torch::Tensor& face_verts, - const torch::Tensor& bin_faces, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int faces_per_pixel, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces); -#endif -// Args: -// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for -// faces in all the meshes in the batch. Concretely, -// face_verts[f, i] = [x, y, z] gives the coordinates for the -// ith vertex of the fth face. These vertices are expected to be -// in NDC coordinates in the range [-1, 1]. -// bin_faces: int32 Tensor of shape (N, B, B, M) giving the indices of faces -// that fall into each bin (output from coarse rasterization). -// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the -// index of the neighboring face for each face which was clipped to a -// quadrilateral and then divided into two triangles. -// e.g. for a face f partially behind the image plane which is split into -// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx -// Faces which are not clipped and subdivided are set to -1. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// blur_radius: float distance in NDC coordinates uses to expand the face -// bounding boxes for the rasterization. Set to 0.0 if no blur -// is required. -// bin_size: Size of each bin within the image (in pixels) -// faces_per_pixel: the number of closeset faces to rasterize per pixel. -// perspective_correct: Whether to apply perspective correction when -// computing barycentric coordinates. If this is True, -// then this function returns world-space barycentric -// coordinates for each pixel; if this is False then -// this function instead returns screen-space -// barycentric coordinates for each pixel. -// clip_barycentric_coords: Whether, after any perspective correction -// is applied but before the depth is calculated (e.g. for -// z clipping), to "correct" a location outside the face (i.e. with -// a negative barycentric coordinate) to a position on the edge of the -// face. -// cull_backfaces: Bool, Whether to only rasterize mesh faces which are -// visible to the camera. 
This assumes that vertices of -// front-facing triangles are ordered in an anti-clockwise -// fashion, and triangles that face away from the camera are -// in a clockwise order relative to the current view -// direction. NOTE: This will only work if the mesh faces are -// consistently defined with counter-clockwise ordering when -// viewed from the outside. -// -// Returns (same as rasterize_meshes): -// A 4 element tuple of: -// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of -// each of the closest faces to the pixel in the rasterized -// image, or -1 for pixels that are not covered by any face. -// zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of -// the closest faces for each pixel. -// barycentric_coords: float tensor of shape (N, H, W, K, 3) giving -// barycentric coordinates of the pixel with respect to -// each of the closest faces along the z axis, padded -// with -1 for pixels hit by fewer than -// faces_per_pixel faces. -// dists: float tensor of shape (N, H, W, K) giving the euclidean distance -// in the (NDC) x/y plane between each pixel and its K closest -// faces along the z axis padded with -1 for pixels hit by fewer than -// faces_per_pixel faces. -std::tuple -RasterizeMeshesFine( - const torch::Tensor& face_verts, - const torch::Tensor& bin_faces, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int faces_per_pixel, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - if (face_verts.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(face_verts); - CHECK_CUDA(bin_faces); - return RasterizeMeshesFineCuda( - face_verts, - bin_faces, - clipped_faces_neighbor_idx, - image_size, - blur_radius, - bin_size, - faces_per_pixel, - perspective_correct, - clip_barycentric_coords, - cull_backfaces); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - AT_ERROR("NOT IMPLEMENTED"); - } -} - -// **************************************************************************** -// * MAIN ENTRY POINT * -// **************************************************************************** - -// This is the main entry point for the forward pass of the mesh rasterizer; -// it uses either naive or coarse-to-fine rasterization based on bin_size. -// -// Args: -// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for -// faces in all the meshes in the batch. Concretely, -// face_verts[f, i] = [x, y, z] gives the coordinates for the -// ith vertex of the fth face. These vertices are expected to be -// in NDC coordinates in the range [-1, 1]. -// mesh_to_face_first_idx: LongTensor of shape (N) giving the index in -// faces_verts of the first face in each mesh in -// the batch where N is the batch size. -// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces -// for each mesh in the batch. -// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the -// index of the neighboring face for each face which was clipped to a -// quadrilateral and then divided into two triangles. -// e.g. for a face f partially behind the image plane which is split into -// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx -// Faces which are not clipped and subdivided are set to -1. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. 
-// blur_radius: float distance in NDC coordinates uses to expand the face -// bounding boxes for the rasterization. Set to 0.0 if no blur -// is required. -// faces_per_pixel: the number of closeset faces to rasterize per pixel. -// bin_size: Bin size (in pixels) for coarse-to-fine rasterization. Setting -// bin_size=0 uses naive rasterization instead. -// max_faces_per_bin: The maximum number of faces allowed to fall into each -// bin when using coarse-to-fine rasterization. -// perspective_correct: Whether to apply perspective correction when -// computing barycentric coordinates. If this is True, -// then this function returns world-space barycentric -// coordinates for each pixel; if this is False then -// this function instead returns screen-space -// barycentric coordinates for each pixel. -// clip_barycentric_coords: Whether, after any perspective correction -// is applied but before the depth is calculated (e.g. for -// z clipping), to "correct" a location outside the face (i.e. with -// a negative barycentric coordinate) to a position on the edge of the -// face. -// cull_backfaces: Bool, Whether to only rasterize mesh faces which are -// visible to the camera. This assumes that vertices of -// front-facing triangles are ordered in an anti-clockwise -// fashion, and triangles that face away from the camera are -// in a clockwise order relative to the current view -// direction. NOTE: This will only work if the mesh faces are -// consistently defined with counter-clockwise ordering when -// viewed from the outside. -// -// Returns: -// A 4 element tuple of: -// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of -// each of the closest faces to the pixel in the rasterized -// image, or -1 for pixels that are not covered by any face. -// zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of -// the closest faces for each pixel. -// barycentric_coords: float tensor of shape (N, H, W, K, 3) giving -// barycentric coordinates of the pixel with respect to -// each of the closest faces along the z axis, padded -// with -1 for pixels hit by fewer than -// faces_per_pixel faces. -// dists: float tensor of shape (N, H, W, K) giving the euclidean distance -// in the (NDC) x/y plane between each pixel and its K closest -// faces along the z axis padded with -1 for pixels hit by fewer than -// faces_per_pixel faces. 
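// Illustrative sketch (not part of the original file): one way a caller might
// choose bin_size and max_faces_per_bin before invoking RasterizeMeshes. In
// the library these defaults are typically chosen by the caller (on the
// Python side); the heuristic below is an assumption, shown only to make the
// naive vs. coarse-to-fine dispatch concrete. All names in this sketch are
// hypothetical.
#include <algorithm>
#include <cmath>
#include <cstdint>

struct CoarseToFineSettingsSketch {
  int bin_size;           // 0 disables binning and selects the naive path
  int max_faces_per_bin;  // ignored when bin_size == 0
};

inline CoarseToFineSettingsSketch PickCoarseToFineSettings(
    int image_height,
    int image_width,
    int64_t num_faces,
    bool on_gpu) {
  CoarseToFineSettingsSketch s{0, 0};
  if (!on_gpu) {
    // RasterizeMeshesFine has no CPU implementation, so stay on the naive path.
    return s;
  }
  // Grow the bin size with the image so the bin grid stays a manageable size.
  const int max_side = std::max(image_height, image_width);
  const int log2_side = static_cast<int>(std::ceil(std::log2(max_side)));
  s.bin_size = 1 << std::max(log2_side - 4, 4);
  // Leave head-room: the coarse pass raises an error when any bin overflows.
  s.max_faces_per_bin =
      static_cast<int>(std::max<int64_t>(10000, num_faces / 5));
  return s;
}
// e.g. for a 1024x1024 image this picks bin_size = 64, so the fine pass sees
// a 16x16 grid of bins.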
-std::tuple -RasterizeMeshes( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int faces_per_pixel, - const int bin_size, - const int max_faces_per_bin, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - if (bin_size > 0 && max_faces_per_bin > 0) { - // Use coarse-to-fine rasterization - at::Tensor bin_faces = RasterizeMeshesCoarse( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - image_size, - blur_radius, - bin_size, - max_faces_per_bin); - return RasterizeMeshesFine( - face_verts, - bin_faces, - clipped_faces_neighbor_idx, - image_size, - blur_radius, - bin_size, - faces_per_pixel, - perspective_correct, - clip_barycentric_coords, - cull_backfaces); - } else { - // Use the naive per-pixel implementation - return RasterizeMeshesNaive( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - clipped_faces_neighbor_idx, - image_size, - blur_radius, - faces_per_pixel, - perspective_correct, - clip_barycentric_coords, - cull_backfaces); - } -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp b/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp deleted file mode 100644 index 210df55e43de5602c3c80b05e0ff8b9d7e59253a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp +++ /dev/null @@ -1,640 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include "ATen/core/TensorAccessor.h" -#include "rasterize_points/rasterization_utils.h" -#include "utils/geometry_utils.h" -#include "utils/vec2.h" -#include "utils/vec3.h" - -// Get (x, y, z) values for vertex from (3, 3) tensor face. -template -auto ExtractVerts(const Face& face, const int vertex_index) { - return std::make_tuple( - face[vertex_index][0], face[vertex_index][1], face[vertex_index][2]); -} - -// Compute min/max x/y for each face. -auto ComputeFaceBoundingBoxes(const torch::Tensor& face_verts) { - const int total_F = face_verts.size(0); - auto float_opts = face_verts.options().dtype(torch::kFloat32); - auto face_verts_a = face_verts.accessor(); - torch::Tensor face_bboxes = torch::full({total_F, 6}, -2.0, float_opts); - - // Loop through all the faces - for (int f = 0; f < total_F; ++f) { - const auto& face = face_verts_a[f]; - float x0, x1, x2, y0, y1, y2, z0, z1, z2; - std::tie(x0, y0, z0) = ExtractVerts(face, 0); - std::tie(x1, y1, z1) = ExtractVerts(face, 1); - std::tie(x2, y2, z2) = ExtractVerts(face, 2); - - const float x_min = std::min(x0, std::min(x1, x2)); - const float y_min = std::min(y0, std::min(y1, y2)); - const float x_max = std::max(x0, std::max(x1, x2)); - const float y_max = std::max(y0, std::max(y1, y2)); - const float z_min = std::min(z0, std::min(z1, z2)); - const float z_max = std::max(z0, std::max(z1, z2)); - - face_bboxes[f][0] = x_min; - face_bboxes[f][1] = y_min; - face_bboxes[f][2] = x_max; - face_bboxes[f][3] = y_max; - face_bboxes[f][4] = z_min; - face_bboxes[f][5] = z_max; - } - - return face_bboxes; -} - -// Check if the point (px, py) lies inside the face bounding box face_bbox. 
-// Return true if the point is outside. -template -bool CheckPointOutsideBoundingBox( - const Face& face_bbox, - float blur_radius, - float px, - float py) { - // Read triangle bbox coordinates and expand by blur radius. - float x_min = face_bbox[0] - blur_radius; - float y_min = face_bbox[1] - blur_radius; - float x_max = face_bbox[2] + blur_radius; - float y_max = face_bbox[3] + blur_radius; - - // Faces with at least one vertex behind the camera won't render correctly - // and should be removed or clipped before calling the rasterizer - const bool z_invalid = face_bbox[4] < kEpsilon; - - // Check if the current point is within the triangle bounding box. - return (px > x_max || px < x_min || py > y_max || py < y_min || z_invalid); -} - -// Calculate areas of all faces. Returns a tensor of shape (total_faces, 1) -// where faces with zero area have value -1. -auto ComputeFaceAreas(const torch::Tensor& face_verts) { - const int total_F = face_verts.size(0); - auto float_opts = face_verts.options().dtype(torch::kFloat32); - auto face_verts_a = face_verts.accessor(); - torch::Tensor face_areas = torch::full({total_F}, -1, float_opts); - - // Loop through all the faces - for (int f = 0; f < total_F; ++f) { - const auto& face = face_verts_a[f]; - float x0, x1, x2, y0, y1, y2, z0, z1, z2; - std::tie(x0, y0, z0) = ExtractVerts(face, 0); - std::tie(x1, y1, z1) = ExtractVerts(face, 1); - std::tie(x2, y2, z2) = ExtractVerts(face, 2); - - const vec2 v0(x0, y0); - const vec2 v1(x1, y1); - const vec2 v2(x2, y2); - - const float face_area = EdgeFunctionForward(v0, v1, v2); - face_areas[f] = face_area; - } - - return face_areas; -} - -// Helper function to use with std::find_if to find the index of any -// values in the top k struct which match a given idx. -struct IsNeighbor { - IsNeighbor(int neighbor_idx) { - this->neighbor_idx = neighbor_idx; - } - bool operator()(std::tuple elem) { - return (std::get<1>(elem) == neighbor_idx); - } - int neighbor_idx; -}; - -namespace { -void RasterizeMeshesNaiveCpu_worker( - const int start_yi, - const int end_yi, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const float blur_radius, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces, - const int32_t N, - const int H, - const int W, - const int K, - at::TensorAccessor& face_verts_a, - at::TensorAccessor& face_areas_a, - at::TensorAccessor& face_bboxes_a, - at::TensorAccessor& neighbor_idx_a, - at::TensorAccessor& zbuf_a, - at::TensorAccessor& face_idxs_a, - at::TensorAccessor& pix_dists_a, - at::TensorAccessor& barycentric_coords_a) { - for (int n = 0; n < N; ++n) { - // Loop through each mesh in the batch. - // Get the start index of the faces in faces_packed and the num faces - // in the mesh to avoid having to loop through all the faces. - const int face_start_idx = mesh_to_face_first_idx[n].item().to(); - const int face_stop_idx = - (face_start_idx + num_faces_per_mesh[n].item().to()); - - // Iterate through the horizontal lines of the image from top to bottom. - for (int yi = start_yi; yi < end_yi; ++yi) { - // Reverse the order of yi so that +Y is pointing upwards in the image. - const int yidx = H - 1 - yi; - - // Y coordinate of the top of the pixel. - const float yf = PixToNonSquareNdc(yidx, H, W); - // Iterate through pixels on this horizontal line, left to right. - for (int xi = 0; xi < W; ++xi) { - // Reverse the order of xi so that +X is pointing to the left in the - // image. 
- const int xidx = W - 1 - xi; - - // X coordinate of the left of the pixel. - const float xf = PixToNonSquareNdc(xidx, W, H); - - // Use a deque to hold values: - // (z, idx, r, bary.x, bary.y. bary.z) - // Sort the deque as needed to mimic a priority queue. - std::deque> q; - - // Loop through the faces in the mesh. - for (int f = face_start_idx; f < face_stop_idx; ++f) { - // Get coordinates of three face vertices. - const auto& face = face_verts_a[f]; - float x0, x1, x2, y0, y1, y2, z0, z1, z2; - std::tie(x0, y0, z0) = ExtractVerts(face, 0); - std::tie(x1, y1, z1) = ExtractVerts(face, 1); - std::tie(x2, y2, z2) = ExtractVerts(face, 2); - - const vec2 v0(x0, y0); - const vec2 v1(x1, y1); - const vec2 v2(x2, y2); - - const float face_area = face_areas_a[f]; - const bool back_face = face_area < 0.0; - // Check if the face is visible to the camera. - if (cull_backfaces && back_face) { - continue; - } - // Skip faces with zero area. - if (face_area <= kEpsilon && face_area >= -1.0f * kEpsilon) { - continue; - } - - // Skip if point is outside the face bounding box. - const auto face_bbox = face_bboxes_a[f]; - const bool outside_bbox = CheckPointOutsideBoundingBox( - face_bbox, std::sqrt(blur_radius), xf, yf); - if (outside_bbox) { - continue; - } - - // Compute barycentric coordinates and use this to get the - // depth of the point on the triangle. - const vec2 pxy(xf, yf); - const vec3 bary0 = - BarycentricCoordinatesForward(pxy, v0, v1, v2); - const vec3 bary = !perspective_correct - ? bary0 - : BarycentricPerspectiveCorrectionForward(bary0, z0, z1, z2); - - const vec3 bary_clip = - !clip_barycentric_coords ? bary : BarycentricClipForward(bary); - - // Use barycentric coordinates to get the depth of the current pixel - const float pz = - (bary_clip.x * z0 + bary_clip.y * z1 + bary_clip.z * z2); - - if (pz < 0) { - continue; // Point is behind the image plane so ignore. - } - - // Compute squared distance of the point to the triangle. - const float dist = PointTriangleDistanceForward(pxy, v0, v1, v2); - - // Use the bary coordinates to determine if the point is - // inside the face. - const bool inside = bary.x > 0.0f && bary.y > 0.0f && bary.z > 0.0f; - - // If the point is inside the triangle then signed_dist - // is negative. - const float signed_dist = inside ? -dist : dist; - - // Check if pixel is outside blur region - if (!inside && dist >= blur_radius) { - continue; - } - - // Handle the case where a face (f) partially behind the image plane - // is clipped to a quadrilateral and then split into two faces (t1, - // t2). In this case we: - // 1. Find the index of the neighbor (e.g. for t1 need index of t2) - // 2. Check if the neighbor (t2) is already in the top K faces - // 3. If yes, compare the distance of the pixel to t1 with the - // distance to t2. - // 4. If dist_t1 < dist_t2, overwrite the values for t2 in the top K - // faces. - const int neighbor_idx = neighbor_idx_a[f]; - int idx_top_k = -1; - - // Check if neighboring face is already in the top K. - if (neighbor_idx != -1) { - const auto it = - std::find_if(q.begin(), q.end(), IsNeighbor(neighbor_idx)); - // Get the index of the element from the iterator - idx_top_k = (it != q.end()) ? it - q.begin() : idx_top_k; - } - - // If idx_top_k idx is not -1 then it is in the top K struct. - if (idx_top_k != -1) { - // If dist of current face is less than neighbor, overwrite - // the neighbor face values in the top K struct. 
- const auto neighbor = q[idx_top_k]; - const float dist_neighbor = std::abs(std::get<2>(neighbor)); - if (dist < dist_neighbor) { - // Overwrite the neighbor face values. - q[idx_top_k] = std::make_tuple( - pz, f, signed_dist, bary_clip.x, bary_clip.y, bary_clip.z); - } - } else { - // Handle as a normal face. - // The current pixel lies inside the current face. - // Add at the end of the deque. - q.emplace_back( - pz, f, signed_dist, bary_clip.x, bary_clip.y, bary_clip.z); - } - - // Sort the deque inplace based on the z distance - // to mimic using a priority queue. - std::sort(q.begin(), q.end()); - if (static_cast(q.size()) > K) { - // remove the last value - q.pop_back(); - } - } - while (!q.empty()) { - // Loop through and add values to the output tensors - auto t = q.back(); - q.pop_back(); - const int i = q.size(); - zbuf_a[n][yi][xi][i] = std::get<0>(t); - face_idxs_a[n][yi][xi][i] = std::get<1>(t); - pix_dists_a[n][yi][xi][i] = std::get<2>(t); - barycentric_coords_a[n][yi][xi][i][0] = std::get<3>(t); - barycentric_coords_a[n][yi][xi][i][1] = std::get<4>(t); - barycentric_coords_a[n][yi][xi][i][2] = std::get<5>(t); - } - } - } - } -} -} // namespace - -std::tuple -RasterizeMeshesNaiveCpu( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const torch::Tensor& clipped_faces_neighbor_idx, - const std::tuple image_size, - const float blur_radius, - const int faces_per_pixel, - const bool perspective_correct, - const bool clip_barycentric_coords, - const bool cull_backfaces) { - if (face_verts.ndimension() != 3 || face_verts.size(1) != 3 || - face_verts.size(2) != 3) { - AT_ERROR("face_verts must have dimensions (num_faces, 3, 3)"); - } - if (num_faces_per_mesh.size(0) != mesh_to_face_first_idx.size(0)) { - AT_ERROR( - "num_faces_per_mesh must have save size first dimension as mesh_to_face_first_idx"); - } - - const int32_t N = mesh_to_face_first_idx.size(0); // batch_size. - const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - const int K = faces_per_pixel; - - auto long_opts = num_faces_per_mesh.options().dtype(torch::kInt64); - auto float_opts = face_verts.options().dtype(torch::kFloat32); - - // Initialize output tensors. 
- torch::Tensor face_idxs = torch::full({N, H, W, K}, -1, long_opts); - torch::Tensor zbuf = torch::full({N, H, W, K}, -1, float_opts); - torch::Tensor pix_dists = torch::full({N, H, W, K}, -1, float_opts); - torch::Tensor barycentric_coords = - torch::full({N, H, W, K, 3}, -1, float_opts); - - auto face_verts_a = face_verts.accessor(); - auto face_idxs_a = face_idxs.accessor(); - auto zbuf_a = zbuf.accessor(); - auto pix_dists_a = pix_dists.accessor(); - auto barycentric_coords_a = barycentric_coords.accessor(); - auto neighbor_idx_a = clipped_faces_neighbor_idx.accessor(); - - auto face_bboxes = ComputeFaceBoundingBoxes(face_verts); - auto face_bboxes_a = face_bboxes.accessor(); - auto face_areas = ComputeFaceAreas(face_verts); - auto face_areas_a = face_areas.accessor(); - - const int64_t n_threads = at::get_num_threads(); - std::vector threads; - threads.reserve(n_threads); - const int chunk_size = 1 + (H - 1) / n_threads; - int start_yi = 0; - for (int iThread = 0; iThread < n_threads; ++iThread) { - const int64_t end_yi = std::min(start_yi + chunk_size, H); - threads.emplace_back( - RasterizeMeshesNaiveCpu_worker, - start_yi, - end_yi, - mesh_to_face_first_idx, - num_faces_per_mesh, - blur_radius, - perspective_correct, - clip_barycentric_coords, - cull_backfaces, - N, - H, - W, - K, - std::ref(face_verts_a), - std::ref(face_areas_a), - std::ref(face_bboxes_a), - std::ref(neighbor_idx_a), - std::ref(zbuf_a), - std::ref(face_idxs_a), - std::ref(pix_dists_a), - std::ref(barycentric_coords_a)); - start_yi += chunk_size; - } - for (auto&& thread : threads) { - thread.join(); - } - - return std::make_tuple(face_idxs, zbuf, barycentric_coords, pix_dists); -} - -torch::Tensor RasterizeMeshesBackwardCpu( - const torch::Tensor& face_verts, // (F, 3, 3) - const torch::Tensor& pix_to_face, // (N, H, W, K) - const torch::Tensor& grad_zbuf, // (N, H, W, K) - const torch::Tensor& grad_bary, // (N, H, W, K, 3) - const torch::Tensor& grad_dists, // (N, H, W, K) - const bool perspective_correct, - const bool clip_barycentric_coords) { - const int F = face_verts.size(0); - const int N = pix_to_face.size(0); - const int H = pix_to_face.size(1); - const int W = pix_to_face.size(2); - const int K = pix_to_face.size(3); - - torch::Tensor grad_face_verts = torch::zeros({F, 3, 3}, face_verts.options()); - auto face_verts_a = face_verts.accessor(); - auto pix_to_face_a = pix_to_face.accessor(); - auto grad_dists_a = grad_dists.accessor(); - auto grad_zbuf_a = grad_zbuf.accessor(); - auto grad_bary_a = grad_bary.accessor(); - - for (int n = 0; n < N; ++n) { - // Iterate through the horizontal lines of the image from top to bottom. - for (int y = 0; y < H; ++y) { - // Reverse the order of yi so that +Y is pointing upwards in the image. - const int yidx = H - 1 - y; - - // Y coordinate of the top of the pixel. - const float yf = PixToNonSquareNdc(yidx, H, W); - // Iterate through pixels on this horizontal line, left to right. - for (int x = 0; x < W; ++x) { - // Reverse the order of xi so that +X is pointing to the left in the - // image. - const int xidx = W - 1 - x; - - // X coordinate of the left of the pixel. - const float xf = PixToNonSquareNdc(xidx, W, H); - const vec2 pxy(xf, yf); - - // Iterate through the faces that hit this pixel. - for (int k = 0; k < K; ++k) { - // Get face index from forward pass output. - const int f = pix_to_face_a[n][y][x][k]; - if (f < 0) { - continue; // padded face. - } - // Get coordinates of the three face vertices. 
- const auto face_verts_f = face_verts_a[f]; - const float x0 = face_verts_f[0][0]; - const float y0 = face_verts_f[0][1]; - const float z0 = face_verts_f[0][2]; - const float x1 = face_verts_f[1][0]; - const float y1 = face_verts_f[1][1]; - const float z1 = face_verts_f[1][2]; - const float x2 = face_verts_f[2][0]; - const float y2 = face_verts_f[2][1]; - const float z2 = face_verts_f[2][2]; - const vec2 v0xy(x0, y0); - const vec2 v1xy(x1, y1); - const vec2 v2xy(x2, y2); - - // Get upstream gradients for the face. - const float grad_dist_upstream = grad_dists_a[n][y][x][k]; - const float grad_zbuf_upstream = grad_zbuf_a[n][y][x][k]; - const auto grad_bary_upstream_w012 = grad_bary_a[n][y][x][k]; - const float grad_bary_upstream_w0 = grad_bary_upstream_w012[0]; - const float grad_bary_upstream_w1 = grad_bary_upstream_w012[1]; - const float grad_bary_upstream_w2 = grad_bary_upstream_w012[2]; - const vec3 grad_bary_upstream( - grad_bary_upstream_w0, - grad_bary_upstream_w1, - grad_bary_upstream_w2); - - const vec3 bary0 = - BarycentricCoordinatesForward(pxy, v0xy, v1xy, v2xy); - const vec3 bary = !perspective_correct - ? bary0 - : BarycentricPerspectiveCorrectionForward(bary0, z0, z1, z2); - const vec3 bary_clip = - !clip_barycentric_coords ? bary : BarycentricClipForward(bary); - - // Distances inside the face are negative so get the - // correct sign to apply to the upstream gradient. - const bool inside = bary.x > 0.0f && bary.y > 0.0f && bary.z > 0.0f; - const float sign = inside ? -1.0f : 1.0f; - - const auto grad_dist_f = PointTriangleDistanceBackward( - pxy, v0xy, v1xy, v2xy, sign * grad_dist_upstream); - const auto ddist_d_v0 = std::get<1>(grad_dist_f); - const auto ddist_d_v1 = std::get<2>(grad_dist_f); - const auto ddist_d_v2 = std::get<3>(grad_dist_f); - - // Upstream gradient for barycentric coords from zbuf calculation: - // zbuf = bary_w0 * z0 + bary_w1 * z1 + bary_w2 * z2 - // Therefore - // d_zbuf/d_bary_w0 = z0 - // d_zbuf/d_bary_w1 = z1 - // d_zbuf/d_bary_w2 = z2 - const vec3 d_zbuf_d_baryclip(z0, z1, z2); - - // Total upstream barycentric gradients are the sum of - // external upstream gradients and contribution from zbuf. - const vec3 grad_bary_f_sum = - (grad_bary_upstream + grad_zbuf_upstream * d_zbuf_d_baryclip); - - vec3 grad_bary0 = grad_bary_f_sum; - - if (clip_barycentric_coords) { - grad_bary0 = BarycentricClipBackward(bary, grad_bary0); - } - - if (perspective_correct) { - auto perspective_grads = BarycentricPerspectiveCorrectionBackward( - bary0, z0, z1, z2, grad_bary0); - grad_bary0 = std::get<0>(perspective_grads); - grad_face_verts[f][0][2] += std::get<1>(perspective_grads); - grad_face_verts[f][1][2] += std::get<2>(perspective_grads); - grad_face_verts[f][2][2] += std::get<3>(perspective_grads); - } - - auto grad_bary_f = - BarycentricCoordsBackward(pxy, v0xy, v1xy, v2xy, grad_bary0); - const vec2 dbary_d_v0 = std::get<1>(grad_bary_f); - const vec2 dbary_d_v1 = std::get<2>(grad_bary_f); - const vec2 dbary_d_v2 = std::get<3>(grad_bary_f); - - // Update output gradient buffer. 
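// Each vertex accumulates two x/y contributions: the barycentric-coordinate
// path (dbary_d_v*) and the point-to-triangle distance path (ddist_d_v*).
// The z component takes the upstream zbuf gradient weighted by the (clipped)
// barycentric coordinate, since zbuf = bary_clip.x * z0 + bary_clip.y * z1 +
// bary_clip.z * z2; any perspective-correction terms for z were already
// accumulated above. += is used because several pixels (and several of the K
// faces per pixel) can touch the same face.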
- grad_face_verts[f][0][0] += dbary_d_v0.x + ddist_d_v0.x; - grad_face_verts[f][0][1] += dbary_d_v0.y + ddist_d_v0.y; - grad_face_verts[f][0][2] += grad_zbuf_upstream * bary_clip.x; - grad_face_verts[f][1][0] += dbary_d_v1.x + ddist_d_v1.x; - grad_face_verts[f][1][1] += dbary_d_v1.y + ddist_d_v1.y; - grad_face_verts[f][1][2] += grad_zbuf_upstream * bary_clip.y; - grad_face_verts[f][2][0] += dbary_d_v2.x + ddist_d_v2.x; - grad_face_verts[f][2][1] += dbary_d_v2.y + ddist_d_v2.y; - grad_face_verts[f][2][2] += grad_zbuf_upstream * bary_clip.z; - } - } - } - } - return grad_face_verts; -} - -torch::Tensor RasterizeMeshesCoarseCpu( - const torch::Tensor& face_verts, - const torch::Tensor& mesh_to_face_first_idx, - const torch::Tensor& num_faces_per_mesh, - const std::tuple image_size, - const float blur_radius, - const int bin_size, - const int max_faces_per_bin) { - if (face_verts.ndimension() != 3 || face_verts.size(1) != 3 || - face_verts.size(2) != 3) { - AT_ERROR("face_verts must have dimensions (num_faces, 3, 3)"); - } - if (num_faces_per_mesh.ndimension() != 1) { - AT_ERROR("num_faces_per_mesh can only have one dimension"); - } - - const int N = num_faces_per_mesh.size(0); // batch size. - const int M = max_faces_per_bin; - - const float H = std::get<0>(image_size); - const float W = std::get<1>(image_size); - - // Integer division round up. - const int BH = 1 + (H - 1) / bin_size; - const int BW = 1 + (W - 1) / bin_size; - - auto opts = num_faces_per_mesh.options().dtype(torch::kInt32); - torch::Tensor faces_per_bin = torch::zeros({N, BH, BW}, opts); - torch::Tensor bin_faces = torch::full({N, BH, BW, M}, -1, opts); - auto bin_faces_a = bin_faces.accessor(); - - // Precompute all face bounding boxes. - auto face_bboxes = ComputeFaceBoundingBoxes(face_verts); - auto face_bboxes_a = face_bboxes.accessor(); - - const float ndc_x_range = NonSquareNdcRange(W, H); - const float pixel_width_x = ndc_x_range / W; - const float bin_width_x = pixel_width_x * bin_size; - - const float ndc_y_range = NonSquareNdcRange(H, W); - const float pixel_width_y = ndc_y_range / H; - const float bin_width_y = pixel_width_y * bin_size; - - // Iterate through the meshes in the batch. - for (int n = 0; n < N; ++n) { - const int face_start_idx = mesh_to_face_first_idx[n].item().to(); - const int face_stop_idx = - (face_start_idx + num_faces_per_mesh[n].item().to()); - - float bin_y_min = -1.0f; - float bin_y_max = bin_y_min + bin_width_y; - - // Iterate through the horizontal bins from top to bottom. - for (int by = 0; by < BH; ++by) { - float bin_x_min = -1.0f; - float bin_x_max = bin_x_min + bin_width_x; - - // Iterate through bins on this horizontal line, left to right. - for (int bx = 0; bx < BW; ++bx) { - int32_t faces_hit = 0; - - for (int32_t f = face_start_idx; f < face_stop_idx; ++f) { - // Get bounding box and expand by blur radius. - float face_x_min = face_bboxes_a[f][0] - std::sqrt(blur_radius); - float face_y_min = face_bboxes_a[f][1] - std::sqrt(blur_radius); - float face_x_max = face_bboxes_a[f][2] + std::sqrt(blur_radius); - float face_y_max = face_bboxes_a[f][3] + std::sqrt(blur_radius); - float face_z_min = face_bboxes_a[f][4]; - - // Faces with at least one vertex behind the camera won't render - // correctly and should be removed or clipped before calling the - // rasterizer - if (face_z_min < kEpsilon) { - continue; - } - - // Use a half-open interval so that faces exactly on the - // boundary between bins will fall into exactly one bin. 
- bool x_overlap = - (face_x_min <= bin_x_max) && (bin_x_min < face_x_max); - bool y_overlap = - (face_y_min <= bin_y_max) && (bin_y_min < face_y_max); - - if (x_overlap && y_overlap) { - // Got too many faces for this bin, so throw an error. - if (faces_hit >= max_faces_per_bin) { - AT_ERROR("Got too many faces per bin"); - } - // The current point falls in the current bin, so - // record it. - bin_faces_a[n][by][bx][faces_hit] = f; - faces_hit++; - } - } - - // Shift the bin to the right for the next loop iteration - bin_x_min = bin_x_max; - bin_x_max = bin_x_min + bin_width_x; - } - // Shift the bin down for the next loop iteration - bin_y_min = bin_y_max; - bin_y_max = bin_y_min + bin_width_y; - } - } - return bin_faces; -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh deleted file mode 100644 index 600d7a1afff8a5089ad2d8d8023e195bcaab480c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -// The default value of the NDC range is [-1, 1], however in the case that -// H != W, the NDC range is set such that the shorter side has range [-1, 1] and -// the longer side is scaled by the ratio of H:W. S1 is the dimension for which -// the NDC range is calculated and S2 is the other image dimension. -// e.g. to get the NDC x range S1 = W and S2 = H -__device__ inline float NonSquareNdcRange(int S1, int S2) { - float range = 2.0f; - if (S1 > S2) { - // First multiply S1 by float range so that division results - // in a float value. - range = (S1 * range) / S2; - } - return range; -} - -// Given a pixel coordinate 0 <= i < S1, convert it to a normalized device -// coordinates. We divide the NDC range into S1 evenly-sized -// pixels, and assume that each pixel falls in the *center* of its range. -// The default value of the NDC range is [-1, 1], however in the case that -// H != W, the NDC range is set such that the shorter side has range [-1, 1] and -// the longer side is scaled by the ratio of H:W. The dimension of i should be -// S1 and the other image dimension is S2 For example, to get the x and y NDC -// coordinates or a given pixel i: -// x = PixToNonSquareNdc(i, W, H) -// y = PixToNonSquareNdc(i, H, W) -__device__ inline float PixToNonSquareNdc(int i, int S1, int S2) { - float range = NonSquareNdcRange(S1, S2); - // NDC: offset + (i * pixel_width + half_pixel_width) - // The NDC range is [-range/2, range/2]. - float offset = (range / 2.0f); - return -offset + (range * i + offset) / S1; -} - -// The maximum number of points per pixel that we can return. Since we use -// thread-local arrays to hold and sort points, the maximum size of the array -// needs to be known at compile time. There might be some fancy template magic -// we could use to make this more dynamic, but for now just fix a constant. -// TODO: is 8 enough? Would increasing have performance considerations? -const int32_t kMaxPointsPerPixel = 150; - -const int32_t kMaxItemsPerBin = 22; - -template -__device__ inline void BubbleSort(T* arr, int n) { - // Bubble sort. 
We only use it for tiny thread-local arrays (n < 8); in this - // regime we care more about warp divergence than computational complexity. - for (int i = 0; i < n - 1; ++i) { - for (int j = 0; j < n - i - 1; ++j) { - if (arr[j + 1] < arr[j]) { - T temp = arr[j]; - arr[j] = arr[j + 1]; - arr[j + 1] = temp; - } - } - } -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.h b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.h deleted file mode 100644 index 6980afc4a49962760283c46ac415faa5ab76ce8e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -// The default value of the NDC range is [-1, 1], however in the case that -// H != W, the NDC range is set such that the shorter side has range [-1, 1] and -// the longer side is scaled by the ratio of H:W. S1 is the dimension for which -// the NDC range is calculated and S2 is the other image dimension. -// e.g. to get the NDC x range S1 = W and S2 = H -inline float NonSquareNdcRange(int S1, int S2) { - float range = 2.0f; - if (S1 > S2) { - range = (S1 * range) / S2; - } - return range; -} - -// Given a pixel coordinate 0 <= i < S1, convert it to a normalized device -// coordinates. We divide the NDC range into S1 evenly-sized -// pixels, and assume that each pixel falls in the *center* of its range. -// The default value of the NDC range is [-1, 1], however in the case that -// H != W, the NDC range is set such that the shorter side has range [-1, 1] and -// the longer side is scaled by the ratio of H:W. The dimension of i should be -// S1 and the other image dimension is S2 For example, to get the x and y NDC -// coordinates or a given pixel i: -// x = PixToNonSquareNdc(i, W, H) -// y = PixToNonSquareNdc(i, H, W) -inline float PixToNonSquareNdc(int i, int S1, int S2) { - float range = NonSquareNdcRange(S1, S2); - // NDC: offset + (i * pixel_width + half_pixel_width) - // The NDC range is [-range/2, range/2]. - const float offset = (range / 2.0f); - return -offset + (range * i + offset) / S1; -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.cu b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.cu deleted file mode 100644 index 5b18d8334d3358244409a2d9e43acd3c9262cb99..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.cu +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "rasterize_points/rasterization_utils.cuh" - -namespace { -// A little structure for holding details about a pixel. -struct Pix { - float z; // Depth of the reference point. - int32_t idx; // Index of the reference point. - float dist2; // Euclidean distance square to the reference point. -}; - -__device__ inline bool operator<(const Pix& a, const Pix& b) { - return a.z < b.z; -} - -// This function checks if a pixel given by xy location pxy lies within the -// point with index p and batch index n. 
One of the inputs is a list (q) -// which contains Pixel structs with the indices of the points which intersect -// with this pixel sorted by closest z distance. If the pixel pxy lies in the -// point, the list (q) is updated and re-orderered in place. In addition -// the auxiliary variables q_size, q_max_z and q_max_idx are also modified. -// This code is shared between RasterizePointsNaiveCudaKernel and -// RasterizePointsFineCudaKernel. -template -__device__ void CheckPixelInsidePoint( - const float* points, // (P, 3) - const int p_idx, - int& q_size, - float& q_max_z, - int& q_max_idx, - PointQ& q, - const float* radius, - const float xf, - const float yf, - const int K) { - const float px = points[p_idx * 3 + 0]; - const float py = points[p_idx * 3 + 1]; - const float pz = points[p_idx * 3 + 2]; - const float p_radius = radius[p_idx]; - const float radius2 = p_radius * p_radius; - if (pz < 0) - return; // Don't render points behind the camera - const float dx = xf - px; - const float dy = yf - py; - const float dist2 = dx * dx + dy * dy; - if (dist2 < radius2) { - if (q_size < K) { - // Just insert it - q[q_size] = {pz, p_idx, dist2}; - if (pz > q_max_z) { - q_max_z = pz; - q_max_idx = q_size; - } - q_size++; - } else if (pz < q_max_z) { - // Overwrite the old max, and find the new max - q[q_max_idx] = {pz, p_idx, dist2}; - q_max_z = pz; - for (int i = 0; i < K; i++) { - if (q[i].z > q_max_z) { - q_max_z = q[i].z; - q_max_idx = i; - } - } - } - } -} -} // namespace -// **************************************************************************** -// * NAIVE RASTERIZATION * -// **************************************************************************** - -__global__ void RasterizePointsNaiveCudaKernel( - const float* points, // (P, 3) - const int64_t* cloud_to_packed_first_idx, // (N) - const int64_t* num_points_per_cloud, // (N) - const float* radius, - const int N, - const int H, - const int W, - const int K, - int32_t* point_idxs, // (N, H, W, K) - float* zbuf, // (N, H, W, K) - float* pix_dists) { // (N, H, W, K) - // Simple version: One thread per output pixel - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockDim.x * blockIdx.x + threadIdx.x; - for (int i = tid; i < N * H * W; i += num_threads) { - // Convert linear index to 3D index - const int n = i / (H * W); // Batch index - const int pix_idx = i % (H * W); - - // Reverse ordering of the X and Y axis as the camera coordinates - // assume that +Y is pointing up and +X is pointing left. - const int yi = H - 1 - pix_idx / W; - const int xi = W - 1 - pix_idx % W; - - // screen coordinates to ndc coordinates of pixel. - const float xf = PixToNonSquareNdc(xi, W, H); - const float yf = PixToNonSquareNdc(yi, H, W); - - // For keeping track of the K closest points we want a data structure - // that (1) gives O(1) access to the closest point for easy comparisons, - // and (2) allows insertion of new elements. In the CPU version we use - // std::priority_queue; then (2) is O(log K). We can't use STL - // containers in CUDA; we could roll our own max heap in an array, but - // that would likely have a lot of warp divergence so we do something - // simpler instead: keep the elements in an unsorted array, but keep - // track of the max value and the index of the max value. Then (1) is - // still O(1) time, while (2) is O(K) with a clean loop. Since K <= 8 - // this should be fast enough for our purposes. 
- // TODO(jcjohns) Abstract this out into a standalone data structure - Pix q[kMaxPointsPerPixel]; - int q_size = 0; - float q_max_z = -1000; - int q_max_idx = -1; - - // Using the batch index of the thread get the start and stop - // indices for the points. - const int64_t point_start_idx = cloud_to_packed_first_idx[n]; - const int64_t point_stop_idx = point_start_idx + num_points_per_cloud[n]; - - for (int p_idx = point_start_idx; p_idx < point_stop_idx; ++p_idx) { - CheckPixelInsidePoint( - points, p_idx, q_size, q_max_z, q_max_idx, q, radius, xf, yf, K); - } - BubbleSort(q, q_size); - int idx = n * H * W * K + pix_idx * K; - for (int k = 0; k < q_size; ++k) { - point_idxs[idx + k] = q[k].idx; - zbuf[idx + k] = q[k].z; - pix_dists[idx + k] = q[k].dist2; - } - } -} - -std::tuple RasterizePointsNaiveCuda( - const at::Tensor& points, // (P. 3) - const at::Tensor& cloud_to_packed_first_idx, // (N) - const at::Tensor& num_points_per_cloud, // (N) - const std::tuple image_size, - const at::Tensor& radius, - const int points_per_pixel) { - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, - cloud_to_packed_first_idx_t{ - cloud_to_packed_first_idx, "cloud_to_packed_first_idx", 2}, - num_points_per_cloud_t{num_points_per_cloud, "num_points_per_cloud", 3}; - at::CheckedFrom c = "RasterizePointsNaiveCuda"; - at::checkAllSameGPU( - c, {points_t, cloud_to_packed_first_idx_t, num_points_per_cloud_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - TORCH_CHECK( - points.ndimension() == 2 && points.size(1) == 3, - "points must have dimensions (num_points, 3)"); - TORCH_CHECK( - num_points_per_cloud.size(0) == cloud_to_packed_first_idx.size(0), - "num_points_per_cloud must have same size first dimension as cloud_to_packed_first_idx"); - - const int N = num_points_per_cloud.size(0); // batch size. 
- const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - const int K = points_per_pixel; - - if (K > kMaxPointsPerPixel) { - std::stringstream ss; - ss << "Must have points_per_pixel <= " << kMaxPointsPerPixel; - AT_ERROR(ss.str()); - } - - auto int_opts = num_points_per_cloud.options().dtype(at::kInt); - auto float_opts = points.options().dtype(at::kFloat); - at::Tensor point_idxs = at::full({N, H, W, K}, -1, int_opts); - at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts); - at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts); - - if (point_idxs.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(point_idxs, zbuf, pix_dists); - } - - const size_t blocks = 1024; - const size_t threads = 64; - RasterizePointsNaiveCudaKernel<<>>( - points.contiguous().data_ptr(), - cloud_to_packed_first_idx.contiguous().data_ptr(), - num_points_per_cloud.contiguous().data_ptr(), - radius.contiguous().data_ptr(), - N, - H, - W, - K, - point_idxs.contiguous().data_ptr(), - zbuf.contiguous().data_ptr(), - pix_dists.contiguous().data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(point_idxs, zbuf, pix_dists); -} - -// **************************************************************************** -// * FINE RASTERIZATION * -// **************************************************************************** - -__global__ void RasterizePointsFineCudaKernel( - const float* points, // (P, 3) - const int32_t* bin_points, // (N, BH, BW, T) - const float* radius, - const int bin_size, - const int N, - const int BH, // num_bins y - const int BW, // num_bins x - const int M, - const int H, - const int W, - const int K, - int32_t* point_idxs, // (N, H, W, K) - float* zbuf, // (N, H, W, K) - float* pix_dists) { // (N, H, W, K) - // This can be more than H * W if H or W are not divisible by bin_size. - const int num_pixels = N * BH * BW * bin_size * bin_size; - const int num_threads = gridDim.x * blockDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (int pid = tid; pid < num_pixels; pid += num_threads) { - // Convert linear index into bin and pixel indices. We make the within - // block pixel ids move the fastest, so that adjacent threads will fall - // into the same bin; this should give them coalesced memory reads when - // they read from points and bin_points. - int i = pid; - const int n = i / (BH * BW * bin_size * bin_size); - i %= BH * BW * bin_size * bin_size; - const int by = i / (BW * bin_size * bin_size); - i %= BW * bin_size * bin_size; - const int bx = i / (bin_size * bin_size); - i %= bin_size * bin_size; - - const int yi = i / bin_size + by * bin_size; - const int xi = i % bin_size + bx * bin_size; - - if (yi >= H || xi >= W) - continue; - - const float xf = PixToNonSquareNdc(xi, W, H); - const float yf = PixToNonSquareNdc(yi, H, W); - - // This part looks like the naive rasterization kernel, except we use - // bin_points to only look at a subset of points already known to fall - // in this bin. TODO abstract out this logic into some data structure - // that is shared by both kernels? 
- Pix q[kMaxPointsPerPixel]; - int q_size = 0; - float q_max_z = -1000; - int q_max_idx = -1; - for (int m = 0; m < M; ++m) { - const int p = bin_points[n * BH * BW * M + by * BW * M + bx * M + m]; - if (p < 0) { - // bin_points uses -1 as a sentinal value - continue; - } - CheckPixelInsidePoint( - points, p, q_size, q_max_z, q_max_idx, q, radius, xf, yf, K); - } - // Now we've looked at all the points for this bin, so we can write - // output for the current pixel. - BubbleSort(q, q_size); - - // Reverse ordering of the X and Y axis as the camera coordinates - // assume that +Y is pointing up and +X is pointing left. - const int yidx = H - 1 - yi; - const int xidx = W - 1 - xi; - - const int pix_idx = n * H * W * K + yidx * W * K + xidx * K; - for (int k = 0; k < q_size; ++k) { - point_idxs[pix_idx + k] = q[k].idx; - zbuf[pix_idx + k] = q[k].z; - pix_dists[pix_idx + k] = q[k].dist2; - } - } -} - -std::tuple RasterizePointsFineCuda( - const at::Tensor& points, // (P, 3) - const at::Tensor& bin_points, - const std::tuple image_size, - const at::Tensor& radius, - const int bin_size, - const int points_per_pixel) { - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, - bin_points_t{bin_points, "bin_points", 2}; - at::CheckedFrom c = "RasterizePointsFineCuda"; - at::checkAllSameGPU(c, {points_t, bin_points_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int N = bin_points.size(0); - const int BH = bin_points.size(1); - const int BW = bin_points.size(2); - const int M = bin_points.size(3); - const int K = points_per_pixel; - - const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - - if (K > kMaxPointsPerPixel) { - AT_ERROR("Must have num_closest <= 150"); - } - auto int_opts = bin_points.options().dtype(at::kInt); - auto float_opts = points.options().dtype(at::kFloat); - at::Tensor point_idxs = at::full({N, H, W, K}, -1, int_opts); - at::Tensor zbuf = at::full({N, H, W, K}, -1, float_opts); - at::Tensor pix_dists = at::full({N, H, W, K}, -1, float_opts); - - if (point_idxs.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(point_idxs, zbuf, pix_dists); - } - - const size_t blocks = 1024; - const size_t threads = 64; - RasterizePointsFineCudaKernel<<>>( - points.contiguous().data_ptr(), - bin_points.contiguous().data_ptr(), - radius.contiguous().data_ptr(), - bin_size, - N, - BH, - BW, - M, - H, - W, - K, - point_idxs.contiguous().data_ptr(), - zbuf.contiguous().data_ptr(), - pix_dists.contiguous().data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - return std::make_tuple(point_idxs, zbuf, pix_dists); -} - -// **************************************************************************** -// * BACKWARD PASS * -// **************************************************************************** -// TODO(T55115174) Add more documentation for backward kernel. -__global__ void RasterizePointsBackwardCudaKernel( - const float* points, // (P, 3) - const int32_t* idxs, // (N, H, W, K) - const int N, - const int P, - const int H, - const int W, - const int K, - const float* grad_zbuf, // (N, H, W, K) - const float* grad_dists, // (N, H, W, K) - float* grad_points) { // (P, 3) - // Parallelized over each of K points per pixel, for each pixel in images of - // size H * W, for each image in the batch of size N. 
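// For each (pixel, k) entry the forward pass stored dist2 = (px - xf)^2 +
// (py - yf)^2 and zbuf = pz for the point p = idxs[i]. Differentiating gives
// d(dist2)/d(px) = 2 * (px - xf), d(dist2)/d(py) = 2 * (py - yf) and
// d(zbuf)/d(pz) = 1, so the upstream gradients are scaled accordingly below.
// Because the same point can be hit by many pixels, the per-point gradients
// are accumulated with atomicAdd, which is why the host wrapper flags this
// kernel as nondeterministic.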
- int num_threads = gridDim.x * blockDim.x; - int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < N * H * W * K; i += num_threads) { - // const int n = i / (H * W * K); // batch index (not needed). - const int yxk = i % (H * W * K); - const int yi = yxk / (W * K); - const int xk = yxk % (W * K); - const int xi = xk / K; - // k = xk % K (We don't actually need k, but this would be it.) - // Reverse ordering of X and Y axes. - const int yidx = H - 1 - yi; - const int xidx = W - 1 - xi; - - const float xf = PixToNonSquareNdc(xidx, W, H); - const float yf = PixToNonSquareNdc(yidx, H, W); - - const int p = idxs[i]; - if (p < 0) - continue; - const float grad_dist2 = grad_dists[i]; - const int p_ind = p * 3; // index into packed points tensor - const float px = points[p_ind + 0]; - const float py = points[p_ind + 1]; - const float dx = px - xf; - const float dy = py - yf; - const float grad_px = 2.0f * grad_dist2 * dx; - const float grad_py = 2.0f * grad_dist2 * dy; - const float grad_pz = grad_zbuf[i]; - atomicAdd(grad_points + p_ind + 0, grad_px); - atomicAdd(grad_points + p_ind + 1, grad_py); - atomicAdd(grad_points + p_ind + 2, grad_pz); - } -} - -at::Tensor RasterizePointsBackwardCuda( - const at::Tensor& points, // (N, P, 3) - const at::Tensor& idxs, // (N, H, W, K) - const at::Tensor& grad_zbuf, // (N, H, W, K) - const at::Tensor& grad_dists) { // (N, H, W, K) - - // Check inputs are on the same device - at::TensorArg points_t{points, "points", 1}, idxs_t{idxs, "idxs", 2}, - grad_zbuf_t{grad_zbuf, "grad_zbuf", 3}, - grad_dists_t{grad_dists, "grad_dists", 4}; - at::CheckedFrom c = "RasterizePointsBackwardCuda"; - at::checkAllSameGPU(c, {points_t, idxs_t, grad_zbuf_t, grad_dists_t}); - at::checkAllSameType(c, {points_t, grad_zbuf_t, grad_dists_t}); - // This is nondeterministic because atomicAdd - at::globalContext().alertNotDeterministic("RasterizePointsBackwardCuda"); - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int P = points.size(0); - const int N = idxs.size(0); - const int H = idxs.size(1); - const int W = idxs.size(2); - const int K = idxs.size(3); - - at::Tensor grad_points = at::zeros({P, 3}, points.options()); - - if (grad_points.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_points; - } - - const size_t blocks = 1024; - const size_t threads = 64; - - RasterizePointsBackwardCudaKernel<<>>( - points.contiguous().data_ptr(), - idxs.contiguous().data_ptr(), - N, - P, - H, - W, - K, - grad_zbuf.contiguous().data_ptr(), - grad_dists.contiguous().data_ptr(), - grad_points.contiguous().data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - return grad_points; -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.h b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.h deleted file mode 100644 index 4e74e9e2b0600dccbcbe3e745e80f13a8149ca99..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#pragma once -#include -#include -#include -#include "rasterize_coarse/rasterize_coarse.h" -#include "utils/pytorch3d_cutils.h" - -// **************************************************************************** -// * NAIVE RASTERIZATION * -// **************************************************************************** - -std::tuple RasterizePointsNaiveCpu( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int points_per_pixel); - -#ifdef WITH_CUDA -std::tuple -RasterizePointsNaiveCuda( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int points_per_pixel); -#endif -// Naive (forward) pointcloud rasterization: For each pixel, for each point, -// check whether that point hits the pixel. -// -// Args: -// points: Tensor of shape (P, 3) giving (packed) positions for -// points in all N pointclouds in the batch where P is the total -// number of points in the batch across all pointclouds. These points -// are expected to be in NDC coordinates in the range [-1, 1]. -// cloud_to_packed_first_idx: LongTensor of shape (N) giving the index in -// points_packed of the first point in each pointcloud -// in the batch where N is the batch size. -// num_points_per_cloud: LongTensor of shape (N) giving the number of points -// for each pointcloud in the batch. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// radius: FloatTensor of shape (P) giving the radius (in NDC units) of -// each point in points. -// points_per_pixel: (K) The number closest of points to return for each pixel -// -// Returns: -// A 4 element tuple of: -// idxs: int32 Tensor of shape (N, S, S, K) giving the indices of the -// closest K points along the z-axis for each pixel, padded with -1 for -// pixels hit by fewer than K points. The indices refer to points in -// points packed i.e a tensor of shape (P, 3) representing the flattened -// points for all pointclouds in the batch. -// zbuf: float32 Tensor of shape (N, S, S, K) giving the depth of each -// closest point for each pixel. -// dists: float32 Tensor of shape (N, S, S, K) giving squared Euclidean -// distance in the (NDC) x/y plane between each pixel and its K closest -// points along the z axis. 
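// Illustrative sketch (not part of the original header): how a caller could
// derive the packed-cloud bookkeeping tensors that RasterizePointsNaive
// expects from a list of per-cloud point counts. It assumes, as described in
// the Args above, that cloud_to_packed_first_idx[n] is the index of cloud n's
// first point in the packed (P, 3) tensor, i.e. the exclusive cumulative sum
// of num_points_per_cloud. The helper name is hypothetical.
#include <torch/torch.h>
#include <tuple>
#include <vector>

inline std::tuple<torch::Tensor, torch::Tensor> PackedCloudIndicesSketch(
    const std::vector<int64_t>& points_per_cloud) {
  torch::Tensor num_points_per_cloud =
      torch::tensor(points_per_cloud, torch::kInt64);
  // first_idx[n] = total number of points in clouds 0..n-1.
  torch::Tensor cloud_to_packed_first_idx =
      torch::cumsum(num_points_per_cloud, /*dim=*/0) - num_points_per_cloud;
  return std::make_tuple(cloud_to_packed_first_idx, num_points_per_cloud);
}
// e.g. points_per_cloud = {100, 250, 40} gives first_idx = [0, 100, 350].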
-std::tuple RasterizePointsNaive( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int points_per_pixel) { - if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() && - num_points_per_cloud.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(cloud_to_packed_first_idx); - CHECK_CUDA(num_points_per_cloud); - CHECK_CUDA(radius); - return RasterizePointsNaiveCuda( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - points_per_pixel); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return RasterizePointsNaiveCpu( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - points_per_pixel); - } -} - -// **************************************************************************** -// * COARSE RASTERIZATION * -// **************************************************************************** - -// RasterizePointsCoarseCuda in rasterize_coarse/rasterize_coarse.h - -torch::Tensor RasterizePointsCoarseCpu( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int bin_size, - const int max_points_per_bin); - -// Args: -// points: Tensor of shape (P, 3) giving (packed) positions for -// points in all N pointclouds in the batch where P is the total -// number of points in the batch across all pointclouds. These points -// are expected to be in NDC coordinates in the range [-1, 1]. -// cloud_to_packed_first_idx: LongTensor of shape (N) giving the index in -// points_packed of the first point in each pointcloud -// in the batch where N is the batch size. -// num_points_per_cloud: LongTensor of shape (N) giving the number of points -// for each pointcloud in the batch. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// radius: FloatTensor of shape (P) giving the radius (in NDC units) of -// each point in points. -// bin_size: Size of each bin within the image (in pixels) -// max_points_per_bin: The maximum number of points allowed to fall into each -// bin when using coarse-to-fine rasterization. -// -// Returns: -// points_per_bin: Tensor of shape (N, num_bins, num_bins) giving the number -// of points that fall in each bin -// bin_points: Tensor of shape (N, num_bins, num_bins, K) giving the indices -// of points that fall into each bin. 
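// Illustrative sketch (not part of the original header): the bin geometry the
// coarse pass works with, mirroring the CPU mesh implementation earlier in
// this diff. A point overlaps a bin if its radius-expanded bounding square
// intersects the bin's NDC rectangle. Helper names here are hypothetical;
// NonSquareNdcRange is the helper from rasterization_utils.h above.
#include "rasterize_points/rasterization_utils.h"

struct BinGridSketch {
  int bins_y, bins_x;              // 1 + (S - 1) / bin_size, i.e. ceil(S / bin_size)
  float bin_width_y, bin_width_x;  // bin extent in NDC units
};

inline BinGridSketch MakeBinGridSketch(int H, int W, int bin_size) {
  BinGridSketch g;
  g.bins_y = 1 + (H - 1) / bin_size;
  g.bins_x = 1 + (W - 1) / bin_size;
  g.bin_width_y = NonSquareNdcRange(H, W) / H * bin_size;
  g.bin_width_x = NonSquareNdcRange(W, H) / W * bin_size;
  return g;
}

inline bool PointOverlapsBinSketch(
    const BinGridSketch& g, int by, int bx, float px, float py, float r) {
  // Bins are laid out from the NDC minimum (-1) upwards/rightwards, and the
  // overlap test is half-open so a point on a boundary lands in one bin only.
  const float bin_y_min = -1.0f + by * g.bin_width_y;
  const float bin_x_min = -1.0f + bx * g.bin_width_x;
  const float bin_y_max = bin_y_min + g.bin_width_y;
  const float bin_x_max = bin_x_min + g.bin_width_x;
  const bool y_overlap = (py - r <= bin_y_max) && (bin_y_min < py + r);
  const bool x_overlap = (px - r <= bin_x_max) && (bin_x_min < px + r);
  return y_overlap && x_overlap;
}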
-torch::Tensor RasterizePointsCoarse( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int bin_size, - const int max_points_per_bin) { - if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() && - num_points_per_cloud.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(cloud_to_packed_first_idx); - CHECK_CUDA(num_points_per_cloud); - CHECK_CUDA(radius); - return RasterizePointsCoarseCuda( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - bin_size, - max_points_per_bin); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return RasterizePointsCoarseCpu( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - bin_size, - max_points_per_bin); - } -} - -// **************************************************************************** -// * FINE RASTERIZATION * -// **************************************************************************** - -#ifdef WITH_CUDA -std::tuple RasterizePointsFineCuda( - const torch::Tensor& points, - const torch::Tensor& bin_points, - const std::tuple image_size, - const torch::Tensor& radius, - const int bin_size, - const int points_per_pixel); -#endif -// Args: -// points: Tensor of shape (P, 3) giving (packed) positions for -// points in all N pointclouds in the batch where P is the total -// number of points in the batch across all pointclouds. These points -// are expected to be in NDC coordinates in the range [-1, 1]. -// bin_points: int32 Tensor of shape (N, B, B, M) giving the indices of points -// that fall into each bin (output from coarse rasterization) -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// radius: FloatTensor of shape (P) giving the radius (in NDC units) of -// each point in points. -// bin_size: Size of each bin (in pixels) -// points_per_pixel: How many points to rasterize for each pixel -// -// Returns (same as rasterize_points): -// idxs: int32 Tensor of shape (N, S, S, K) giving the indices of the -// closest K points along the z-axis for each pixel, padded with -1 for -// pixels hit by fewer than K points. The indices refer to points in -// points packed i.e a tensor of shape (P, 3) representing the flattened -// points for all pointclouds in the batch. -// zbuf: float32 Tensor of shape (N, S, S, K) giving the depth of each of each -// closest point for each pixel -// dists: float32 Tensor of shape (N, S, S, K) giving squared Euclidean -// distance in the (NDC) x/y plane between each pixel and its K closest -// points along the z axis. 
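// Illustrative sketch (not part of the original header): the lookup the fine
// pass performs for one output pixel. Each pixel only scans the candidate
// list of the bin it falls into, and bin_points uses -1 as a sentinel for
// unused slots. Names in this sketch are hypothetical.
#include <cstdint>

inline void FinePassPixelSketch(
    const int32_t* bin_points,  // (N, BH, BW, M) from the coarse pass
    int n, int yi, int xi,      // batch index and pixel coordinates
    int bin_size, int BH, int BW, int M) {
  const int by = yi / bin_size;  // bin row containing this pixel
  const int bx = xi / bin_size;  // bin column containing this pixel
  const int32_t* candidates = bin_points + ((n * BH + by) * BW + bx) * M;
  for (int m = 0; m < M; ++m) {
    const int32_t p = candidates[m];
    if (p < 0) {
      continue;  // fewer than M points landed in this bin
    }
    // Test point p against pixel (yi, xi) exactly as in the naive kernel,
    // keeping the K closest hits along z.
  }
}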
-std::tuple RasterizePointsFine( - const torch::Tensor& points, - const torch::Tensor& bin_points, - const std::tuple image_size, - const torch::Tensor& radius, - const int bin_size, - const int points_per_pixel) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(bin_points); - return RasterizePointsFineCuda( - points, bin_points, image_size, radius, bin_size, points_per_pixel); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - AT_ERROR("NOT IMPLEMENTED"); - } -} - -// **************************************************************************** -// * BACKWARD PASS * -// **************************************************************************** - -torch::Tensor RasterizePointsBackwardCpu( - const torch::Tensor& points, - const torch::Tensor& idxs, - const torch::Tensor& grad_zbuf, - const torch::Tensor& grad_dists); - -#ifdef WITH_CUDA -torch::Tensor RasterizePointsBackwardCuda( - const torch::Tensor& points, - const torch::Tensor& idxs, - const torch::Tensor& grad_zbuf, - const torch::Tensor& grad_dists); -#endif -// Args: -// points: Tensor of shape (P, 3) giving (packed) positions for -// points in all N pointclouds in the batch where P is the total -// number of points in the batch across all pointclouds. These points -// are expected to be in NDC coordinates in the range [-1, 1]. -// idxs: int32 Tensor of shape (N, H, W, K) (from forward pass) -// grad_zbuf: float32 Tensor of shape (N, H, W, K) giving upstream gradient -// d(loss)/d(zbuf) of the distances from each pixel to its nearest -// points. -// grad_dists: Tensor of shape (N, H, W, K) giving upstream gradient -// d(loss)/d(dists) of the dists tensor returned by the forward -// pass. -// -// Returns: -// grad_points: float32 Tensor of shape (N, P, 3) giving downstream gradients -torch::Tensor RasterizePointsBackward( - const torch::Tensor& points, - const torch::Tensor& idxs, - const torch::Tensor& grad_zbuf, - const torch::Tensor& grad_dists) { - if (points.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(idxs); - CHECK_CUDA(grad_zbuf); - CHECK_CUDA(grad_dists); - return RasterizePointsBackwardCuda(points, idxs, grad_zbuf, grad_dists); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } else { - return RasterizePointsBackwardCpu(points, idxs, grad_zbuf, grad_dists); - } -} - -// **************************************************************************** -// * MAIN ENTRY POINT * -// **************************************************************************** - -// This is the main entry point for the forward pass of the point rasterizer; -// it uses either naive or coarse-to-fine rasterization based on bin_size. -// -// Args: -// points: Tensor of shape (P, 3) giving (packed) positions for -// points in all N pointclouds in the batch where P is the total -// number of points in the batch across all pointclouds. These points -// are expected to be in NDC coordinates in the range [-1, 1]. -// cloud_to_packed_first_idx: LongTensor of shape (N) giving the index in -// points_packed of the first point in each pointcloud -// in the batch where N is the batch size. -// num_points_per_cloud: LongTensor of shape (N) giving the number of points -// for each pointcloud in the batch. -// image_size: Tuple (H, W) giving the size in pixels of the output -// image to be rasterized. -// radius: FloatTensor of shape (P) giving the radius (in NDC units) of -// each point in points. 
-// points_per_pixel: (K) The number of points to return for each pixel -// bin_size: Bin size (in pixels) for coarse-to-fine rasterization. Setting -// bin_size=0 uses naive rasterization instead. -// max_points_per_bin: The maximum number of points allowed to fall into each -// bin when using coarse-to-fine rasterization. -// -// Returns: -// idxs: int32 Tensor of shape (N, S, S, K) giving the indices of the -// closest K points along the z-axis for each pixel, padded with -1 for -// pixels hit by fewer than K points. The indices refer to points in -// points packed i.e a tensor of shape (P, 3) representing the flattened -// points for all pointclouds in the batch. -// zbuf: float32 Tensor of shape (N, S, S, K) giving the depth of each of each -// closest point for each pixel -// dists: float32 Tensor of shape (N, S, S, K) giving squared Euclidean -// distance in the (NDC) x/y plane between each pixel and its K closest -// points along the z axis. -std::tuple RasterizePoints( - const torch::Tensor& points, - const torch::Tensor& cloud_to_packed_first_idx, - const torch::Tensor& num_points_per_cloud, - const std::tuple image_size, - const torch::Tensor& radius, - const int points_per_pixel, - const int bin_size, - const int max_points_per_bin) { - if (bin_size == 0) { - // Use the naive per-pixel implementation - return RasterizePointsNaive( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - points_per_pixel); - } else { - // Use coarse-to-fine rasterization - const auto bin_points = RasterizePointsCoarse( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - bin_size, - max_points_per_bin); - return RasterizePointsFine( - points, bin_points, image_size, radius, bin_size, points_per_pixel); - } -} diff --git a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp b/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp deleted file mode 100644 index 29da4c497b89b2fe3e2e2d5b2563adbbcd6ef326..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include "rasterization_utils.h" - -std::tuple RasterizePointsNaiveCpu( - const torch::Tensor& points, // (P, 3) - const torch::Tensor& cloud_to_packed_first_idx, // (N) - const torch::Tensor& num_points_per_cloud, // (N) - const std::tuple image_size, - const torch::Tensor& radius, - const int points_per_pixel) { - const int32_t N = cloud_to_packed_first_idx.size(0); // batch_size. - - const int H = std::get<0>(image_size); - const int W = std::get<1>(image_size); - const int K = points_per_pixel; - - // Initialize output tensors. 
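// For reference (a reconstruction, not verbatim from the original source),
// the accessors used below are typed roughly as:
//
//   auto points_a = points.accessor<float, 2>();            // (P, 3)
//   auto point_idxs_a = point_idxs.accessor<int32_t, 4>();  // (N, H, W, K)
//   auto zbuf_a = zbuf.accessor<float, 4>();
//   auto pix_dists_a = pix_dists.accessor<float, 4>();
//   auto radius_a = radius.accessor<float, 1>();            // (P,)
//
// and the per-pixel priority queue holds (z, point index, squared distance)
// triples, i.e. roughly std::priority_queue<std::tuple<float, int, float>>.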
- auto int_opts = num_points_per_cloud.options().dtype(torch::kInt32); - auto float_opts = points.options().dtype(torch::kFloat32); - torch::Tensor point_idxs = torch::full({N, H, W, K}, -1, int_opts); - torch::Tensor zbuf = torch::full({N, H, W, K}, -1, float_opts); - torch::Tensor pix_dists = torch::full({N, H, W, K}, -1, float_opts); - - auto points_a = points.accessor(); - auto point_idxs_a = point_idxs.accessor(); - auto zbuf_a = zbuf.accessor(); - auto pix_dists_a = pix_dists.accessor(); - auto radius_a = radius.accessor(); - - for (int n = 0; n < N; ++n) { - // Loop through each pointcloud in the batch. - // Get the start index of the points in points_packed and the num points - // in the point cloud. - const int point_start_idx = - cloud_to_packed_first_idx[n].item().to(); - const int point_stop_idx = - (point_start_idx + num_points_per_cloud[n].item().to()); - - for (int yi = 0; yi < H; ++yi) { - // Reverse the order of yi so that +Y is pointing upwards in the image. - const int yidx = H - 1 - yi; - const float yf = PixToNonSquareNdc(yidx, H, W); - - for (int xi = 0; xi < W; ++xi) { - // Reverse the order of xi so that +X is pointing to the left in the - // image. - const int xidx = W - 1 - xi; - const float xf = PixToNonSquareNdc(xidx, W, H); - - // Use a priority queue to hold (z, idx, r) - std::priority_queue> q; - for (int p = point_start_idx; p < point_stop_idx; ++p) { - const float px = points_a[p][0]; - const float py = points_a[p][1]; - const float pz = points_a[p][2]; - const float p_radius = radius_a[p]; - const float radius2 = p_radius * p_radius; - if (pz < 0) { - continue; - } - const float dx = px - xf; - const float dy = py - yf; - const float dist2 = dx * dx + dy * dy; - if (dist2 < radius2) { - // The current point hit the current pixel - q.emplace(pz, p, dist2); - if ((int)q.size() > K) { - q.pop(); - } - } - } - // Now all the points have been seen, so pop elements off the queue - // one by one and write them into the output tensors. - while (!q.empty()) { - auto t = q.top(); - q.pop(); - int i = q.size(); - zbuf_a[n][yi][xi][i] = std::get<0>(t); - point_idxs_a[n][yi][xi][i] = std::get<1>(t); - pix_dists_a[n][yi][xi][i] = std::get<2>(t); - } - } - } - } - return std::make_tuple(point_idxs, zbuf, pix_dists); -} - -torch::Tensor RasterizePointsCoarseCpu( - const torch::Tensor& points, // (P, 3) - const torch::Tensor& cloud_to_packed_first_idx, // (N) - const torch::Tensor& num_points_per_cloud, // (N) - const std::tuple image_size, - const torch::Tensor& radius, - const int bin_size, - const int max_points_per_bin) { - const int32_t N = cloud_to_packed_first_idx.size(0); // batch_size. - const int M = max_points_per_bin; - - const float H = std::get<0>(image_size); - const float W = std::get<1>(image_size); - - // Integer division round up. 
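// (For example, H = 64 with bin_size = 8 gives 8 bins and H = 65 gives 9,
// i.e. 1 + (H - 1) / bin_size behaves as ceil(H / bin_size) for H >= 1.)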
-  const int BH = 1 + (H - 1) / bin_size;
-  const int BW = 1 + (W - 1) / bin_size;
-
-  auto opts = num_points_per_cloud.options().dtype(torch::kInt32);
-  torch::Tensor points_per_bin = torch::zeros({N, BH, BW}, opts);
-  torch::Tensor bin_points = torch::full({N, BH, BW, M}, -1, opts);
-
-  auto points_a = points.accessor<float, 2>();
-  auto points_per_bin_a = points_per_bin.accessor<int32_t, 3>();
-  auto bin_points_a = bin_points.accessor<int32_t, 4>();
-  auto radius_a = radius.accessor<float, 1>();
-
-  const float ndc_x_range = NonSquareNdcRange(W, H);
-  const float pixel_width_x = ndc_x_range / W;
-  const float bin_width_x = pixel_width_x * bin_size;
-
-  const float ndc_y_range = NonSquareNdcRange(H, W);
-  const float pixel_width_y = ndc_y_range / H;
-  const float bin_width_y = pixel_width_y * bin_size;
-
-  for (int n = 0; n < N; ++n) {
-    // Loop through each pointcloud in the batch.
-    // Get the start index of the points in points_packed and the num points
-    // in the point cloud.
-    const int point_start_idx =
-        cloud_to_packed_first_idx[n].item().to<int32_t>();
-    const int point_stop_idx =
-        (point_start_idx + num_points_per_cloud[n].item().to<int32_t>());
-
-    float bin_y_min = -1.0f;
-    float bin_y_max = bin_y_min + bin_width_y;
-
-    // Iterate through the horizontal bins from top to bottom.
-    for (int by = 0; by < BH; by++) {
-      float bin_x_min = -1.0f;
-      float bin_x_max = bin_x_min + bin_width_x;
-
-      // Iterate through bins on this horizontal line, left to right.
-      for (int bx = 0; bx < BW; bx++) {
-        int32_t points_hit = 0;
-        for (int p = point_start_idx; p < point_stop_idx; ++p) {
-          float px = points_a[p][0];
-          float py = points_a[p][1];
-          float pz = points_a[p][2];
-          const float p_radius = radius_a[p];
-          if (pz < 0) {
-            continue;
-          }
-          float point_x_min = px - p_radius;
-          float point_x_max = px + p_radius;
-          float point_y_min = py - p_radius;
-          float point_y_max = py + p_radius;
-
-          // Use a half-open interval so that points exactly on the
-          // boundary between bins will fall into exactly one bin.
-          bool x_hit = (point_x_min <= bin_x_max) && (bin_x_min <= point_x_max);
-          bool y_hit = (point_y_min <= bin_y_max) && (bin_y_min <= point_y_max);
-          if (x_hit && y_hit) {
-            // Got too many points for this bin, so throw an error.
-            if (points_hit >= max_points_per_bin) {
-              AT_ERROR("Got too many points per bin");
-            }
-            // The current point falls in the current bin, so
-            // record it.
- bin_points_a[n][by][bx][points_hit] = p; - points_hit++; - } - } - // Record the number of points found in this bin - points_per_bin_a[n][by][bx] = points_hit; - - // Shift the bin to the right for the next loop iteration - bin_x_min = bin_x_max; - bin_x_max = bin_x_min + bin_width_x; - } - // Shift the bin down for the next loop iteration - bin_y_min = bin_y_max; - bin_y_max = bin_y_min + bin_width_y; - } - } - return bin_points; -} - -torch::Tensor RasterizePointsBackwardCpu( - const torch::Tensor& points, // (P, 3) - const torch::Tensor& idxs, // (N, H, W, K) - const torch::Tensor& grad_zbuf, // (N, H, W, K) - const torch::Tensor& grad_dists) { // (N, H, W, K) - - const int N = idxs.size(0); - const int P = points.size(0); - const int H = idxs.size(1); - const int W = idxs.size(2); - const int K = idxs.size(3); - - torch::Tensor grad_points = torch::zeros({P, 3}, points.options()); - - auto points_a = points.accessor(); - auto idxs_a = idxs.accessor(); - auto grad_dists_a = grad_dists.accessor(); - auto grad_zbuf_a = grad_zbuf.accessor(); - auto grad_points_a = grad_points.accessor(); - - for (int n = 0; n < N; ++n) { // Loop over images in the batch - for (int y = 0; y < H; ++y) { // Loop over rows in the image - // Reverse the order of yi so that +Y is pointing upwards in the image. - const int yidx = H - 1 - y; - // Y coordinate of the top of the pixel. - const float yf = PixToNonSquareNdc(yidx, H, W); - - // Iterate through pixels on this horizontal line, left to right. - for (int x = 0; x < W; ++x) { // Loop over pixels in the row - - // Reverse the order of xi so that +X is pointing to the left in the - // image. - const int xidx = W - 1 - x; - const float xf = PixToNonSquareNdc(xidx, W, H); - for (int k = 0; k < K; ++k) { // Loop over points for the pixel - const int p = idxs_a[n][y][x][k]; - if (p < 0) { - break; - } - const float grad_dist2 = grad_dists_a[n][y][x][k]; - const float px = points_a[p][0]; - const float py = points_a[p][1]; - const float dx = px - xf; - const float dy = py - yf; - // Remember: dists[n][y][x][k] = dx * dx + dy * dy; - const float grad_px = 2.0f * grad_dist2 * dx; - const float grad_py = 2.0f * grad_dist2 * dy; - grad_points_a[p][0] += grad_px; - grad_points_a[p][1] += grad_py; - grad_points_a[p][2] += grad_zbuf_a[n][y][x][k]; - } - } - } - } - return grad_points; -} diff --git a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu deleted file mode 100644 index 70cef75c7b4bd57cdf4615f415786bbefc288a57..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include "utils/warp_reduce.cuh" - -template -__global__ void FarthestPointSamplingKernel( - // clang-format off - const at::PackedTensorAccessor64 points, - const at::PackedTensorAccessor64 lengths, - const at::PackedTensorAccessor64 K, - at::PackedTensorAccessor64 idxs, - at::PackedTensorAccessor64 min_point_dist, - const at::PackedTensorAccessor64 start_idxs - // clang-format on -) { - typedef cub::BlockReduce< - cub::KeyValuePair, - block_size, - cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> - BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - __shared__ int64_t selected_store; - - // Get constants - const int64_t N = points.size(0); - const int64_t P = points.size(1); - const int64_t D = points.size(2); - - // Get batch index and thread index - const int64_t batch_idx = blockIdx.x; - const size_t tid = threadIdx.x; - - // If K is greater than the number of points in the pointcloud - // we only need to iterate until the smaller value is reached. - const int64_t k_n = min(K[batch_idx], lengths[batch_idx]); - - // Write the first selected point to global memory in the first thread - int64_t selected = start_idxs[batch_idx]; - if (tid == 0) - idxs[batch_idx][0] = selected; - - // Iterate to find k_n sampled points - for (int64_t k = 1; k < k_n; ++k) { - // Keep track of the maximum of the minimum distance to previously selected - // points seen by this thread - int64_t max_dist_idx = 0; - float max_dist = -1.0; - - // Iterate through all the points in this pointcloud. For already selected - // points, the minimum distance to the set of previously selected points - // will be 0.0 so they won't be selected again. - for (int64_t p = tid; p < lengths[batch_idx]; p += block_size) { - // Calculate the distance to the last selected point - float dist2 = 0.0; - for (int64_t d = 0; d < D; ++d) { - float diff = points[batch_idx][selected][d] - points[batch_idx][p][d]; - dist2 += (diff * diff); - } - - // If the distance of point p to the last selected point is - // less than the previous minimum distance of p to the set of selected - // points, then updated the corresponding value in min_point_dist - // so it always contains the min distance. - const float p_min_dist = min(dist2, min_point_dist[batch_idx][p]); - min_point_dist[batch_idx][p] = p_min_dist; - - // Update the max distance and point idx for this thread. - max_dist_idx = (p_min_dist > max_dist) ? p : max_dist_idx; - max_dist = (p_min_dist > max_dist) ? p_min_dist : max_dist; - } - - // max_dist, max_dist_idx are now the max point and idx seen by this thread. - // Now find the index corresponding to the maximum distance seen by any - // thread. (This value is only on thread 0.) - selected = - BlockReduce(temp_storage) - .Reduce( - cub::KeyValuePair(max_dist_idx, max_dist), - cub::ArgMax(), - block_size) - .key; - - if (tid == 0) { - // Write the farthest point for iteration k to global memory - idxs[batch_idx][k] = selected; - selected_store = selected; - } - - // Ensure `selected` in all threads equals the global maximum. 
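// The reduction above follows the usual CUB pattern, roughly:
//
//   using KVP = cub::KeyValuePair<int64_t, float>;
//   KVP best = BlockReduce(temp_storage).Reduce(KVP(idx, dist), cub::ArgMax());
//   // `best` is only defined for threadIdx.x == 0
//
// which is why the winning index is staged in shared memory and broadcast to
// the whole block by the barrier below.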
- __syncthreads(); - selected = selected_store; - } -} - -at::Tensor FarthestPointSamplingCuda( - const at::Tensor& points, // (N, P, 3) - const at::Tensor& lengths, // (N,) - const at::Tensor& K, // (N,) - const at::Tensor& start_idxs) { - // Check inputs are on the same device - at::TensorArg p_t{points, "points", 1}, lengths_t{lengths, "lengths", 2}, - k_t{K, "K", 3}, start_idxs_t{start_idxs, "start_idxs", 4}; - at::CheckedFrom c = "FarthestPointSamplingCuda"; - at::checkAllSameGPU(c, {p_t, lengths_t, k_t, start_idxs_t}); - at::checkAllSameType(c, {lengths_t, k_t, start_idxs_t}); - - // Set the device for the kernel launch based on the device of points - at::cuda::CUDAGuard device_guard(points.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - TORCH_CHECK( - points.size(0) == lengths.size(0), - "Point and lengths must have the same batch dimension"); - - TORCH_CHECK( - points.size(0) == K.size(0), - "Points and K must have the same batch dimension"); - - const int64_t N = points.size(0); - const int64_t P = points.size(1); - const int64_t max_K = at::max(K).item(); - - // Initialize the output tensor with the sampled indices - auto idxs = at::full({N, max_K}, -1, lengths.options()); - auto min_point_dist = at::full({N, P}, 1e10, points.options()); - - if (N == 0 || P == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return idxs; - } - - // Set the number of blocks to the batch size so that the - // block reduction step can be done for each pointcloud - // to find the max distance point in the pointcloud at each iteration. - const size_t blocks = N; - - // Set the threads to the nearest power of 2 of the number of - // points in the pointcloud (up to the max threads in a block). - // This will ensure each thread processes the minimum necessary number of - // points (P/threads). - const int points_pow_2 = std::log(static_cast(P)) / std::log(2.0); - - // Max possible threads per block - const int MAX_THREADS_PER_BLOCK = 1024; - const size_t threads = max(min(1 << points_pow_2, MAX_THREADS_PER_BLOCK), 2); - - // Create the accessors - auto points_a = points.packed_accessor64(); - auto lengths_a = - lengths.packed_accessor64(); - auto K_a = K.packed_accessor64(); - auto idxs_a = idxs.packed_accessor64(); - auto start_idxs_a = - start_idxs.packed_accessor64(); - auto min_point_dist_a = - min_point_dist.packed_accessor64(); - - // TempStorage for the reduction uses static shared memory only. - size_t shared_mem = 0; - - // Support a case for all powers of 2 up to MAX_THREADS_PER_BLOCK possible per - // block. 
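// cub::BlockReduce needs the block size as a compile-time constant, so the
// kernel is templated on block_size and one instantiation per power of two is
// compiled; the switch below selects the instantiation matching the runtime
// thread count. Each launch reads roughly (reconstructed, not verbatim):
//
//   FarthestPointSamplingKernel<512><<<blocks, threads, shared_mem, stream>>>(
//       points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a);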
- switch (threads) { - case 1024: - FarthestPointSamplingKernel<1024> - <<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 512: - FarthestPointSamplingKernel<512><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 256: - FarthestPointSamplingKernel<256><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 128: - FarthestPointSamplingKernel<128><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 64: - FarthestPointSamplingKernel<64><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 32: - FarthestPointSamplingKernel<32><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 16: - FarthestPointSamplingKernel<16><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 8: - FarthestPointSamplingKernel<8><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 4: - FarthestPointSamplingKernel<4><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - case 2: - FarthestPointSamplingKernel<2><<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - break; - default: - FarthestPointSamplingKernel<1024> - <<>>( - points_a, lengths_a, K_a, idxs_a, min_point_dist_a, start_idxs_a); - } - - AT_CUDA_CHECK(cudaGetLastError()); - return idxs; -} diff --git a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h deleted file mode 100644 index 7b613d358880936863c2a56b82dee77d93d777f9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include "utils/pytorch3d_cutils.h" - -// Iterative farthest point sampling algorithm [1] to subsample a set of -// K points from a given pointcloud. At each iteration, a point is selected -// which has the largest nearest neighbor distance to any of the -// already selected points. - -// Farthest point sampling provides more uniform coverage of the input -// point cloud compared to uniform random sampling. - -// [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning -// on Point Sets in a Metric Space", NeurIPS 2017. - -// Args: -// points: (N, P, D) float32 Tensor containing the batch of pointclouds. -// lengths: (N,) long Tensor giving the number of points in each pointcloud -// (to support heterogeneous batches of pointclouds). -// K: a tensor of length (N,) giving the number of -// samples to select for each element in the batch. -// The number of samples is typically << P. -// start_idxs: (N,) long Tensor giving the index of the first point to -// sample. Default is all 0. When a random start point is required, -// start_idxs should be set to a random value between [0, lengths[n]] -// for batch element n. -// Returns: -// selected_indices: (N, K) array of selected indices. If the values in -// K are not all the same, then the shape will be (N, max(K), D), and -// padded with -1 for batch elements where k_i < max(K). 
The selected -// points are gathered in the pytorch autograd wrapper. - -at::Tensor FarthestPointSamplingCuda( - const at::Tensor& points, - const at::Tensor& lengths, - const at::Tensor& K, - const at::Tensor& start_idxs); - -at::Tensor FarthestPointSamplingCpu( - const at::Tensor& points, - const at::Tensor& lengths, - const at::Tensor& K, - const at::Tensor& start_idxs); - -// Exposed implementation. -at::Tensor FarthestPointSampling( - const at::Tensor& points, - const at::Tensor& lengths, - const at::Tensor& K, - const at::Tensor& start_idxs) { - if (points.is_cuda() || lengths.is_cuda() || K.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(points); - CHECK_CUDA(lengths); - CHECK_CUDA(K); - CHECK_CUDA(start_idxs); - return FarthestPointSamplingCuda(points, lengths, K, start_idxs); -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - return FarthestPointSamplingCpu(points, lengths, K, start_idxs); -} diff --git a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp b/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp deleted file mode 100644 index cd533825f4da75e232bc493c28a0872e477d6db7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/sample_farthest_points/sample_farthest_points_cpu.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -at::Tensor FarthestPointSamplingCpu( - const at::Tensor& points, - const at::Tensor& lengths, - const at::Tensor& K, - const at::Tensor& start_idxs) { - // Get constants - const int64_t N = points.size(0); - const int64_t P = points.size(1); - const int64_t D = points.size(2); - const int64_t max_K = torch::max(K).item(); - - // Initialize an output array for the sampled indices - // of shape (N, max_K) - auto long_opts = lengths.options(); - torch::Tensor sampled_indices = torch::full({N, max_K}, -1, long_opts); - - // Create accessors for all tensors - auto points_a = points.accessor(); - auto lengths_a = lengths.accessor(); - auto k_a = K.accessor(); - auto sampled_indices_a = sampled_indices.accessor(); - auto start_idxs_a = start_idxs.accessor(); - - // Initialize a mask to prevent duplicates - // If true, the point has already been selected. 
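// The two std::vectors below hold one entry per point: a selected/not-selected
// flag and the current minimum squared distance to the selected set (roughly
// std::vector<bool> and std::vector<float>; reconstructed, not verbatim).
// The loop then maintains the standard farthest-point-sampling invariant:
// dists[p] is the squared distance from point p to its nearest already
// selected point, so each iteration only has to (1) relax dists against the
// most recently selected point and (2) pick argmax(dists) as the next sample.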
- std::vector selected_points_mask(P, false); - - // Initialize to infinity a vector of - // distances from each point to any of the previously selected points - std::vector dists(P, std::numeric_limits::max()); - - for (int64_t n = 0; n < N; ++n) { - // Resize and reset points mask and distances for each batch - selected_points_mask.resize(lengths_a[n]); - dists.resize(lengths_a[n]); - std::fill(selected_points_mask.begin(), selected_points_mask.end(), false); - std::fill(dists.begin(), dists.end(), std::numeric_limits::max()); - - // Get the starting point index and save it - int64_t last_idx = start_idxs_a[n]; - sampled_indices_a[n][0] = last_idx; - - // Set the value of the mask at this point to false - selected_points_mask[last_idx] = true; - - // For heterogeneous pointclouds, use the minimum of the - // length for that cloud compared to K as the number of - // points to sample - const int64_t batch_k = std::min(lengths_a[n], k_a[n]); - - // Iteratively select batch_k points per batch - for (int64_t k = 1; k < batch_k; ++k) { - // Iterate through all the points - for (int64_t p = 0; p < lengths_a[n]; ++p) { - if (selected_points_mask[p]) { - // For already selected points set the distance to 0.0 - dists[p] = 0.0; - continue; - } - - // Calculate the distance to the last selected point - float dist2 = 0.0; - for (int64_t d = 0; d < D; ++d) { - float diff = points_a[n][last_idx][d] - points_a[n][p][d]; - dist2 += diff * diff; - } - - // If the distance of this point to the last selected point is closer - // than the distance to any of the previously selected points, then - // update this distance - if (dist2 < dists[p]) { - dists[p] = dist2; - } - } - - // The aim is to pick the point that has the largest - // nearest neighbour distance to any of the already selected points - auto itr = std::max_element(dists.begin(), dists.end()); - last_idx = std::distance(dists.begin(), itr); - - // Save selected point - sampled_indices_a[n][k] = last_idx; - - // Set the mask value to true to prevent duplicates. - selected_points_mask[last_idx] = true; - } - } - - return sampled_indices; -} diff --git a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.cu b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.cu deleted file mode 100644 index 885313ac3c8aaba173c4691a1189ea43ddd39f1e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.cu +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -// There is no intermediate memory, so no reason not to have blocksize=32. -// 256 is a reasonable number of blocks. - -// DESIGN -// We exploit the fact that n_samples is not tiny. -// A chunk of work is T*blocksize many samples from -// a single batch elememt. -// For each batch element there will be -// chunks_per_batch = 1 + (n_samples-1)/(T*blocksize) of them. -// The number of potential chunks to do is -// n_chunks = chunks_per_batch * n_batches. -// These chunks are divided among the gridSize-many blocks. -// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc . -// In chunk i, we work on batch_element i/chunks_per_batch -// on samples starting from (i%chunks_per_batch) * (T*blocksize) - -// BEGIN HYPOTHETICAL -// Another option (not implemented) if batch_size was always large -// would be as follows. 
- -// A chunk of work is S samples from each of blocksize-many -// batch elements. -// For each batch element there will be -// chunks_per_batch = (1+(n_samples-1)/S) of them. -// The number of potential chunks to do is -// n_chunks = chunks_per_batch * (1+(n_batches-1)/blocksize) -// These chunks are divided among the gridSize-many blocks. -// In block b, we work on chunks b, b+gridSize, b+2*gridSize etc . -// In chunk i, we work on samples starting from S*(i%chunks_per_batch) -// on batch elements starting from blocksize*(i/chunks_per_batch). -// END HYPOTHETICAL - -__global__ void SamplePdfCudaKernel( - const float* __restrict__ bins, - const float* __restrict__ weights, - float* __restrict__ outputs, - float eps, - const int T, - const int64_t batch_size, - const int64_t n_bins, - const int64_t n_samples) { - const int64_t chunks_per_batch = 1 + (n_samples - 1) / (T * blockDim.x); - const int64_t n_chunks = chunks_per_batch * batch_size; - - for (int64_t i_chunk = blockIdx.x; i_chunk < n_chunks; i_chunk += gridDim.x) { - // Loop over the chunks. - int64_t i_batch_element = i_chunk / chunks_per_batch; - int64_t sample_start = (i_chunk % chunks_per_batch) * (T * blockDim.x); - const float* const weight_startp = weights + n_bins * i_batch_element; - const float* const bin_startp = bins + (1 + n_bins) * i_batch_element; - - // Each chunk looks at a single batch element, so we do the preprocessing - // which depends on the batch element, namely finding the total weight. - // Idenntical work is being done in sync here by every thread of the block. - float total_weight = eps; - for (int64_t i_bin = 0; i_bin < n_bins; ++i_bin) { - total_weight += weight_startp[i_bin]; - } - - float* const output_startp = - outputs + n_samples * i_batch_element + sample_start; - - for (int t = 0; t < T; ++t) { - // Loop over T, which is the number of samples each thread makes within - // the chunk. - const int64_t i_sample_within_chunk = threadIdx.x + t * blockDim.x; - if (sample_start + i_sample_within_chunk >= n_samples) { - // Some threads need to exit early because the sample they would - // make is unwanted. - continue; - } - // output_startp[i_sample_within_chunk] contains the quantile we (i.e. - // this thread) are calcvulating. - float uniform = total_weight * output_startp[i_sample_within_chunk]; - int64_t i_bin = 0; - // We find the bin containing the quantile by walking along the weights. - // This loop must be thread dependent. I.e. the whole warp will wait until - // every thread has found the bin for its quantile. - // It may be best to write it differently. - while (i_bin + 1 < n_bins && uniform > weight_startp[i_bin]) { - uniform -= weight_startp[i_bin]; - ++i_bin; - } - - // Now we know which bin to look in, we use linear interpolation - // to find the location of the quantile within the bin, and - // write the answer back. 
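// A small worked example of the interpolation below: with bin edges
// [0.0, 1.0], bin weight 0.5 and a residual mass of uniform = 0.2 left after
// subtracting earlier bins, the sample is 0.0 + (0.2 / 0.5) * (1.0 - 0.0) =
// 0.4; if the residual exceeds the bin weight the sample clamps to the bin's
// upper edge.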
- float bin_start = bin_startp[i_bin]; - float bin_end = bin_startp[i_bin + 1]; - float bin_weight = weight_startp[i_bin]; - float output_value = bin_start; - if (uniform > bin_weight) { - output_value = bin_end; - } else if (bin_weight > eps) { - output_value += (uniform / bin_weight) * (bin_end - bin_start); - } - output_startp[i_sample_within_chunk] = output_value; - } - } -} - -void SamplePdfCuda( - const at::Tensor& bins, - const at::Tensor& weights, - const at::Tensor& outputs, - float eps) { - // Check inputs are on the same device - at::TensorArg bins_t{bins, "bins", 1}, weights_t{weights, "weights", 2}, - outputs_t{outputs, "outputs", 3}; - at::CheckedFrom c = "SamplePdfCuda"; - at::checkAllSameGPU(c, {bins_t, weights_t, outputs_t}); - at::checkAllSameType(c, {bins_t, weights_t, outputs_t}); - - // Set the device for the kernel launch based on the device of the input - at::cuda::CUDAGuard device_guard(bins.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - const int64_t batch_size = bins.size(0); - const int64_t n_bins = weights.size(1); - const int64_t n_samples = outputs.size(1); - - const int64_t threads = 32; - const int64_t T = n_samples <= threads ? 1 : 2; - const int64_t chunks_per_batch = 1 + (n_samples - 1) / (T * threads); - const int64_t n_chunks = chunks_per_batch * batch_size; - - const int64_t max_blocks = 1024; - const int64_t blocks = n_chunks < max_blocks ? n_chunks : max_blocks; - - SamplePdfCudaKernel<<>>( - bins.contiguous().data_ptr(), - weights.contiguous().data_ptr(), - outputs.data_ptr(), // Checked contiguous in header file. - eps, - T, - batch_size, - n_bins, - n_samples); - - AT_CUDA_CHECK(cudaGetLastError()); -} diff --git a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.h b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.h deleted file mode 100644 index 899117df797cf03c3c207e6205d1607dd3707f3d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include -#include -#include -#include -#include "utils/pytorch3d_cutils.h" - -// **************************************************************************** -// * SamplePdf * -// **************************************************************************** - -// Samples a probability density functions defined by bin edges `bins` and -// the non-negative per-bin probabilities `weights`. - -// Args: -// bins: FloatTensor of shape `(batch_size, n_bins+1)` denoting the edges -// of the sampling bins. - -// weights: FloatTensor of shape `(batch_size, n_bins)` containing -// non-negative numbers representing the probability of sampling the -// corresponding bin. - -// uniforms: The quantiles to draw, FloatTensor of shape -// `(batch_size, n_samples)`. - -// outputs: On call, this contains the quantiles to draw. It is overwritten -// with the drawn samples. FloatTensor of shape -// `(batch_size, n_samples), where `n_samples are drawn from each -// distribution. - -// eps: A constant preventing division by zero in case empty bins are -// present. 
- -// Not differentiable - -#ifdef WITH_CUDA -void SamplePdfCuda( - const torch::Tensor& bins, - const torch::Tensor& weights, - const torch::Tensor& outputs, - float eps); -#endif - -void SamplePdfCpu( - const torch::Tensor& bins, - const torch::Tensor& weights, - const torch::Tensor& outputs, - float eps); - -inline void SamplePdf( - const torch::Tensor& bins, - const torch::Tensor& weights, - const torch::Tensor& outputs, - float eps) { - if (bins.is_cuda()) { -#ifdef WITH_CUDA - CHECK_CUDA(weights); - CHECK_CONTIGUOUS_CUDA(outputs); - torch::autograd::increment_version(outputs); - SamplePdfCuda(bins, weights, outputs, eps); - return; -#else - AT_ERROR("Not compiled with GPU support."); -#endif - } - CHECK_CONTIGUOUS(outputs); - SamplePdfCpu(bins, weights, outputs, eps); -} diff --git a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf_cpu.cpp b/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf_cpu.cpp deleted file mode 100644 index 272197c6e9d5f8ee19153004310fb0fd8e10b94b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/sample_pdf/sample_pdf_cpu.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include - -// If the number of bins is the typical 64, it is -// quicker to use binary search than linear scan. -// With more bins, it is more important. -// There is no equivalent CUDA implementation yet. -#define USE_BINARY_SEARCH - -namespace { -// This worker function does the job of SamplePdf but only on -// batch elements in [start_batch, end_batch). -void SamplePdfCpu_worker( - const torch::Tensor& bins, - const torch::Tensor& weights, - const torch::Tensor& outputs, - float eps, - int64_t start_batch, - int64_t end_batch) { - const int64_t n_bins = weights.size(1); - const int64_t n_samples = outputs.size(1); - - auto bins_a = bins.accessor(); - auto weights_a = weights.accessor(); - float* output_p = outputs.data_ptr() + start_batch * n_samples; - -#ifdef USE_BINARY_SEARCH - std::vector partial_sums(n_bins); -#endif - - for (int64_t i_batch_elt = start_batch; i_batch_elt < end_batch; - ++i_batch_elt) { - auto bin_a = bins_a[i_batch_elt]; - auto weight_a = weights_a[i_batch_elt]; - - // Here we do the work which has to be done once per batch element. - // i.e. (1) finding the total weight. (2) If using binary search, - // precompute the partial sums of the weights. - - float total_weight = 0; - for (int64_t i_bin = 0; i_bin < n_bins; ++i_bin) { - total_weight += weight_a[i_bin]; -#ifdef USE_BINARY_SEARCH - partial_sums[i_bin] = total_weight; -#endif - } - total_weight += eps; - - for (int64_t i_sample = 0; i_sample < n_samples; ++i_sample) { - // Here we are taking a single random quantile (which is stored - // in *output_p) and using it to make a single sample, which we - // write back to the same location. First we find which bin - // the quantile lives in, either by binary search in the - // precomputed partial sums, or by scanning through the weights. 
- - float uniform = total_weight * *output_p; -#ifdef USE_BINARY_SEARCH - int64_t i_bin = std::lower_bound( - partial_sums.begin(), --partial_sums.end(), uniform) - - partial_sums.begin(); - if (i_bin > 0) { - uniform -= partial_sums[i_bin - 1]; - } -#else - int64_t i_bin = 0; - while (i_bin + 1 < n_bins && uniform > weight_a[i_bin]) { - uniform -= weight_a[i_bin]; - ++i_bin; - } -#endif - - // Now i_bin identifies the bin the quantile lives in, we use - // straight line interpolation to find the position of the - // quantile within the bin, and write it to *output_p. - - float bin_start = bin_a[i_bin]; - float bin_end = bin_a[i_bin + 1]; - float bin_weight = weight_a[i_bin]; - float output_value = bin_start; - if (uniform > bin_weight) { - output_value = bin_end; - } else if (bin_weight > eps) { - output_value += (uniform / bin_weight) * (bin_end - bin_start); - } - *output_p = output_value; - ++output_p; - } - } -} - -} // anonymous namespace - -void SamplePdfCpu( - const torch::Tensor& bins, - const torch::Tensor& weights, - const torch::Tensor& outputs, - float eps) { - const int64_t batch_size = bins.size(0); - const int64_t max_threads = std::min(4, at::get_num_threads()); - const int64_t n_threads = std::min(max_threads, batch_size); - if (batch_size == 0) { - return; - } - - // SamplePdfCpu_worker does the work of this function. We send separate ranges - // of batch elements to that function in nThreads-1 separate threads. - - std::vector threads; - threads.reserve(n_threads - 1); - const int64_t batch_elements_per_thread = 1 + (batch_size - 1) / n_threads; - int64_t start_batch = 0; - for (int iThread = 0; iThread < n_threads - 1; ++iThread) { - threads.emplace_back( - SamplePdfCpu_worker, - bins, - weights, - outputs, - eps, - start_batch, - start_batch + batch_elements_per_thread); - start_batch += batch_elements_per_thread; - } - - // The remaining batch elements are calculated in this threads. If nThreads is - // 1 then all the work happens in this line. - SamplePdfCpu_worker(bins, weights, outputs, eps, start_batch, batch_size); - for (auto&& thread : threads) { - thread.join(); - } - torch::autograd::increment_version(outputs); -} diff --git a/pytorch3d/pytorch3d/csrc/utils/dispatch.cuh b/pytorch3d/pytorch3d/csrc/utils/dispatch.cuh deleted file mode 100644 index 83f3d69ff40907c396e3d175402d5cf4561142b5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/dispatch.cuh +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// This file provides utilities for dispatching to specialized versions of -// functions. This is especially useful for CUDA kernels, since specializing -// them to particular input sizes can often allow the compiler to unroll loops -// and place arrays into registers, which can give huge performance speedups. -// -// As an example, suppose we have the following function which is specialized -// based on a compile-time int64_t value: -// -// template -// struct SquareOffset { -// static void run(T y) { -// T val = x * x + y; -// std::cout << val << std::endl; -// } -// } -// -// This function takes one compile-time argument x, and one run-time argument y. -// We might want to compile specialized versions of this for x=0, x=1, etc and -// then dispatch to the correct one based on the runtime value of x. 
-// One simple way to achieve this is with a lookup table: -// -// template -// void DispatchSquareOffset(const int64_t x, T y) { -// if (x == 0) { -// SquareOffset::run(y); -// } else if (x == 1) { -// SquareOffset::run(y); -// } else if (x == 2) { -// SquareOffset::run(y); -// } -// } -// -// This function takes both x and y as run-time arguments, and dispatches to -// different specialized versions of SquareOffset based on the run-time value -// of x. This works, but it's tedious and error-prone. If we want to change the -// set of x values for which we provide compile-time specializations, then we -// will need to do a lot of tedius editing of the dispatch function. Also, if we -// want to provide compile-time specializations for another function other than -// SquareOffset, we will need to duplicate the entire lookup table. -// -// To solve these problems, we can use the DispatchKernel1D function provided by -// this file instead: -// -// template -// void DispatchSquareOffset(const int64_t x, T y) { -// constexpr int64_t xmin = 0; -// constexpr int64_t xmax = 2; -// DispatchKernel1D(x, y); -// } -// -// DispatchKernel1D uses template metaprogramming to compile specialized -// versions of SquareOffset for all values of x with xmin <= x <= xmax, and -// then dispatches to the correct one based on the run-time value of x. If we -// want to change the range of x values for which SquareOffset is specialized -// at compile-time, then all we have to do is change the values of the -// compile-time constants xmin and xmax. -// -// This file also allows us to similarly dispatch functions that depend on two -// compile-time int64_t values, using the DispatchKernel2D function like this: -// -// template -// struct Sum { -// static void run(T z, T w) { -// T val = x + y + z + w; -// std::cout << val << std::endl; -// } -// } -// -// template -// void DispatchSum(const int64_t x, const int64_t y, int z, int w) { -// constexpr int64_t xmin = 1; -// constexpr int64_t xmax = 3; -// constexpr int64_t ymin = 2; -// constexpr int64_t ymax = 5; -// DispatchKernel2D(x, y, z, w); -// } -// -// Like its 1D counterpart, DispatchKernel2D uses template metaprogramming to -// compile specialized versions of sum for all values of (x, y) with -// xmin <= x <= xmax and ymin <= y <= ymax, then dispatches to the correct -// specialized version based on the runtime values of x and y. - -// Define some helper structs in an anonymous namespace. -namespace { - -// 1D dispatch: general case. -// Kernel is the function we want to dispatch to; it should take a typename and -// an int64_t as template args, and it should define a static void function -// run which takes any number of arguments of any type. -// In order to dispatch, we will take an additional template argument curN, -// and increment it via template recursion until it is equal to the run-time -// argument N. -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - int64_t curN, - typename... Args> -struct DispatchKernelHelper1D { - static void run(const int64_t N, Args... args) { - if (curN == N) { - // The compile-time value curN is equal to the run-time value N, so we - // can dispatch to the run method of the Kernel. - Kernel::run(args...); - } else if (curN < N) { - // Increment curN via template recursion - DispatchKernelHelper1D::run( - N, args...); - } - // We shouldn't get here -- throw an error? - } -}; - -// 1D dispatch: Specialization when curN == maxN -// We need this base case to avoid infinite template recursion. 
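// Concretely, dispatching with minN = 0 and maxN = 2 effectively expands
// (via DispatchKernelHelper1D) into a chain of checks, roughly:
//
//   if (N == 0) Kernel<T, 0>::run(args...);
//   else if (N == 1) Kernel<T, 1>::run(args...);
//   else if (N == 2) Kernel<T, 2>::run(args...);
//
// with each "else" step supplied by one level of template recursion, and the
// curN == maxN specialization below terminating that recursion.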
-template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - typename... Args> -struct DispatchKernelHelper1D { - static void run(const int64_t N, Args... args) { - if (N == maxN) { - Kernel::run(args...); - } - // We shouldn't get here -- throw an error? - } -}; - -// 2D dispatch, general case. -// This is similar to the 1D case: we take additional template args curN and -// curM, and increment them via template recursion until they are equal to -// the run-time values of N and M, at which point we dispatch to the run -// method of the kernel. -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - int64_t curN, - int64_t minM, - int64_t maxM, - int64_t curM, - typename... Args> -struct DispatchKernelHelper2D { - static void run(const int64_t N, const int64_t M, Args... args) { - if (curN == N && curM == M) { - Kernel::run(args...); - } else if (curN < N && curM < M) { - // Increment both curN and curM. This isn't strictly necessary; we could - // just increment one or the other at each step. But this helps to cut - // on the number of recursive calls we make. - DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - curN + 1, - minM, - maxM, - curM + 1, - Args...>::run(N, M, args...); - } else if (curN < N) { - // Increment curN only - DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - curN + 1, - minM, - maxM, - curM, - Args...>::run(N, M, args...); - } else if (curM < M) { - // Increment curM only - DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - curN, - minM, - maxM, - curM + 1, - Args...>::run(N, M, args...); - } - } -}; - -// 2D dispatch, specialization for curN == maxN -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - int64_t minM, - int64_t maxM, - int64_t curM, - typename... Args> -struct DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - maxN, - minM, - maxM, - curM, - Args...> { - static void run(const int64_t N, const int64_t M, Args... args) { - if (maxN == N && curM == M) { - Kernel::run(args...); - } else if (curM < maxM) { - DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - maxN, - minM, - maxM, - curM + 1, - Args...>::run(N, M, args...); - } - // We should not get here -- throw an error? - } -}; - -// 2D dispatch, specialization for curM == maxM -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - int64_t curN, - int64_t minM, - int64_t maxM, - typename... Args> -struct DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - curN, - minM, - maxM, - maxM, - Args...> { - static void run(const int64_t N, const int64_t M, Args... args) { - if (curN == N && maxM == M) { - Kernel::run(args...); - } else if (curN < maxN) { - DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - curN + 1, - minM, - maxM, - maxM, - Args...>::run(N, M, args...); - } - // We should not get here -- throw an error? - } -}; - -// 2D dispatch, specialization for curN == maxN, curM == maxM -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - int64_t minM, - int64_t maxM, - typename... Args> -struct DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - maxN, - minM, - maxM, - maxM, - Args...> { - static void run(const int64_t N, const int64_t M, Args... args) { - if (maxN == N && maxM == M) { - Kernel::run(args...); - } - // We should not get here -- throw an error? 
- } -}; - -} // namespace - -// This is the function we expect users to call to dispatch to 1D functions -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - typename... Args> -void DispatchKernel1D(const int64_t N, Args... args) { - if (minN <= N && N <= maxN) { - // Kick off the template recursion by calling the Helper with curN = minN - DispatchKernelHelper1D::run( - N, args...); - } - // Maybe throw an error if we tried to dispatch outside the allowed range? -} - -// This is the function we expect users to call to dispatch to 2D functions -template < - template - class Kernel, - typename T, - int64_t minN, - int64_t maxN, - int64_t minM, - int64_t maxM, - typename... Args> -void DispatchKernel2D(const int64_t N, const int64_t M, Args... args) { - if (minN <= N && N <= maxN && minM <= M && M <= maxM) { - // Kick off the template recursion by calling the Helper with curN = minN - // and curM = minM - DispatchKernelHelper2D< - Kernel, - T, - minN, - maxN, - minN, - minM, - maxM, - minM, - Args...>::run(N, M, args...); - } - // Maybe throw an error if we tried to dispatch outside the specified range? -} diff --git a/pytorch3d/pytorch3d/csrc/utils/float_math.cuh b/pytorch3d/pytorch3d/csrc/utils/float_math.cuh deleted file mode 100644 index e48e960e96544fd901655ce1d0217513d300187b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/float_math.cuh +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include - -// Set epsilon -#ifdef _MSC_VER -#define vEpsilon 1e-8f -#else -const auto vEpsilon = 1e-8; -#endif - -// Common functions and operators for float2. - -__device__ inline float2 operator-(const float2& a, const float2& b) { - return make_float2(a.x - b.x, a.y - b.y); -} - -__device__ inline float2 operator+(const float2& a, const float2& b) { - return make_float2(a.x + b.x, a.y + b.y); -} - -__device__ inline float2 operator/(const float2& a, const float2& b) { - return make_float2(a.x / b.x, a.y / b.y); -} - -__device__ inline float2 operator/(const float2& a, const float b) { - return make_float2(a.x / b, a.y / b); -} - -__device__ inline float2 operator*(const float2& a, const float2& b) { - return make_float2(a.x * b.x, a.y * b.y); -} - -__device__ inline float2 operator*(const float a, const float2& b) { - return make_float2(a * b.x, a * b.y); -} - -__device__ inline float FloatMin3(const float a, const float b, const float c) { - return fminf(a, fminf(b, c)); -} - -__device__ inline float FloatMax3(const float a, const float b, const float c) { - return fmaxf(a, fmaxf(b, c)); -} - -__device__ inline float dot(const float2& a, const float2& b) { - return a.x * b.x + a.y * b.y; -} - -// Backward pass for the dot product. -// Args: -// a, b: Coordinates of two points. -// grad_dot: Upstream gradient for the output. -// -// Returns: -// tuple of gradients for each of the input points: -// (float2 grad_a, float2 grad_b) -// -__device__ inline thrust::tuple -DotBackward(const float2& a, const float2& b, const float& grad_dot) { - return thrust::make_tuple(grad_dot * b, grad_dot * a); -} - -__device__ inline float sum(const float2& a) { - return a.x + a.y; -} - -// Common functions and operators for float3. 
- -__device__ inline float3 operator-(const float3& a, const float3& b) { - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} - -__device__ inline float3 operator+(const float3& a, const float3& b) { - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} - -__device__ inline float3 operator/(const float3& a, const float3& b) { - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -} - -__device__ inline float3 operator/(const float3& a, const float b) { - return make_float3(a.x / b, a.y / b, a.z / b); -} - -__device__ inline float3 operator*(const float3& a, const float3& b) { - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -} - -__device__ inline float3 operator*(const float a, const float3& b) { - return make_float3(a * b.x, a * b.y, a * b.z); -} - -__device__ inline float dot(const float3& a, const float3& b) { - return a.x * b.x + a.y * b.y + a.z * b.z; -} - -__device__ inline float sum(const float3& a) { - return a.x + a.y + a.z; -} - -__device__ inline float3 cross(const float3& a, const float3& b) { - return make_float3( - a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); -} - -__device__ inline thrust::tuple -cross_backward(const float3& a, const float3& b, const float3& grad_cross) { - const float grad_ax = -grad_cross.y * b.z + grad_cross.z * b.y; - const float grad_ay = grad_cross.x * b.z - grad_cross.z * b.x; - const float grad_az = -grad_cross.x * b.y + grad_cross.y * b.x; - const float3 grad_a = make_float3(grad_ax, grad_ay, grad_az); - - const float grad_bx = grad_cross.y * a.z - grad_cross.z * a.y; - const float grad_by = -grad_cross.x * a.z + grad_cross.z * a.x; - const float grad_bz = grad_cross.x * a.y - grad_cross.y * a.x; - const float3 grad_b = make_float3(grad_bx, grad_by, grad_bz); - - return thrust::make_tuple(grad_a, grad_b); -} - -__device__ inline float norm(const float3& a) { - return sqrt(dot(a, a)); -} - -__device__ inline float3 normalize(const float3& a) { - return a / (norm(a) + vEpsilon); -} - -__device__ inline float3 normalize_backward( - const float3& a, - const float3& grad_normz) { - const float a_norm = norm(a) + vEpsilon; - const float3 out = a / a_norm; - - const float grad_ax = grad_normz.x * (1.0f - out.x * out.x) / a_norm + - grad_normz.y * (-out.x * out.y) / a_norm + - grad_normz.z * (-out.x * out.z) / a_norm; - const float grad_ay = grad_normz.x * (-out.x * out.y) / a_norm + - grad_normz.y * (1.0f - out.y * out.y) / a_norm + - grad_normz.z * (-out.y * out.z) / a_norm; - const float grad_az = grad_normz.x * (-out.x * out.z) / a_norm + - grad_normz.y * (-out.y * out.z) / a_norm + - grad_normz.z * (1.0f - out.z * out.z) / a_norm; - return make_float3(grad_ax, grad_ay, grad_az); -} diff --git a/pytorch3d/pytorch3d/csrc/utils/geometry_utils.cuh b/pytorch3d/pytorch3d/csrc/utils/geometry_utils.cuh deleted file mode 100644 index 66aee7fc7bcd3495bc7dbba56d89995d383b655e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/geometry_utils.cuh +++ /dev/null @@ -1,792 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include "float_math.cuh" - -// Set epsilon for preventing floating point errors and division by 0. 
-#ifdef _MSC_VER -#define kEpsilon 1e-8f -#else -const auto kEpsilon = 1e-8; -#endif - -// ************************************************************* // -// vec2 utils // -// ************************************************************* // - -// Determines whether a point p is on the right side of a 2D line segment -// given by the end points v0, v1. -// -// Args: -// p: vec2 Coordinates of a point. -// v0, v1: vec2 Coordinates of the end points of the edge. -// -// Returns: -// area: The signed area of the parallelogram given by the vectors -// A = p - v0 -// B = v1 - v0 -// -__device__ inline float -EdgeFunctionForward(const float2& p, const float2& v0, const float2& v1) { - return (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x); -} - -// Backward pass for the edge function returning partial dervivatives for each -// of the input points. -// -// Args: -// p: vec2 Coordinates of a point. -// v0, v1: vec2 Coordinates of the end points of the edge. -// grad_edge: Upstream gradient for output from edge function. -// -// Returns: -// tuple of gradients for each of the input points: -// (float2 d_edge_dp, float2 d_edge_dv0, float2 d_edge_dv1) -// -__device__ inline thrust::tuple EdgeFunctionBackward( - const float2& p, - const float2& v0, - const float2& v1, - const float& grad_edge) { - const float2 dedge_dp = make_float2(v1.y - v0.y, v0.x - v1.x); - const float2 dedge_dv0 = make_float2(p.y - v1.y, v1.x - p.x); - const float2 dedge_dv1 = make_float2(v0.y - p.y, p.x - v0.x); - return thrust::make_tuple( - grad_edge * dedge_dp, grad_edge * dedge_dv0, grad_edge * dedge_dv1); -} - -// The forward pass for computing the barycentric coordinates of a point -// relative to a triangle. -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: Coordinates of the triangle vertices. -// -// Returns -// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1]. -// -__device__ inline float3 BarycentricCoordsForward( - const float2& p, - const float2& v0, - const float2& v1, - const float2& v2) { - const float area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; - const float w0 = EdgeFunctionForward(p, v1, v2) / area; - const float w1 = EdgeFunctionForward(p, v2, v0) / area; - const float w2 = EdgeFunctionForward(p, v0, v1) / area; - return make_float3(w0, w1, w2); -} - -// The backward pass for computing the barycentric coordinates of a point -// relative to a triangle. -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: (x, y) coordinates of the triangle vertices. -// grad_bary_upstream: vec3 Upstream gradient for each of the -// barycentric coordaintes [grad_w0, grad_w1, grad_w2]. -// -// Returns -// tuple of gradients for each of the triangle vertices: -// (float2 grad_v0, float2 grad_v1, float2 grad_v2) -// -__device__ inline thrust::tuple -BarycentricCoordsBackward( - const float2& p, - const float2& v0, - const float2& v1, - const float2& v2, - const float3& grad_bary_upstream) { - const float area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; - const float area2 = pow(area, 2.0f); - const float e0 = EdgeFunctionForward(p, v1, v2); - const float e1 = EdgeFunctionForward(p, v2, v0); - const float e2 = EdgeFunctionForward(p, v0, v1); - - const float grad_w0 = grad_bary_upstream.x; - const float grad_w1 = grad_bary_upstream.y; - const float grad_w2 = grad_bary_upstream.z; - - // Calculate component of the gradient from each of w0, w1 and w2. - // e.g. 
for w0: - // dloss/dw0_v = dl/dw0 * dw0/dw0_top * dw0_top/dv - // + dl/dw0 * dw0/dw0_bot * dw0_bot/dv - const float dw0_darea = -e0 / (area2); - const float dw0_e0 = 1 / area; - const float dloss_d_w0area = grad_w0 * dw0_darea; - const float dloss_e0 = grad_w0 * dw0_e0; - auto de0_dv = EdgeFunctionBackward(p, v1, v2, dloss_e0); - auto dw0area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w0area); - const float2 dw0_p = thrust::get<0>(de0_dv); - const float2 dw0_dv0 = thrust::get<1>(dw0area_dv); - const float2 dw0_dv1 = thrust::get<1>(de0_dv) + thrust::get<2>(dw0area_dv); - const float2 dw0_dv2 = thrust::get<2>(de0_dv) + thrust::get<0>(dw0area_dv); - - const float dw1_darea = -e1 / (area2); - const float dw1_e1 = 1 / area; - const float dloss_d_w1area = grad_w1 * dw1_darea; - const float dloss_e1 = grad_w1 * dw1_e1; - auto de1_dv = EdgeFunctionBackward(p, v2, v0, dloss_e1); - auto dw1area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w1area); - const float2 dw1_p = thrust::get<0>(de1_dv); - const float2 dw1_dv0 = thrust::get<2>(de1_dv) + thrust::get<1>(dw1area_dv); - const float2 dw1_dv1 = thrust::get<2>(dw1area_dv); - const float2 dw1_dv2 = thrust::get<1>(de1_dv) + thrust::get<0>(dw1area_dv); - - const float dw2_darea = -e2 / (area2); - const float dw2_e2 = 1 / area; - const float dloss_d_w2area = grad_w2 * dw2_darea; - const float dloss_e2 = grad_w2 * dw2_e2; - auto de2_dv = EdgeFunctionBackward(p, v0, v1, dloss_e2); - auto dw2area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w2area); - const float2 dw2_p = thrust::get<0>(de2_dv); - const float2 dw2_dv0 = thrust::get<1>(de2_dv) + thrust::get<1>(dw2area_dv); - const float2 dw2_dv1 = thrust::get<2>(de2_dv) + thrust::get<2>(dw2area_dv); - const float2 dw2_dv2 = thrust::get<0>(dw2area_dv); - - const float2 dbary_p = dw0_p + dw1_p + dw2_p; - const float2 dbary_dv0 = dw0_dv0 + dw1_dv0 + dw2_dv0; - const float2 dbary_dv1 = dw0_dv1 + dw1_dv1 + dw2_dv1; - const float2 dbary_dv2 = dw0_dv2 + dw1_dv2 + dw2_dv2; - - return thrust::make_tuple(dbary_p, dbary_dv0, dbary_dv1, dbary_dv2); -} - -// Forward pass for applying perspective correction to barycentric coordinates. -// -// Args: -// bary: Screen-space barycentric coordinates for a point -// z0, z1, z2: Camera-space z-coordinates of the triangle vertices -// -// Returns -// World-space barycentric coordinates -// -__device__ inline float3 BarycentricPerspectiveCorrectionForward( - const float3& bary, - const float z0, - const float z1, - const float z2) { - const float w0_top = bary.x * z1 * z2; - const float w1_top = z0 * bary.y * z2; - const float w2_top = z0 * z1 * bary.z; - const float denom = fmaxf(w0_top + w1_top + w2_top, kEpsilon); - const float w0 = w0_top / denom; - const float w1 = w1_top / denom; - const float w2 = w2_top / denom; - return make_float3(w0, w1, w2); -} - -// Backward pass for applying perspective correction to barycentric coordinates. -// -// Args: -// bary: Screen-space barycentric coordinates for a point -// z0, z1, z2: Camera-space z-coordinates of the triangle vertices -// grad_out: Upstream gradient of the loss with respect to the corrected -// barycentric coordinates. -// -// Returns a tuple of: -// grad_bary: Downstream gradient of the loss with respect to the the -// uncorrected barycentric coordinates. 
-// grad_z0, grad_z1, grad_z2: Downstream gradient of the loss with respect -// to the z-coordinates of the triangle verts -__device__ inline thrust::tuple -BarycentricPerspectiveCorrectionBackward( - const float3& bary, - const float z0, - const float z1, - const float z2, - const float3& grad_out) { - // Recompute forward pass - const float w0_top = bary.x * z1 * z2; - const float w1_top = z0 * bary.y * z2; - const float w2_top = z0 * z1 * bary.z; - const float denom = fmaxf(w0_top + w1_top + w2_top, kEpsilon); - - // Now do backward pass - const float grad_denom_top = - -w0_top * grad_out.x - w1_top * grad_out.y - w2_top * grad_out.z; - const float grad_denom = grad_denom_top / (denom * denom); - const float grad_w0_top = grad_denom + grad_out.x / denom; - const float grad_w1_top = grad_denom + grad_out.y / denom; - const float grad_w2_top = grad_denom + grad_out.z / denom; - const float grad_bary_x = grad_w0_top * z1 * z2; - const float grad_bary_y = grad_w1_top * z0 * z2; - const float grad_bary_z = grad_w2_top * z0 * z1; - const float3 grad_bary = make_float3(grad_bary_x, grad_bary_y, grad_bary_z); - const float grad_z0 = grad_w1_top * bary.y * z2 + grad_w2_top * bary.z * z1; - const float grad_z1 = grad_w0_top * bary.x * z2 + grad_w2_top * bary.z * z0; - const float grad_z2 = grad_w0_top * bary.x * z1 + grad_w1_top * bary.y * z0; - return thrust::make_tuple(grad_bary, grad_z0, grad_z1, grad_z2); -} - -// Clip negative barycentric coordinates to 0.0 and renormalize so -// the barycentric coordinates for a point sum to 1. When the blur_radius -// is greater than 0, a face will still be recorded as overlapping a pixel -// if the pixel is outside the face. In this case at least one of the -// barycentric coordinates for the pixel relative to the face will be negative. -// Clipping will ensure that the texture and z buffer are interpolated -// correctly. -// -// Args -// bary: (w0, w1, w2) barycentric coordinates which can be outside the -// range [0, 1]. -// -// Returns -// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1] which -// satisfy the condition: sum(w0, w1, w2) = 1.0. -// -__device__ inline float3 BarycentricClipForward(const float3 bary) { - float3 w = make_float3(0.0f, 0.0f, 0.0f); - // Clamp lower bound only - w.x = max(bary.x, 0.0); - w.y = max(bary.y, 0.0); - w.z = max(bary.z, 0.0); - float w_sum = w.x + w.y + w.z; - w_sum = fmaxf(w_sum, 1e-5); - w.x /= w_sum; - w.y /= w_sum; - w.z /= w_sum; - - return w; -} - -// Backward pass for barycentric coordinate clipping. -// -// Args -// bary: (w0, w1, w2) barycentric coordinates which can be outside the -// range [0, 1]. -// grad_baryclip_upstream: vec3 Upstream gradient for each of the clipped -// barycentric coordinates [grad_w0, grad_w1, grad_w2]. -// -// Returns -// vec3 of gradients for the unclipped barycentric coordinates: -// (grad_w0, grad_w1, grad_w2) -// -__device__ inline float3 BarycentricClipBackward( - const float3 bary, - const float3 grad_baryclip_upstream) { - // Redo some of the forward pass calculations - float3 w = make_float3(0.0f, 0.0f, 0.0f); - // Clamp lower bound only - w.x = max(bary.x, 0.0); - w.y = max(bary.y, 0.0); - w.z = max(bary.z, 0.0); - float w_sum = w.x + w.y + w.z; - - float3 grad_bary = make_float3(1.0f, 1.0f, 1.0f); - float3 grad_clip = make_float3(1.0f, 1.0f, 1.0f); - float3 grad_sum = make_float3(1.0f, 1.0f, 1.0f); - - // Check if sum was clipped. 
- float grad_sum_clip = 1.0f; - if (w_sum < 1e-5) { - grad_sum_clip = 0.0f; - w_sum = 1e-5; - } - - // Check if any of bary values have been clipped. - if (bary.x < 0.0f) { - grad_clip.x = 0.0f; - } - if (bary.y < 0.0f) { - grad_clip.y = 0.0f; - } - if (bary.z < 0.0f) { - grad_clip.z = 0.0f; - } - - // Gradients of the sum. - grad_sum.x = -w.x / (pow(w_sum, 2.0f)) * grad_sum_clip; - grad_sum.y = -w.y / (pow(w_sum, 2.0f)) * grad_sum_clip; - grad_sum.z = -w.z / (pow(w_sum, 2.0f)) * grad_sum_clip; - - // Gradients for each of the bary coordinates including the cross terms - // from the sum. - grad_bary.x = grad_clip.x * - (grad_baryclip_upstream.x * (1.0f / w_sum + grad_sum.x) + - grad_baryclip_upstream.y * (grad_sum.y) + - grad_baryclip_upstream.z * (grad_sum.z)); - - grad_bary.y = grad_clip.y * - (grad_baryclip_upstream.y * (1.0f / w_sum + grad_sum.y) + - grad_baryclip_upstream.x * (grad_sum.x) + - grad_baryclip_upstream.z * (grad_sum.z)); - - grad_bary.z = grad_clip.z * - (grad_baryclip_upstream.z * (1.0f / w_sum + grad_sum.z) + - grad_baryclip_upstream.x * (grad_sum.x) + - grad_baryclip_upstream.y * (grad_sum.y)); - - return grad_bary; -} - -// Return minimum distance between line segment (v1 - v0) and point p. -// -// Args: -// p: Coordinates of a point. -// v0, v1: Coordinates of the end points of the line segment. -// -// Returns: -// squared distance to the boundary of the triangle. -// -__device__ inline float -PointLineDistanceForward(const float2& p, const float2& a, const float2& b) { - const float2 ba = b - a; - float l2 = dot(ba, ba); - float t = dot(ba, p - a) / l2; - if (l2 <= kEpsilon) { - return dot(p - b, p - b); - } - t = __saturatef(t); // clamp to the interval [+0.0, 1.0] - const float2 p_proj = a + t * ba; - const float2 d = (p_proj - p); - return dot(d, d); // squared distance -} - -// Backward pass for point to line distance in 2D. -// -// Args: -// p: Coordinates of a point. -// v0, v1: Coordinates of the end points of the line segment. -// grad_dist: Upstream gradient for the distance. -// -// Returns: -// tuple of gradients for each of the input points: -// (float2 grad_p, float2 grad_v0, float2 grad_v1) -// -__device__ inline thrust::tuple -PointLineDistanceBackward( - const float2& p, - const float2& v0, - const float2& v1, - const float& grad_dist) { - // Redo some of the forward pass calculations. - const float2 v1v0 = v1 - v0; - const float2 pv0 = p - v0; - const float t_bot = dot(v1v0, v1v0); - const float t_top = dot(v1v0, pv0); - float tt = t_top / t_bot; - tt = __saturatef(tt); - const float2 p_proj = (1.0f - tt) * v0 + tt * v1; - const float2 d = p - p_proj; - const float dist = sqrt(dot(d, d)); - - const float2 grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p); - const float2 grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p); - const float2 grad_v1 = grad_dist * tt * 2.0f * (p_proj - p); - - return thrust::make_tuple(grad_p, grad_v0, grad_v1); -} - -// The forward pass for calculating the shortest distance between a point -// and a triangle. -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: Coordinates of the three triangle vertices. -// -// Returns: -// shortest squared distance from a point to a triangle. -// -__device__ inline float PointTriangleDistanceForward( - const float2& p, - const float2& v0, - const float2& v1, - const float2& v2) { - // Compute distance to all 3 edges of the triangle and return the min. 
- const float e01_dist = PointLineDistanceForward(p, v0, v1); - const float e02_dist = PointLineDistanceForward(p, v0, v2); - const float e12_dist = PointLineDistanceForward(p, v1, v2); - const float edge_dist = fminf(fminf(e01_dist, e02_dist), e12_dist); - return edge_dist; -} - -// Backward pass for point triangle distance. -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: Coordinates of the three triangle vertices. -// grad_dist: Upstream gradient for the distance. -// -// Returns: -// tuple of gradients for each of the triangle vertices: -// (float2 grad_v0, float2 grad_v1, float2 grad_v2) -// -__device__ inline thrust::tuple -PointTriangleDistanceBackward( - const float2& p, - const float2& v0, - const float2& v1, - const float2& v2, - const float& grad_dist) { - // Compute distance to all 3 edges of the triangle. - const float e01_dist = PointLineDistanceForward(p, v0, v1); - const float e02_dist = PointLineDistanceForward(p, v0, v2); - const float e12_dist = PointLineDistanceForward(p, v1, v2); - - // Initialize output tensors. - float2 grad_v0 = make_float2(0.0f, 0.0f); - float2 grad_v1 = make_float2(0.0f, 0.0f); - float2 grad_v2 = make_float2(0.0f, 0.0f); - float2 grad_p = make_float2(0.0f, 0.0f); - - // Find which edge is the closest and return PointLineDistanceBackward for - // that edge. - if (e01_dist <= e02_dist && e01_dist <= e12_dist) { - // Closest edge is v1 - v0. - auto grad_e01 = PointLineDistanceBackward(p, v0, v1, grad_dist); - grad_p = thrust::get<0>(grad_e01); - grad_v0 = thrust::get<1>(grad_e01); - grad_v1 = thrust::get<2>(grad_e01); - } else if (e02_dist <= e01_dist && e02_dist <= e12_dist) { - // Closest edge is v2 - v0. - auto grad_e02 = PointLineDistanceBackward(p, v0, v2, grad_dist); - grad_p = thrust::get<0>(grad_e02); - grad_v0 = thrust::get<1>(grad_e02); - grad_v2 = thrust::get<2>(grad_e02); - } else if (e12_dist <= e01_dist && e12_dist <= e02_dist) { - // Closest edge is v2 - v1. - auto grad_e12 = PointLineDistanceBackward(p, v1, v2, grad_dist); - grad_p = thrust::get<0>(grad_e12); - grad_v1 = thrust::get<1>(grad_e12); - grad_v2 = thrust::get<2>(grad_e12); - } - - return thrust::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); -} - -// ************************************************************* // -// vec3 utils // -// ************************************************************* // - -// Computes the area of a triangle (v0, v1, v2). -// -// Args: -// v0, v1, v2: vec3 coordinates of the triangle vertices -// -// Returns -// area: float: The area of the triangle -// -__device__ inline float -AreaOfTriangle(const float3& v0, const float3& v1, const float3& v2) { - float3 p0 = v1 - v0; - float3 p1 = v2 - v0; - - // compute the hypotenus of the scross product (p0 x p1) - float dd = hypot( - p0.y * p1.z - p0.z * p1.y, - hypot(p0.z * p1.x - p0.x * p1.z, p0.x * p1.y - p0.y * p1.x)); - - return dd / 2.0; -} - -// Computes the barycentric coordinates of a point p relative -// to a triangle (v0, v1, v2), i.e. p = w0 * v0 + w1 * v1 + w2 * v2 -// s.t. w0 + w1 + w2 = 1.0 -// -// NOTE that this function assumes that p lives on the space spanned -// by (v0, v1, v2). 
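The area computation described here is half the length of the cross product of two edge vectors; the hypot-of-hypot form only guards against overflow. A minimal host-side C++ sketch of the same formula, using a hypothetical right triangle whose area is known to be 6 (this is an illustration, not the original device function):

#include <cmath>
#include <cstdio>

struct Vec3 { float x, y, z; };

// Plain C++ analogue of the area formula: area = 0.5 * |(v1 - v0) x (v2 - v0)|.
static float TriangleArea(Vec3 v0, Vec3 v1, Vec3 v2) {
  const Vec3 p0 = {v1.x - v0.x, v1.y - v0.y, v1.z - v0.z};
  const Vec3 p1 = {v2.x - v0.x, v2.y - v0.y, v2.z - v0.z};
  // hypot(a, hypot(b, c)) == sqrt(a*a + b*b + c*c), i.e. the cross product norm.
  return std::hypot(
             p0.y * p1.z - p0.z * p1.y,
             std::hypot(p0.z * p1.x - p0.x * p1.z, p0.x * p1.y - p0.y * p1.x)) /
      2.0f;
}

int main() {
  // Right triangle with legs 3 and 4 in the z = 0 plane: expected area 6.
  printf("%f\n", TriangleArea({0, 0, 0}, {3, 0, 0}, {0, 4, 0}));
}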
-// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2) -// and throw an error if check fails -// -// Args: -// p: vec3 coordinates of a point -// v0, v1, v2: vec3 coordinates of the triangle vertices -// -// Returns -// bary: (w0, w1, w2) barycentric coordinates -// -__device__ inline float3 BarycentricCoords3Forward( - const float3& p, - const float3& v0, - const float3& v1, - const float3& v2) { - float3 p0 = v1 - v0; - float3 p1 = v2 - v0; - float3 p2 = p - v0; - - const float d00 = dot(p0, p0); - const float d01 = dot(p0, p1); - const float d11 = dot(p1, p1); - const float d20 = dot(p2, p0); - const float d21 = dot(p2, p1); - - const float denom = d00 * d11 - d01 * d01 + kEpsilon; - const float w1 = (d11 * d20 - d01 * d21) / denom; - const float w2 = (d00 * d21 - d01 * d20) / denom; - const float w0 = 1.0f - w1 - w2; - - return make_float3(w0, w1, w2); -} - -// Checks whether the point p is inside the triangle (v0, v1, v2). -// A point is inside the triangle, if all barycentric coordinates -// wrt the triangle are >= 0 & <= 1. -// If the triangle is degenerate, aka line or point, then return False. -// -// NOTE that this function assumes that p lives on the space spanned -// by (v0, v1, v2). -// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2) -// and throw an error if check fails -// -// Args: -// p: vec3 coordinates of a point -// v0, v1, v2: vec3 coordinates of the triangle vertices -// min_triangle_area: triangles less than this size are considered -// points/lines, IsInsideTriangle returns False -// -// Returns: -// inside: bool indicating wether p is inside triangle -// -__device__ inline bool IsInsideTriangle( - const float3& p, - const float3& v0, - const float3& v1, - const float3& v2, - const double min_triangle_area) { - bool inside; - if (AreaOfTriangle(v0, v1, v2) < min_triangle_area) { - inside = 0; - } else { - float3 bary = BarycentricCoords3Forward(p, v0, v1, v2); - bool x_in = 0.0f <= bary.x && bary.x <= 1.0f; - bool y_in = 0.0f <= bary.y && bary.y <= 1.0f; - bool z_in = 0.0f <= bary.z && bary.z <= 1.0f; - inside = x_in && y_in && z_in; - } - return inside; -} - -// Computes the minimum squared Euclidean distance between the point p -// and the segment spanned by (v0, v1). -// To find this we parametrize p as: x(t) = v0 + t * (v1 - v0) -// and find t which minimizes (x(t) - p) ^ 2. -// Note that p does not need to live in the space spanned by (v0, v1) -// -// Args: -// p: vec3 coordinates of a point -// v0, v1: vec3 coordinates of start and end of segment -// -// Returns: -// dist: the minimum squared distance of p from segment (v0, v1) -// - -__device__ inline float -PointLine3DistanceForward(const float3& p, const float3& v0, const float3& v1) { - const float3 v1v0 = v1 - v0; - const float3 pv0 = p - v0; - const float t_bot = dot(v1v0, v1v0); - const float t_top = dot(pv0, v1v0); - // if t_bot small, then v0 == v1, set tt to 0. - float tt = (t_bot < kEpsilon) ? 0.0f : (t_top / t_bot); - - tt = __saturatef(tt); // clamps to [0, 1] - - const float3 p_proj = v0 + tt * v1v0; - const float3 diff = p - p_proj; - const float dist = dot(diff, diff); - return dist; -} - -// Backward function of the minimum squared Euclidean distance between the point -// p and the line segment (v0, v1). 
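The clamped-projection rule used for the segment distance (project p onto the segment, clamp the parameter t to [0, 1], measure the squared distance to the projection) can be checked with a plain host-side C++ sketch; the inputs below are hypothetical and the helpers are simplified analogues of the device code, not the original functions:

#include <algorithm>
#include <cstdio>

struct V3 { float x, y, z; };

static float Dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static V3 Sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }

// Squared distance from p to the segment (v0, v1), clamping the projection
// parameter t to [0, 1]; degenerate segments fall back to |p - v1|^2.
static float PointSegmentDist2(V3 p, V3 v0, V3 v1) {
  const V3 seg = Sub(v1, v0);
  const float l2 = Dot(seg, seg);
  if (l2 <= 1e-8f) {
    const V3 d = Sub(p, v1);
    return Dot(d, d);
  }
  const float t = std::min(std::max(Dot(Sub(p, v0), seg) / l2, 0.0f), 1.0f);
  const V3 proj = {v0.x + t * seg.x, v0.y + t * seg.y, v0.z + t * seg.z};
  const V3 d = Sub(p, proj);
  return Dot(d, d);
}

int main() {
  // Point (0, 2, 0) vs. segment (-1, 0, 0) -> (1, 0, 0): squared distance 4.
  printf("%f\n", PointSegmentDist2({0, 2, 0}, {-1, 0, 0}, {1, 0, 0}));
}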
-// -// Args: -// p: vec3 coordinates of a point -// v0, v1: vec3 coordinates of start and end of segment -// grad_dist: Float of the gradient wrt dist -// -// Returns: -// tuple of gradients for the point and line segment (v0, v1): -// (float3 grad_p, float3 grad_v0, float3 grad_v1) - -__device__ inline thrust::tuple -PointLine3DistanceBackward( - const float3& p, - const float3& v0, - const float3& v1, - const float& grad_dist) { - const float3 v1v0 = v1 - v0; - const float3 pv0 = p - v0; - const float t_bot = dot(v1v0, v1v0); - const float t_top = dot(v1v0, pv0); - - float3 grad_p = make_float3(0.0f, 0.0f, 0.0f); - float3 grad_v0 = make_float3(0.0f, 0.0f, 0.0f); - float3 grad_v1 = make_float3(0.0f, 0.0f, 0.0f); - - const float tt = t_top / t_bot; - - if (t_bot < kEpsilon) { - // if t_bot small, then v0 == v1, - // and dist = 0.5 * dot(pv0, pv0) + 0.5 * dot(pv1, pv1) - grad_p = grad_dist * 2.0f * pv0; - grad_v0 = -0.5f * grad_p; - grad_v1 = grad_v0; - } else if (tt < 0.0f) { - grad_p = grad_dist * 2.0f * pv0; - grad_v0 = -1.0f * grad_p; - // no gradients wrt v1 - } else if (tt > 1.0f) { - grad_p = grad_dist * 2.0f * (p - v1); - grad_v1 = -1.0f * grad_p; - // no gradients wrt v0 - } else { - const float3 p_proj = v0 + tt * v1v0; - const float3 diff = p - p_proj; - const float3 grad_base = grad_dist * 2.0f * diff; - grad_p = grad_base - dot(grad_base, v1v0) * v1v0 / t_bot; - const float3 dtt_v0 = (-1.0f * v1v0 - pv0 + 2.0f * tt * v1v0) / t_bot; - grad_v0 = (-1.0f + tt) * grad_base - dot(grad_base, v1v0) * dtt_v0; - const float3 dtt_v1 = (pv0 - 2.0f * tt * v1v0) / t_bot; - grad_v1 = -dot(grad_base, v1v0) * dtt_v1 - tt * grad_base; - } - - return thrust::make_tuple(grad_p, grad_v0, grad_v1); -} - -// Computes the squared distance of a point p relative to a triangle (v0, v1, -// v2). If the point's projection p0 on the plane spanned by (v0, v1, v2) is -// inside the triangle with vertices (v0, v1, v2), then the returned value is -// the squared distance of p to its projection p0. Otherwise, the returned value -// is the smallest squared distance of p from the line segments (v0, v1), (v0, -// v2) and (v1, v2). -// -// Args: -// p: vec3 coordinates of a point -// v0, v1, v2: vec3 coordinates of the triangle vertices -// min_triangle_area: triangles less than this size are considered -// points/lines, IsInsideTriangle returns False -// -// Returns: -// dist: Float of the squared distance -// - -__device__ inline float PointTriangle3DistanceForward( - const float3& p, - const float3& v0, - const float3& v1, - const float3& v2, - const double min_triangle_area) { - float3 normal = cross(v2 - v0, v1 - v0); - const float norm_normal = norm(normal); - normal = normalize(normal); - - // p0 is the projection of p on the plane spanned by (v0, v1, v2) - // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal - const float t = dot(v0 - p, normal); - const float3 p0 = p + t * normal; - - bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); - float dist = 0.0f; - - if ((is_inside) && (norm_normal > kEpsilon)) { - // if projection p0 is inside triangle spanned by (v0, v1, v2) - // then distance is equal to norm(p0 - p)^2 - dist = t * t; - } else { - const float e01 = PointLine3DistanceForward(p, v0, v1); - const float e02 = PointLine3DistanceForward(p, v0, v2); - const float e12 = PointLine3DistanceForward(p, v1, v2); - - dist = (e01 > e02) ? e02 : e01; - dist = (dist > e12) ? 
e12 : dist; - } - - return dist; -} - -// The backward pass for computing the squared distance of a point -// to the triangle (v0, v1, v2). -// -// Args: -// p: xyz coordinates of a point -// v0, v1, v2: xyz coordinates of the triangle vertices -// grad_dist: Float of the gradient wrt dist -// min_triangle_area: triangles less than this size are considered -// points/lines, IsInsideTriangle returns False -// -// Returns: -// tuple of gradients for the point and triangle: -// (float3 grad_p, float3 grad_v0, float3 grad_v1, float3 grad_v2) -// - -__device__ inline thrust::tuple -PointTriangle3DistanceBackward( - const float3& p, - const float3& v0, - const float3& v1, - const float3& v2, - const float& grad_dist, - const double min_triangle_area) { - const float3 v2v0 = v2 - v0; - const float3 v1v0 = v1 - v0; - const float3 v0p = v0 - p; - float3 raw_normal = cross(v2v0, v1v0); - const float norm_normal = norm(raw_normal); - float3 normal = normalize(raw_normal); - - // p0 is the projection of p on the plane spanned by (v0, v1, v2) - // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal - const float t = dot(v0 - p, normal); - const float3 p0 = p + t * normal; - const float3 diff = t * normal; - - bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); - - float3 grad_p = make_float3(0.0f, 0.0f, 0.0f); - float3 grad_v0 = make_float3(0.0f, 0.0f, 0.0f); - float3 grad_v1 = make_float3(0.0f, 0.0f, 0.0f); - float3 grad_v2 = make_float3(0.0f, 0.0f, 0.0f); - - if ((is_inside) && (norm_normal > kEpsilon)) { - // derivative of dist wrt p - grad_p = -2.0f * grad_dist * t * normal; - // derivative of dist wrt normal - const float3 grad_normal = 2.0f * grad_dist * t * (v0p + diff); - // derivative of dist wrt raw_normal - const float3 grad_raw_normal = normalize_backward(raw_normal, grad_normal); - // derivative of dist wrt v2v0 and v1v0 - const auto grad_cross = cross_backward(v2v0, v1v0, grad_raw_normal); - const float3 grad_cross_v2v0 = thrust::get<0>(grad_cross); - const float3 grad_cross_v1v0 = thrust::get<1>(grad_cross); - grad_v0 = - grad_dist * 2.0f * t * normal - (grad_cross_v2v0 + grad_cross_v1v0); - grad_v1 = grad_cross_v1v0; - grad_v2 = grad_cross_v2v0; - } else { - const float e01 = PointLine3DistanceForward(p, v0, v1); - const float e02 = PointLine3DistanceForward(p, v0, v2); - const float e12 = PointLine3DistanceForward(p, v1, v2); - - if ((e01 <= e02) && (e01 <= e12)) { - // e01 is smallest - const auto grads = PointLine3DistanceBackward(p, v0, v1, grad_dist); - grad_p = thrust::get<0>(grads); - grad_v0 = thrust::get<1>(grads); - grad_v1 = thrust::get<2>(grads); - } else if ((e02 <= e01) && (e02 <= e12)) { - // e02 is smallest - const auto grads = PointLine3DistanceBackward(p, v0, v2, grad_dist); - grad_p = thrust::get<0>(grads); - grad_v0 = thrust::get<1>(grads); - grad_v2 = thrust::get<2>(grads); - } else if ((e12 <= e01) && (e12 <= e02)) { - // e12 is smallest - const auto grads = PointLine3DistanceBackward(p, v1, v2, grad_dist); - grad_p = thrust::get<0>(grads); - grad_v1 = thrust::get<1>(grads); - grad_v2 = thrust::get<2>(grads); - } - } - - return thrust::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); -} diff --git a/pytorch3d/pytorch3d/csrc/utils/geometry_utils.h b/pytorch3d/pytorch3d/csrc/utils/geometry_utils.h deleted file mode 100644 index ad9f7ff3f34dde7b119ea708eb0901cb826794d7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/geometry_utils.h +++ /dev/null @@ -1,823 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. 
and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include "vec2.h" -#include "vec3.h" - -// Set epsilon for preventing floating point errors and division by 0. -const auto kEpsilon = 1e-8; - -// Determines whether a point p is on the right side of a 2D line segment -// given by the end points v0, v1. -// -// Args: -// p: vec2 Coordinates of a point. -// v0, v1: vec2 Coordinates of the end points of the edge. -// -// Returns: -// area: The signed area of the parallelogram given by the vectors -// A = p - v0 -// B = v1 - v0 -// -// v1 ________ -// /\ / -// A / \ / -// / \ / -// v0 /______\/ -// B p -// -// The area can also be interpreted as the cross product A x B. -// If the sign of the area is positive, the point p is on the -// right side of the edge. Negative area indicates the point is on -// the left side of the edge. i.e. for an edge v1 - v0: -// -// v1 -// / -// / -// - / + -// / -// / -// v0 -// -template -T EdgeFunctionForward(const vec2& p, const vec2& v0, const vec2& v1) { - const T edge = (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x); - return edge; -} - -// Backward pass for the edge function returning partial dervivatives for each -// of the input points. -// -// Args: -// p: vec2 Coordinates of a point. -// v0, v1: vec2 Coordinates of the end points of the edge. -// grad_edge: Upstream gradient for output from edge function. -// -// Returns: -// tuple of gradients for each of the input points: -// (vec2 d_edge_dp, vec2 d_edge_dv0, vec2 d_edge_dv1) -// -template -inline std::tuple, vec2, vec2> EdgeFunctionBackward( - const vec2& p, - const vec2& v0, - const vec2& v1, - const T grad_edge) { - const vec2 dedge_dp(v1.y - v0.y, v0.x - v1.x); - const vec2 dedge_dv0(p.y - v1.y, v1.x - p.x); - const vec2 dedge_dv1(v0.y - p.y, p.x - v0.x); - return std::make_tuple( - grad_edge * dedge_dp, grad_edge * dedge_dv0, grad_edge * dedge_dv1); -} - -// The forward pass for computing the barycentric coordinates of a point -// relative to a triangle. -// Ref: -// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/barycentric-coordinates -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: Coordinates of the triangle vertices. -// -// Returns -// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1]. -// -template -vec3 BarycentricCoordinatesForward( - const vec2& p, - const vec2& v0, - const vec2& v1, - const vec2& v2) { - const T area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; - const T w0 = EdgeFunctionForward(p, v1, v2) / area; - const T w1 = EdgeFunctionForward(p, v2, v0) / area; - const T w2 = EdgeFunctionForward(p, v0, v1) / area; - return vec3(w0, w1, w2); -} - -// The backward pass for computing the barycentric coordinates of a point -// relative to a triangle. -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: (x, y) coordinates of the triangle vertices. -// grad_bary_upstream: vec3 Upstream gradient for each of the -// barycentric coordaintes [grad_w0, grad_w1, grad_w2]. 
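The edge function and the 2D barycentric coordinates built from it are easy to verify by hand; the sketch below mirrors the ratio-of-signed-areas formula in plain C++ with a made-up triangle and point (the kEpsilon guard on the denominator is omitted for brevity):

#include <cstdio>

struct V2 { float x, y; };

// Signed parallelogram area of (p - v0) x (v1 - v0), as in the edge function.
static float Edge(V2 p, V2 v0, V2 v1) {
  return (p.x - v0.x) * (v1.y - v0.y) - (p.y - v0.y) * (v1.x - v0.x);
}

int main() {
  const V2 v0 = {0, 0}, v1 = {4, 0}, v2 = {0, 4};
  const V2 p = {1, 1};
  // Screen-space barycentrics: each sub-area divided by the full signed area.
  const float area = Edge(v2, v0, v1);
  const float w0 = Edge(p, v1, v2) / area;
  const float w1 = Edge(p, v2, v0) / area;
  const float w2 = Edge(p, v0, v1) / area;
  // Expect (0.5, 0.25, 0.25); indeed 0.5*v0 + 0.25*v1 + 0.25*v2 == (1, 1).
  printf("%f %f %f\n", w0, w1, w2);
}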
-// -// Returns -// tuple of gradients for each of the triangle vertices: -// (vec2 grad_v0, vec2 grad_v1, vec2 grad_v2) -// -template -inline std::tuple, vec2, vec2, vec2> BarycentricCoordsBackward( - const vec2& p, - const vec2& v0, - const vec2& v1, - const vec2& v2, - const vec3& grad_bary_upstream) { - const T area = EdgeFunctionForward(v2, v0, v1) + kEpsilon; - const T area2 = pow(area, 2.0f); - const T area_inv = 1.0f / area; - const T e0 = EdgeFunctionForward(p, v1, v2); - const T e1 = EdgeFunctionForward(p, v2, v0); - const T e2 = EdgeFunctionForward(p, v0, v1); - - const T grad_w0 = grad_bary_upstream.x; - const T grad_w1 = grad_bary_upstream.y; - const T grad_w2 = grad_bary_upstream.z; - - // Calculate component of the gradient from each of w0, w1 and w2. - // e.g. for w0: - // dloss/dw0_v = dl/dw0 * dw0/dw0_top * dw0_top/dv - // + dl/dw0 * dw0/dw0_bot * dw0_bot/dv - const T dw0_darea = -e0 / (area2); - const T dw0_e0 = area_inv; - const T dloss_d_w0area = grad_w0 * dw0_darea; - const T dloss_e0 = grad_w0 * dw0_e0; - auto de0_dv = EdgeFunctionBackward(p, v1, v2, dloss_e0); - auto dw0area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w0area); - const vec2 dw0_p = std::get<0>(de0_dv); - const vec2 dw0_dv0 = std::get<1>(dw0area_dv); - const vec2 dw0_dv1 = std::get<1>(de0_dv) + std::get<2>(dw0area_dv); - const vec2 dw0_dv2 = std::get<2>(de0_dv) + std::get<0>(dw0area_dv); - - const T dw1_darea = -e1 / (area2); - const T dw1_e1 = area_inv; - const T dloss_d_w1area = grad_w1 * dw1_darea; - const T dloss_e1 = grad_w1 * dw1_e1; - auto de1_dv = EdgeFunctionBackward(p, v2, v0, dloss_e1); - auto dw1area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w1area); - const vec2 dw1_p = std::get<0>(de1_dv); - const vec2 dw1_dv0 = std::get<2>(de1_dv) + std::get<1>(dw1area_dv); - const vec2 dw1_dv1 = std::get<2>(dw1area_dv); - const vec2 dw1_dv2 = std::get<1>(de1_dv) + std::get<0>(dw1area_dv); - - const T dw2_darea = -e2 / (area2); - const T dw2_e2 = area_inv; - const T dloss_d_w2area = grad_w2 * dw2_darea; - const T dloss_e2 = grad_w2 * dw2_e2; - auto de2_dv = EdgeFunctionBackward(p, v0, v1, dloss_e2); - auto dw2area_dv = EdgeFunctionBackward(v2, v0, v1, dloss_d_w2area); - const vec2 dw2_p = std::get<0>(de2_dv); - const vec2 dw2_dv0 = std::get<1>(de2_dv) + std::get<1>(dw2area_dv); - const vec2 dw2_dv1 = std::get<2>(de2_dv) + std::get<2>(dw2area_dv); - const vec2 dw2_dv2 = std::get<0>(dw2area_dv); - - const vec2 dbary_p = dw0_p + dw1_p + dw2_p; - const vec2 dbary_dv0 = dw0_dv0 + dw1_dv0 + dw2_dv0; - const vec2 dbary_dv1 = dw0_dv1 + dw1_dv1 + dw2_dv1; - const vec2 dbary_dv2 = dw0_dv2 + dw1_dv2 + dw2_dv2; - - return std::make_tuple(dbary_p, dbary_dv0, dbary_dv1, dbary_dv2); -} - -// Forward pass for applying perspective correction to barycentric coordinates. -// -// Args: -// bary: Screen-space barycentric coordinates for a point -// z0, z1, z2: Camera-space z-coordinates of the triangle vertices -// -// Returns -// World-space barycentric coordinates -// -template -inline vec3 BarycentricPerspectiveCorrectionForward( - const vec3& bary, - const T z0, - const T z1, - const T z2) { - const T w0_top = bary.x * z1 * z2; - const T w1_top = bary.y * z0 * z2; - const T w2_top = bary.z * z0 * z1; - const T denom = std::max(w0_top + w1_top + w2_top, kEpsilon); - const T w0 = w0_top / denom; - const T w1 = w1_top / denom; - const T w2 = w2_top / denom; - return vec3(w0, w1, w2); -} - -// Backward pass for applying perspective correction to barycentric coordinates. 
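Perspective correction, as defined here, reweights each screen-space coordinate by the product of the other two vertices' camera-space depths and renormalizes, so nearer vertices gain weight. A small host-side sketch with hypothetical values (equal screen-space weights, depths 1, 2 and 4) follows the w_top / denom formula rather than reproducing the original template:

#include <algorithm>
#include <cstdio>

// Screen-space barycentrics (b0, b1, b2) and camera-space depths (z0, z1, z2)
// -> world-space barycentrics, following the w_i_top / denom formula.
static void PerspectiveCorrect(
    float b0, float b1, float b2, float z0, float z1, float z2, float out[3]) {
  const float w0 = b0 * z1 * z2;
  const float w1 = b1 * z0 * z2;
  const float w2 = b2 * z0 * z1;
  const float denom = std::max(w0 + w1 + w2, 1e-8f);
  out[0] = w0 / denom;
  out[1] = w1 / denom;
  out[2] = w2 / denom;
}

int main() {
  float w[3];
  // Equal screen-space weights with depths 1, 2, 4: expect (4/7, 2/7, 1/7),
  // i.e. the closest vertex (z0 = 1) dominates after correction.
  PerspectiveCorrect(1.0f / 3, 1.0f / 3, 1.0f / 3, 1.0f, 2.0f, 4.0f, w);
  printf("%f %f %f\n", w[0], w[1], w[2]);
}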
-// -// Args: -// bary: Screen-space barycentric coordinates for a point -// z0, z1, z2: Camera-space z-coordinates of the triangle vertices -// grad_out: Upstream gradient of the loss with respect to the corrected -// barycentric coordinates. -// -// Returns a tuple of: -// grad_bary: Downstream gradient of the loss with respect to the the -// uncorrected barycentric coordinates. -// grad_z0, grad_z1, grad_z2: Downstream gradient of the loss with respect -// to the z-coordinates of the triangle verts -template -inline std::tuple, T, T, T> BarycentricPerspectiveCorrectionBackward( - const vec3& bary, - const T z0, - const T z1, - const T z2, - const vec3& grad_out) { - // Recompute forward pass - const T w0_top = bary.x * z1 * z2; - const T w1_top = bary.y * z0 * z2; - const T w2_top = bary.z * z0 * z1; - const T denom = std::max(w0_top + w1_top + w2_top, kEpsilon); - - // Now do backward pass - const T grad_denom_top = - -w0_top * grad_out.x - w1_top * grad_out.y - w2_top * grad_out.z; - const T grad_denom = grad_denom_top / (denom * denom); - const T grad_w0_top = grad_denom + grad_out.x / denom; - const T grad_w1_top = grad_denom + grad_out.y / denom; - const T grad_w2_top = grad_denom + grad_out.z / denom; - const T grad_bary_x = grad_w0_top * z1 * z2; - const T grad_bary_y = grad_w1_top * z0 * z2; - const T grad_bary_z = grad_w2_top * z0 * z1; - const vec3 grad_bary(grad_bary_x, grad_bary_y, grad_bary_z); - const T grad_z0 = grad_w1_top * bary.y * z2 + grad_w2_top * bary.z * z1; - const T grad_z1 = grad_w0_top * bary.x * z2 + grad_w2_top * bary.z * z0; - const T grad_z2 = grad_w0_top * bary.x * z1 + grad_w1_top * bary.y * z0; - return std::make_tuple(grad_bary, grad_z0, grad_z1, grad_z2); -} - -// Clip negative barycentric coordinates to 0.0 and renormalize so -// the barycentric coordinates for a point sum to 1. When the blur_radius -// is greater than 0, a face will still be recorded as overlapping a pixel -// if the pixel is outside the face. In this case at least one of the -// barycentric coordinates for the pixel relative to the face will be negative. -// Clipping will ensure that the texture and z buffer are interpolated -// correctly. -// -// Args -// bary: (w0, w1, w2) barycentric coordinates which can contain values < 0. -// -// Returns -// bary: (w0, w1, w2) barycentric coordinates in the range [0, 1] which -// satisfy the condition: sum(w0, w1, w2) = 1.0. -// -template -vec3 BarycentricClipForward(const vec3 bary) { - vec3 w(0.0f, 0.0f, 0.0f); - // Only clamp negative values to 0.0. - // No need to clamp values > 1.0 as they will be renormalized. - w.x = std::max(bary.x, 0.0f); - w.y = std::max(bary.y, 0.0f); - w.z = std::max(bary.z, 0.0f); - float w_sum = w.x + w.y + w.z; - w_sum = std::fmaxf(w_sum, 1e-5); - w.x /= w_sum; - w.y /= w_sum; - w.z /= w_sum; - return w; -} - -// Backward pass for barycentric coordinate clipping. -// -// Args -// bary: (w0, w1, w2) barycentric coordinates which can contain values < 0. -// grad_baryclip_upstream: vec3 Upstream gradient for each of the clipped -// barycentric coordinates [grad_w0, grad_w1, grad_w2]. 
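Clipping, as described here, is clamp-at-zero followed by renormalization, so a pixel just outside one edge (one negative coordinate) ends up with non-negative coordinates that still sum to one. A tiny host-side sketch with a made-up input, mirroring the clipping logic rather than the original template:

#include <algorithm>
#include <cstdio>

// Clamp negative barycentric coordinates to 0 and renormalize to sum to 1.
static void BaryClip(const float in[3], float out[3]) {
  const float w0 = std::max(in[0], 0.0f);
  const float w1 = std::max(in[1], 0.0f);
  const float w2 = std::max(in[2], 0.0f);
  const float w_sum = std::max(w0 + w1 + w2, 1e-5f);
  out[0] = w0 / w_sum;
  out[1] = w1 / w_sum;
  out[2] = w2 / w_sum;
}

int main() {
  const float bary[3] = {-0.2f, 0.5f, 0.7f}; // pixel just outside one edge
  float clipped[3];
  BaryClip(bary, clipped);
  // Expect roughly (0.0, 0.4167, 0.5833): all non-negative, summing to 1.
  printf("%f %f %f\n", clipped[0], clipped[1], clipped[2]);
}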
-// -// Returns -// vec3 of gradients for the unclipped barycentric coordinates: -// (grad_w0, grad_w1, grad_w2) -// -template -vec3 BarycentricClipBackward( - const vec3 bary, - const vec3 grad_baryclip_upstream) { - // Redo some of the forward pass calculations - vec3 w(0.0f, 0.0f, 0.0f); - w.x = std::max(bary.x, 0.0f); - w.y = std::max(bary.y, 0.0f); - w.z = std::max(bary.z, 0.0f); - float w_sum = w.x + w.y + w.z; - - vec3 grad_bary(1.0f, 1.0f, 1.0f); - vec3 grad_clip(1.0f, 1.0f, 1.0f); - vec3 grad_sum(1.0f, 1.0f, 1.0f); - - // Check if the sum was clipped. - float grad_sum_clip = 1.0f; - if (w_sum < 1e-5) { - grad_sum_clip = 0.0f; - w_sum = 1e-5; - } - - // Check if any of the bary coordinates have been clipped. - // Only negative values are clamped to 0.0. - if (bary.x < 0.0f) { - grad_clip.x = 0.0f; - } - if (bary.y < 0.0f) { - grad_clip.y = 0.0f; - } - if (bary.z < 0.0f) { - grad_clip.z = 0.0f; - } - - // Gradients of the sum. - grad_sum.x = -w.x / (pow(w_sum, 2.0f)) * grad_sum_clip; - grad_sum.y = -w.y / (pow(w_sum, 2.0f)) * grad_sum_clip; - grad_sum.z = -w.z / (pow(w_sum, 2.0f)) * grad_sum_clip; - - // Gradients for each of the bary coordinates including the cross terms - // from the sum. - grad_bary.x = grad_clip.x * - (grad_baryclip_upstream.x * (1.0f / w_sum + grad_sum.x) + - grad_baryclip_upstream.y * (grad_sum.y) + - grad_baryclip_upstream.z * (grad_sum.z)); - - grad_bary.y = grad_clip.y * - (grad_baryclip_upstream.y * (1.0f / w_sum + grad_sum.y) + - grad_baryclip_upstream.x * (grad_sum.x) + - grad_baryclip_upstream.z * (grad_sum.z)); - - grad_bary.z = grad_clip.z * - (grad_baryclip_upstream.z * (1.0f / w_sum + grad_sum.z) + - grad_baryclip_upstream.x * (grad_sum.x) + - grad_baryclip_upstream.y * (grad_sum.y)); - - return grad_bary; -} - -// Calculate minimum distance between a line segment (v1 - v0) and point p. -// -// Args: -// p: Coordinates of a point. -// v0, v1: Coordinates of the end points of the line segment. -// -// Returns: -// squared distance of the point to the line. -// -// Consider the line extending the segment - this can be parameterized as: -// v0 + t (v1 - v0). -// -// First find the projection of point p onto the line. It falls where: -// t = [(p - v0) . (v1 - v0)] / |v1 - v0|^2 -// where . is the dot product. -// -// The parameter t is clamped from [0, 1] to handle points outside the -// segment (v1 - v0). -// -// Once the projection of the point on the segment is known, the distance from -// p to the projection gives the minimum distance to the segment. -// -template -T PointLineDistanceForward( - const vec2& p, - const vec2& v0, - const vec2& v1) { - const vec2 v1v0 = v1 - v0; - const T l2 = dot(v1v0, v1v0); - if (l2 <= kEpsilon) { - return dot(p - v1, p - v1); - } - - const T t = dot(v1v0, p - v0) / l2; - const T tt = std::min(std::max(t, 0.00f), 1.00f); - const vec2 p_proj = v0 + tt * v1v0; - return dot(p - p_proj, p - p_proj); -} - -template -T PointLine3DistanceForward( - const vec3& p, - const vec3& v0, - const vec3& v1) { - const vec3 v1v0 = v1 - v0; - const T l2 = dot(v1v0, v1v0); - if (l2 <= kEpsilon) { - return dot(p - v1, p - v1); - } - - const T t = dot(v1v0, p - v0) / l2; - const T tt = std::min(std::max(t, 0.00f), 1.00f); - const vec3 p_proj = v0 + tt * v1v0; - return dot(p - p_proj, p - p_proj); -} - -// Backward pass for point to line distance in 2D. -// -// Args: -// p: Coordinates of a point. -// v0, v1: Coordinates of the end points of the line segment. -// grad_dist: Upstream gradient for the distance. 
-// -// Returns: -// tuple of gradients for each of the input points: -// (vec2 grad_p, vec2 grad_v0, vec2 grad_v1) -// -template -inline std::tuple, vec2, vec2> PointLineDistanceBackward( - const vec2& p, - const vec2& v0, - const vec2& v1, - const T& grad_dist) { - // Redo some of the forward pass calculations. - const vec2 v1v0 = v1 - v0; - const vec2 pv0 = p - v0; - const T t_bot = dot(v1v0, v1v0); - const T t_top = dot(v1v0, pv0); - const T t = t_top / t_bot; - const T tt = std::min(std::max(t, 0.00f), 1.00f); - const vec2 p_proj = (1.0f - tt) * v0 + tt * v1; - - const vec2 grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p); - const vec2 grad_v1 = grad_dist * tt * 2.0f * (p_proj - p); - const vec2 grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p); - - return std::make_tuple(grad_p, grad_v0, grad_v1); -} - -template -std::tuple, vec3, vec3> PointLine3DistanceBackward( - const vec3& p, - const vec3& v0, - const vec3& v1, - const T& grad_dist) { - const vec3 v1v0 = v1 - v0; - const vec3 pv0 = p - v0; - const T t_bot = dot(v1v0, v1v0); - const T t_top = dot(v1v0, pv0); - - vec3 grad_p{0.0f, 0.0f, 0.0f}; - vec3 grad_v0{0.0f, 0.0f, 0.0f}; - vec3 grad_v1{0.0f, 0.0f, 0.0f}; - - const T tt = t_top / t_bot; - - if (t_bot < kEpsilon) { - // if t_bot small, then v0 == v1, - // and dist = 0.5 * dot(pv0, pv0) + 0.5 * dot(pv1, pv1) - grad_p = grad_dist * 2.0f * pv0; - grad_v0 = -0.5f * grad_p; - grad_v1 = grad_v0; - } else if (tt < 0.0f) { - grad_p = grad_dist * 2.0f * pv0; - grad_v0 = -1.0f * grad_p; - // no gradients wrt v1 - } else if (tt > 1.0f) { - grad_p = grad_dist * 2.0f * (p - v1); - grad_v1 = -1.0f * grad_p; - // no gradients wrt v0 - } else { - const vec3 p_proj = v0 + tt * v1v0; - const vec3 diff = p - p_proj; - const vec3 grad_base = grad_dist * 2.0f * diff; - grad_p = grad_base - dot(grad_base, v1v0) * v1v0 / t_bot; - const vec3 dtt_v0 = (-1.0f * v1v0 - pv0 + 2.0f * tt * v1v0) / t_bot; - grad_v0 = (-1.0f + tt) * grad_base - dot(grad_base, v1v0) * dtt_v0; - const vec3 dtt_v1 = (pv0 - 2.0f * tt * v1v0) / t_bot; - grad_v1 = -dot(grad_base, v1v0) * dtt_v1 - tt * grad_base; - } - - return std::make_tuple(grad_p, grad_v0, grad_v1); -} - -// The forward pass for calculating the shortest distance between a point -// and a triangle. -// Ref: https://www.randygaul.net/2014/07/23/distance-point-to-line-segment/ -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: Coordinates of the three triangle vertices. -// -// Returns: -// shortest squared distance from a point to a triangle. -// -// -template -T PointTriangleDistanceForward( - const vec2& p, - const vec2& v0, - const vec2& v1, - const vec2& v2) { - // Compute distance of point to 3 edges of the triangle and return the - // minimum value. - const T e01_dist = PointLineDistanceForward(p, v0, v1); - const T e02_dist = PointLineDistanceForward(p, v0, v2); - const T e12_dist = PointLineDistanceForward(p, v1, v2); - const T edge_dist = std::min(std::min(e01_dist, e02_dist), e12_dist); - - return edge_dist; -} - -// Backward pass for point triangle distance. -// -// Args: -// p: Coordinates of a point. -// v0, v1, v2: Coordinates of the three triangle vertices. -// grad_dist: Upstream gradient for the distance. 
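Because the 2D point-to-triangle distance is simply the minimum of the three point-to-segment distances, only the closest edge contributes, which is also why the backward pass routes the gradient to a single edge. A compact host-side sketch with hypothetical values (not the original templates):

#include <algorithm>
#include <cstdio>

struct P2 { float x, y; };

// Squared distance from p to segment (a, b), clamping the projection to [0, 1].
static float SegDist2(P2 p, P2 a, P2 b) {
  const float bax = b.x - a.x, bay = b.y - a.y;
  const float l2 = bax * bax + bay * bay;
  float t = l2 > 1e-8f ? ((p.x - a.x) * bax + (p.y - a.y) * bay) / l2 : 0.0f;
  t = std::min(std::max(t, 0.0f), 1.0f);
  const float dx = p.x - (a.x + t * bax), dy = p.y - (a.y + t * bay);
  return dx * dx + dy * dy;
}

int main() {
  const P2 v0 = {0, 0}, v1 = {2, 0}, v2 = {0, 2}, p = {2, 2};
  const float d01 = SegDist2(p, v0, v1); // 4: closest point (2, 0)
  const float d02 = SegDist2(p, v0, v2); // 4: closest point (0, 2)
  const float d12 = SegDist2(p, v1, v2); // 2: closest point (1, 1)
  // The hypotenuse edge (v1, v2) is closest, so it alone would receive gradient.
  printf("%f\n", std::min(std::min(d01, d02), d12)); // 2
}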
-// -// Returns: -// tuple of gradients for each of the triangle vertices: -// (vec2 grad_v0, vec2 grad_v1, vec2 grad_v2) -// -template -inline std::tuple, vec2, vec2, vec2> -PointTriangleDistanceBackward( - const vec2& p, - const vec2& v0, - const vec2& v1, - const vec2& v2, - const T& grad_dist) { - // Compute distance to all 3 edges of the triangle. - const T e01_dist = PointLineDistanceForward(p, v0, v1); - const T e02_dist = PointLineDistanceForward(p, v0, v2); - const T e12_dist = PointLineDistanceForward(p, v1, v2); - - // Initialize output tensors. - vec2 grad_v0(0.0f, 0.0f); - vec2 grad_v1(0.0f, 0.0f); - vec2 grad_v2(0.0f, 0.0f); - vec2 grad_p(0.0f, 0.0f); - - // Find which edge is the closest and return PointLineDistanceBackward for - // that edge. - if (e01_dist <= e02_dist && e01_dist <= e12_dist) { - // Closest edge is v1 - v0. - auto grad_e01 = PointLineDistanceBackward(p, v0, v1, grad_dist); - grad_p = std::get<0>(grad_e01); - grad_v0 = std::get<1>(grad_e01); - grad_v1 = std::get<2>(grad_e01); - } else if (e02_dist <= e01_dist && e02_dist <= e12_dist) { - // Closest edge is v2 - v0. - auto grad_e02 = PointLineDistanceBackward(p, v0, v2, grad_dist); - grad_p = std::get<0>(grad_e02); - grad_v0 = std::get<1>(grad_e02); - grad_v2 = std::get<2>(grad_e02); - } else if (e12_dist <= e01_dist && e12_dist <= e02_dist) { - // Closest edge is v2 - v1. - auto grad_e12 = PointLineDistanceBackward(p, v1, v2, grad_dist); - grad_p = std::get<0>(grad_e12); - grad_v1 = std::get<1>(grad_e12); - grad_v2 = std::get<2>(grad_e12); - } - - return std::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); -} - -// Computes the area of a triangle (v0, v1, v2). -// Args: -// v0, v1, v2: vec3 coordinates of the triangle vertices -// -// Returns: -// area: float: the area of the triangle -// -template -T AreaOfTriangle(const vec3& v0, const vec3& v1, const vec3& v2) { - vec3 p0 = v1 - v0; - vec3 p1 = v2 - v0; - - // compute the hypotenus of the scross product (p0 x p1) - float dd = std::hypot( - p0.y * p1.z - p0.z * p1.y, - std::hypot(p0.z * p1.x - p0.x * p1.z, p0.x * p1.y - p0.y * p1.x)); - - return dd / 2.0; -} - -// Computes the squared distance of a point p relative to a triangle (v0, v1, -// v2). If the point's projection p0 on the plane spanned by (v0, v1, v2) is -// inside the triangle with vertices (v0, v1, v2), then the returned value is -// the squared distance of p to its projection p0. Otherwise, the returned value -// is the smallest squared distance of p from the line segments (v0, v1), (v0, -// v2) and (v1, v2). -// -// Args: -// p: vec3 coordinates of a point -// v0, v1, v2: vec3 coordinates of the triangle vertices -// -// Returns: -// dist: Float of the squared distance -// - -const float vEpsilon = 1e-8; - -template -vec3 BarycentricCoords3Forward( - const vec3& p, - const vec3& v0, - const vec3& v1, - const vec3& v2) { - vec3 p0 = v1 - v0; - vec3 p1 = v2 - v0; - vec3 p2 = p - v0; - - const T d00 = dot(p0, p0); - const T d01 = dot(p0, p1); - const T d11 = dot(p1, p1); - const T d20 = dot(p2, p0); - const T d21 = dot(p2, p1); - - const T denom = d00 * d11 - d01 * d01 + kEpsilon; - const T w1 = (d11 * d20 - d01 * d21) / denom; - const T w2 = (d00 * d21 - d01 * d20) / denom; - const T w0 = 1.0f - w1 - w2; - - return vec3(w0, w1, w2); -} - -// Checks whether the point p is inside the triangle (v0, v1, v2). -// A point is inside the triangle, if all barycentric coordinates -// wrt the triangle are >= 0 & <= 1. -// If the triangle is degenerate, aka line or point, then return False. 
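The w1/w2 expressions above come from solving the 2x2 system obtained by projecting p - v0 onto the two edge vectors; a coplanar point is inside the triangle exactly when all three resulting coordinates land in [0, 1]. A small host-side sketch with a hypothetical coplanar point (a simplified analogue, not the original template):

#include <cstdio>

struct F3 { float x, y, z; };

static float Dot3(F3 a, F3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static F3 Sub3(F3 a, F3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }

// Barycentric coordinates of p wrt (v0, v1, v2), assuming p is coplanar.
static void Bary3(F3 p, F3 v0, F3 v1, F3 v2, float w[3]) {
  const F3 p0 = Sub3(v1, v0), p1 = Sub3(v2, v0), p2 = Sub3(p, v0);
  const float d00 = Dot3(p0, p0), d01 = Dot3(p0, p1), d11 = Dot3(p1, p1);
  const float d20 = Dot3(p2, p0), d21 = Dot3(p2, p1);
  const float denom = d00 * d11 - d01 * d01 + 1e-8f;
  w[1] = (d11 * d20 - d01 * d21) / denom;
  w[2] = (d00 * d21 - d01 * d20) / denom;
  w[0] = 1.0f - w[1] - w[2];
}

int main() {
  float w[3];
  // Unit right triangle in the z = 0 plane; p = (0.25, 0.25, 0) lies inside.
  Bary3({0.25f, 0.25f, 0}, {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, w);
  const bool inside =
      0 <= w[0] && w[0] <= 1 && 0 <= w[1] && w[1] <= 1 && 0 <= w[2] && w[2] <= 1;
  printf("%f %f %f inside=%d\n", w[0], w[1], w[2], inside); // ~0.5 0.25 0.25 1
}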
-// -// NOTE that this function assumes that p lives on the space spanned -// by (v0, v1, v2). -// TODO(gkioxari) explicitly check whether p is coplanar with (v0, v1, v2) -// and throw an error if check fails -// -// Args: -// p: vec3 coordinates of a point -// v0, v1, v2: vec3 coordinates of the triangle vertices -// min_triangle_area: triangles less than this size are considered -// points/lines, IsInsideTriangle returns False -// -// Returns: -// inside: bool indicating wether p is inside triangle -// -template -static bool IsInsideTriangle( - const vec3& p, - const vec3& v0, - const vec3& v1, - const vec3& v2, - const double min_triangle_area) { - bool inside; - if (AreaOfTriangle(v0, v1, v2) < min_triangle_area) { - inside = 0; - } else { - vec3 bary = BarycentricCoords3Forward(p, v0, v1, v2); - bool x_in = 0.0f <= bary.x && bary.x <= 1.0f; - bool y_in = 0.0f <= bary.y && bary.y <= 1.0f; - bool z_in = 0.0f <= bary.z && bary.z <= 1.0f; - inside = x_in && y_in && z_in; - } - return inside; -} - -template -T PointTriangle3DistanceForward( - const vec3& p, - const vec3& v0, - const vec3& v1, - const vec3& v2, - const double min_triangle_area) { - vec3 normal = cross(v2 - v0, v1 - v0); - const T norm_normal = norm(normal); - normal = normal / (norm_normal + vEpsilon); - - // p0 is the projection of p on the plane spanned by (v0, v1, v2) - // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal - const T t = dot(v0 - p, normal); - const vec3 p0 = p + t * normal; - - bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); - T dist = 0.0f; - - if ((is_inside) && (norm_normal > kEpsilon)) { - // if projection p0 is inside triangle spanned by (v0, v1, v2) - // then distance is equal to norm(p0 - p)^2 - dist = t * t; - } else { - const float e01 = PointLine3DistanceForward(p, v0, v1); - const float e02 = PointLine3DistanceForward(p, v0, v2); - const float e12 = PointLine3DistanceForward(p, v1, v2); - - dist = (e01 > e02) ? e02 : e01; - dist = (dist > e12) ? e12 : dist; - } - - return dist; -} - -template -std::tuple, vec3> -cross_backward(const vec3& a, const vec3& b, const vec3& grad_cross) { - const float grad_ax = -grad_cross.y * b.z + grad_cross.z * b.y; - const float grad_ay = grad_cross.x * b.z - grad_cross.z * b.x; - const float grad_az = -grad_cross.x * b.y + grad_cross.y * b.x; - const vec3 grad_a = vec3(grad_ax, grad_ay, grad_az); - - const float grad_bx = grad_cross.y * a.z - grad_cross.z * a.y; - const float grad_by = -grad_cross.x * a.z + grad_cross.z * a.x; - const float grad_bz = grad_cross.x * a.y - grad_cross.y * a.x; - const vec3 grad_b = vec3(grad_bx, grad_by, grad_bz); - - return std::make_tuple(grad_a, grad_b); -} - -template -vec3 normalize_backward(const vec3& a, const vec3& grad_normz) { - const float a_norm = norm(a) + vEpsilon; - const vec3 out = a / a_norm; - - const float grad_ax = grad_normz.x * (1.0f - out.x * out.x) / a_norm + - grad_normz.y * (-out.x * out.y) / a_norm + - grad_normz.z * (-out.x * out.z) / a_norm; - const float grad_ay = grad_normz.x * (-out.x * out.y) / a_norm + - grad_normz.y * (1.0f - out.y * out.y) / a_norm + - grad_normz.z * (-out.y * out.z) / a_norm; - const float grad_az = grad_normz.x * (-out.x * out.z) / a_norm + - grad_normz.y * (-out.y * out.z) / a_norm + - grad_normz.z * (1.0f - out.z * out.z) / a_norm; - return vec3(grad_ax, grad_ay, grad_az); -} - -// The backward pass for computing the squared distance of a point -// to the triangle (v0, v1, v2). 
-// -// Args: -// p: xyz coordinates of a point -// v0, v1, v2: xyz coordinates of the triangle vertices -// grad_dist: Float of the gradient wrt dist -// min_triangle_area: triangles less than this size are considered -// points/lines, IsInsideTriangle returns False -// -// Returns: -// tuple of gradients for the point and triangle: -// (float3 grad_p, float3 grad_v0, float3 grad_v1, float3 grad_v2) -// - -template -static std::tuple, vec3, vec3, vec3> -PointTriangle3DistanceBackward( - const vec3& p, - const vec3& v0, - const vec3& v1, - const vec3& v2, - const T& grad_dist, - const double min_triangle_area) { - const vec3 v2v0 = v2 - v0; - const vec3 v1v0 = v1 - v0; - const vec3 v0p = v0 - p; - vec3 raw_normal = cross(v2v0, v1v0); - const T norm_normal = norm(raw_normal); - vec3 normal = raw_normal / (norm_normal + vEpsilon); - - // p0 is the projection of p on the plane spanned by (v0, v1, v2) - // i.e. p0 = p + t * normal, s.t. (p0 - v0) is orthogonal to normal - const T t = dot(v0 - p, normal); - const vec3 p0 = p + t * normal; - const vec3 diff = t * normal; - - bool is_inside = IsInsideTriangle(p0, v0, v1, v2, min_triangle_area); - - vec3 grad_p(0.0f, 0.0f, 0.0f); - vec3 grad_v0(0.0f, 0.0f, 0.0f); - vec3 grad_v1(0.0f, 0.0f, 0.0f); - vec3 grad_v2(0.0f, 0.0f, 0.0f); - - if ((is_inside) && (norm_normal > kEpsilon)) { - // derivative of dist wrt p - grad_p = -2.0f * grad_dist * t * normal; - // derivative of dist wrt normal - const vec3 grad_normal = 2.0f * grad_dist * t * (v0p + diff); - // derivative of dist wrt raw_normal - const vec3 grad_raw_normal = normalize_backward(raw_normal, grad_normal); - // derivative of dist wrt v2v0 and v1v0 - const auto grad_cross = cross_backward(v2v0, v1v0, grad_raw_normal); - const vec3 grad_cross_v2v0 = std::get<0>(grad_cross); - const vec3 grad_cross_v1v0 = std::get<1>(grad_cross); - grad_v0 = - grad_dist * 2.0f * t * normal - (grad_cross_v2v0 + grad_cross_v1v0); - grad_v1 = grad_cross_v1v0; - grad_v2 = grad_cross_v2v0; - } else { - const T e01 = PointLine3DistanceForward(p, v0, v1); - const T e02 = PointLine3DistanceForward(p, v0, v2); - const T e12 = PointLine3DistanceForward(p, v1, v2); - - if ((e01 <= e02) && (e01 <= e12)) { - // e01 is smallest - const auto grads = PointLine3DistanceBackward(p, v0, v1, grad_dist); - grad_p = std::get<0>(grads); - grad_v0 = std::get<1>(grads); - grad_v1 = std::get<2>(grads); - } else if ((e02 <= e01) && (e02 <= e12)) { - // e02 is smallest - const auto grads = PointLine3DistanceBackward(p, v0, v2, grad_dist); - grad_p = std::get<0>(grads); - grad_v0 = std::get<1>(grads); - grad_v2 = std::get<2>(grads); - } else if ((e12 <= e01) && (e12 <= e02)) { - // e12 is smallest - const auto grads = PointLine3DistanceBackward(p, v1, v2, grad_dist); - grad_p = std::get<0>(grads); - grad_v1 = std::get<1>(grads); - grad_v2 = std::get<2>(grads); - } - } - - return std::make_tuple(grad_p, grad_v0, grad_v1, grad_v2); -} diff --git a/pytorch3d/pytorch3d/csrc/utils/index_utils.cuh b/pytorch3d/pytorch3d/csrc/utils/index_utils.cuh deleted file mode 100644 index 74bca270f344b0d0b0bc19c45fc7373eb5875a14..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/index_utils.cuh +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -// This converts dynamic array lookups into static array lookups, for small -// arrays up to size 32. -// -// Suppose we have a small thread-local array: -// -// float vals[10]; -// -// Ideally we should only index this array using static indices: -// -// for (int i = 0; i < 10; ++i) vals[i] = i * i; -// -// If we do so, then the CUDA compiler may be able to place the array into -// registers, which can have a big performance improvement. However if we -// access the array dynamically, the the compiler may force the array into -// local memory, which has the same latency as global memory. -// -// These functions convert dynamic array access into static array access -// using a brute-force lookup table. It can be used like this: -// -// float vals[10]; -// int idx = 3; -// float val = 3.14f; -// RegisterIndexUtils::set(vals, idx, val); -// float val2 = RegisterIndexUtils::get(vals, idx); -// -// The implementation is based on fbcuda/RegisterUtils.cuh: -// https://github.com/facebook/fbcuda/blob/master/RegisterUtils.cuh -// To avoid depending on the entire library, we just reimplement these two -// functions. The fbcuda implementation is a bit more sophisticated, and uses -// the preprocessor to generate switch statements that go up to N for each -// value of N. We are lazy and just have a giant explicit switch statement. -// -// We might be able to use a template metaprogramming approach similar to -// DispatchKernel1D for this. However DispatchKernel1D is intended to be used -// for dispatching to the correct CUDA kernel on the host, while this is -// is intended to run on the device. I was concerned that a metaprogramming -// approach for this might lead to extra function calls at runtime if the -// compiler fails to optimize them away, which could be very slow on device. -// However I didn't actually benchmark or test this. 
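The lookup-table trick explained in this header replaces a dynamic index with a switch over every legal constant index, so each array access in the compiled code uses a static offset. A plain C++ illustration of the same pattern at N = 4 (only a sketch of the idea; the CUDA template itself goes up to 32 and is templated on T and N):

#include <cstdio>

// Dynamic-to-static index conversion for a 4-element array: every access in
// the switch uses a compile-time constant index.
static float Get4(const float arr[4], int idx) {
  switch (idx) {
    case 0: return arr[0];
    case 1: return arr[1];
    case 2: return arr[2];
    case 3: return arr[3];
    default: return 0.0f; // out-of-range reads yield a default value
  }
}

static void Set4(float arr[4], int idx, float val) {
  switch (idx) {
    case 0: arr[0] = val; break;
    case 1: arr[1] = val; break;
    case 2: arr[2] = val; break;
    case 3: arr[3] = val; break;
    default: break; // out-of-range writes are ignored
  }
}

int main() {
  float vals[4] = {0, 0, 0, 0};
  Set4(vals, 2, 3.14f);
  printf("%f\n", Get4(vals, 2)); // 3.140000
}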
-template -struct RegisterIndexUtils { - __device__ __forceinline__ static T get(const T arr[N], int idx) { - if (idx < 0 || idx >= N) - return T(); - switch (idx) { - case 0: - return arr[0]; - case 1: - return arr[1]; - case 2: - return arr[2]; - case 3: - return arr[3]; - case 4: - return arr[4]; - case 5: - return arr[5]; - case 6: - return arr[6]; - case 7: - return arr[7]; - case 8: - return arr[8]; - case 9: - return arr[9]; - case 10: - return arr[10]; - case 11: - return arr[11]; - case 12: - return arr[12]; - case 13: - return arr[13]; - case 14: - return arr[14]; - case 15: - return arr[15]; - case 16: - return arr[16]; - case 17: - return arr[17]; - case 18: - return arr[18]; - case 19: - return arr[19]; - case 20: - return arr[20]; - case 21: - return arr[21]; - case 22: - return arr[22]; - case 23: - return arr[23]; - case 24: - return arr[24]; - case 25: - return arr[25]; - case 26: - return arr[26]; - case 27: - return arr[27]; - case 28: - return arr[28]; - case 29: - return arr[29]; - case 30: - return arr[30]; - case 31: - return arr[31]; - }; - return T(); - } - - __device__ __forceinline__ static void set(T arr[N], int idx, T val) { - if (idx < 0 || idx >= N) - return; - switch (idx) { - case 0: - arr[0] = val; - break; - case 1: - arr[1] = val; - break; - case 2: - arr[2] = val; - break; - case 3: - arr[3] = val; - break; - case 4: - arr[4] = val; - break; - case 5: - arr[5] = val; - break; - case 6: - arr[6] = val; - break; - case 7: - arr[7] = val; - break; - case 8: - arr[8] = val; - break; - case 9: - arr[9] = val; - break; - case 10: - arr[10] = val; - break; - case 11: - arr[11] = val; - break; - case 12: - arr[12] = val; - break; - case 13: - arr[13] = val; - break; - case 14: - arr[14] = val; - break; - case 15: - arr[15] = val; - break; - case 16: - arr[16] = val; - break; - case 17: - arr[17] = val; - break; - case 18: - arr[18] = val; - break; - case 19: - arr[19] = val; - break; - case 20: - arr[20] = val; - break; - case 21: - arr[21] = val; - break; - case 22: - arr[22] = val; - break; - case 23: - arr[23] = val; - break; - case 24: - arr[24] = val; - break; - case 25: - arr[25] = val; - break; - case 26: - arr[26] = val; - break; - case 27: - arr[27] = val; - break; - case 28: - arr[28] = val; - break; - case 29: - arr[29] = val; - break; - case 30: - arr[30] = val; - break; - case 31: - arr[31] = val; - break; - } - } -}; diff --git a/pytorch3d/pytorch3d/csrc/utils/mink.cuh b/pytorch3d/pytorch3d/csrc/utils/mink.cuh deleted file mode 100644 index c7858f58c8b92c12f5e889c10fe6e98a622d82b7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/mink.cuh +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#define MINK_H - -#include "index_utils.cuh" - -// A data structure to keep track of the smallest K keys seen so far as well -// as their associated values, intended to be used in device code. -// This data structure doesn't allocate any memory; keys and values are stored -// in arrays passed to the constructor. -// -// The implementation is generic; it can be used for any key type that supports -// the < operator, and can be used with any value type. -// -// Example usage: -// -// float keys[K]; -// int values[K]; -// MinK mink(keys, values, K); -// for (...) 
{ -// // Produce some key and value from somewhere -// mink.add(key, value); -// } -// mink.sort(); -// -// Now keys and values store the smallest K keys seen so far and the values -// associated to these keys: -// -// for (int k = 0; k < K; ++k) { -// float key_k = keys[k]; -// int value_k = values[k]; -// } -template -class MinK { - public: - // Constructor. - // - // Arguments: - // keys: Array in which to store keys - // values: Array in which to store values - // K: How many values to keep track of - __device__ MinK(key_t* keys, value_t* vals, int K) - : keys(keys), vals(vals), K(K), _size(0) {} - - // Try to add a new key and associated value to the data structure. If the key - // is one of the smallest K seen so far then it will be kept; otherwise it - // it will not be kept. - // - // This takes O(1) operations if the new key is not kept, or if the structure - // currently contains fewer than K elements. Otherwise this takes O(K) time. - // - // Arguments: - // key: The key to add - // val: The value associated to the key - __device__ __forceinline__ void add(const key_t& key, const value_t& val) { - if (_size < K) { - keys[_size] = key; - vals[_size] = val; - if (_size == 0 || key > max_key) { - max_key = key; - max_idx = _size; - } - _size++; - } else if (key < max_key) { - keys[max_idx] = key; - vals[max_idx] = val; - max_key = key; - for (int k = 0; k < K; ++k) { - key_t cur_key = keys[k]; - if (cur_key > max_key) { - max_key = cur_key; - max_idx = k; - } - } - } - } - - // Get the number of items currently stored in the structure. - // This takes O(1) time. - __device__ __forceinline__ int size() { - return _size; - } - - // Sort the items stored in the structure using bubble sort. - // This takes O(K^2) time. - __device__ __forceinline__ void sort() { - for (int i = 0; i < _size - 1; ++i) { - for (int j = 0; j < _size - i - 1; ++j) { - if (keys[j + 1] < keys[j]) { - key_t key = keys[j]; - value_t val = vals[j]; - keys[j] = keys[j + 1]; - vals[j] = vals[j + 1]; - keys[j + 1] = key; - vals[j + 1] = val; - } - } - } - } - - private: - key_t* keys; - value_t* vals; - int K; - int _size; - key_t max_key; - int max_idx; -}; - -// This is a version of MinK that only touches the arrays using static indexing -// via RegisterIndexUtils. If the keys and values are stored in thread-local -// arrays, then this may allow the compiler to place them in registers for -// fast access. -// -// This has the same API as RegisterMinK, but doesn't support sorting. -// We found that sorting via RegisterIndexUtils gave very poor performance, -// and suspect it may have prevented the compiler from placing the arrays -// into registers. 
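The add() logic described above keeps an unsorted buffer of the K smallest keys plus the index of the current maximum, so inserts are O(1) until the buffer is full and O(K) afterwards. A host-side sketch of the same bookkeeping (plain C++, fixed K = 3, hypothetical data), not the device-side class itself:

#include <cstdio>

// Track the 3 smallest keys (and their values) seen so far.
struct Min3 {
  float keys[3];
  int vals[3];
  int size = 0;
  int max_idx = 0; // index of the largest kept key once the buffer is full

  void add(float key, int val) {
    if (size < 3) {
      keys[size] = key;
      vals[size] = val;
      if (size == 0 || key > keys[max_idx]) max_idx = size;
      ++size;
    } else if (key < keys[max_idx]) {
      keys[max_idx] = key; // evict the current largest key
      vals[max_idx] = val;
      for (int k = 0; k < 3; ++k) { // rescan to find the new largest
        if (keys[k] > keys[max_idx]) max_idx = k;
      }
    }
  }
};

int main() {
  Min3 m;
  const float dists[6] = {5.0f, 1.0f, 4.0f, 0.5f, 9.0f, 2.0f};
  for (int i = 0; i < 6; ++i) m.add(dists[i], i);
  // The three smallest distances are 0.5, 1.0 and 2.0 (indices 3, 1, 5),
  // kept in arbitrary order.
  for (int k = 0; k < 3; ++k) printf("%f (idx %d)\n", m.keys[k], m.vals[k]);
}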
-template -class RegisterMinK { - public: - __device__ RegisterMinK(key_t* keys, value_t* vals) - : keys(keys), vals(vals), _size(0) {} - - __device__ __forceinline__ void add(const key_t& key, const value_t& val) { - if (_size < K) { - RegisterIndexUtils::set(keys, _size, key); - RegisterIndexUtils::set(vals, _size, val); - if (_size == 0 || key > max_key) { - max_key = key; - max_idx = _size; - } - _size++; - } else if (key < max_key) { - RegisterIndexUtils::set(keys, max_idx, key); - RegisterIndexUtils::set(vals, max_idx, val); - max_key = key; - for (int k = 0; k < K; ++k) { - key_t cur_key = RegisterIndexUtils::get(keys, k); - if (cur_key > max_key) { - max_key = cur_key; - max_idx = k; - } - } - } - } - - __device__ __forceinline__ int size() { - return _size; - } - - private: - key_t* keys; - value_t* vals; - int _size; - key_t max_key; - int max_idx; -}; diff --git a/pytorch3d/pytorch3d/csrc/utils/pytorch3d_cutils.h b/pytorch3d/pytorch3d/csrc/utils/pytorch3d_cutils.h deleted file mode 100644 index 48d04546e6f96fd7eb2c182f7b6d692f5821c1d8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/pytorch3d_cutils.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include - -#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor.") -#define CHECK_CONTIGUOUS(x) \ - TORCH_CHECK(x.is_contiguous(), #x " must be contiguous.") -#define CHECK_CONTIGUOUS_CUDA(x) \ - CHECK_CUDA(x); \ - CHECK_CONTIGUOUS(x) diff --git a/pytorch3d/pytorch3d/csrc/utils/vec2.h b/pytorch3d/pytorch3d/csrc/utils/vec2.h deleted file mode 100644 index f4550f918394c06ecbcca5db013e4f0f014ee914..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/vec2.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once -#include - -// A fixed-sized vector with basic arithmetic operators useful for -// representing 2D coordinates. -// TODO: switch to Eigen if more functionality is needed. - -template < - typename T, - typename = std::enable_if_t< - std::is_same::value || std::is_same::value>> -struct vec2 { - T x, y; - typedef T scalar_t; - vec2(T x, T y) : x(x), y(y) {} -}; - -template -inline vec2 operator+(const vec2& a, const vec2& b) { - return vec2(a.x + b.x, a.y + b.y); -} - -template -inline vec2 operator-(const vec2& a, const vec2& b) { - return vec2(a.x - b.x, a.y - b.y); -} - -template -inline vec2 operator*(const T a, const vec2& b) { - return vec2(a * b.x, a * b.y); -} - -template -inline vec2 operator/(const vec2& a, const T b) { - if (b == 0.0) { - AT_ERROR( - "denominator in vec2 division is 0"); // prevent divide by 0 errors. 
- } - return vec2(a.x / b, a.y / b); -} - -template -inline T dot(const vec2& a, const vec2& b) { - return a.x * b.x + a.y * b.y; -} - -template -inline T norm(const vec2& a, const vec2& b) { - const vec2 ba = b - a; - return sqrt(dot(ba, ba)); -} - -template -std::ostream& operator<<(std::ostream& os, const vec2& v) { - os << "vec2(" << v.x << ", " << v.y << ")"; - return os; -} diff --git a/pytorch3d/pytorch3d/csrc/utils/vec3.h b/pytorch3d/pytorch3d/csrc/utils/vec3.h deleted file mode 100644 index fc37bf5c0dc0f041e58bd801dbf4b7d36eb1979b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/vec3.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -// A fixed-sized vector with basic arithmetic operators useful for -// representing 3D coordinates. -// TODO: switch to Eigen if more functionality is needed. - -template < - typename T, - typename = std::enable_if_t< - std::is_same::value || std::is_same::value>> -struct vec3 { - T x, y, z; - typedef T scalar_t; - vec3(T x, T y, T z) : x(x), y(y), z(z) {} -}; - -template -inline vec3 operator+(const vec3& a, const vec3& b) { - return vec3(a.x + b.x, a.y + b.y, a.z + b.z); -} - -template -inline vec3 operator-(const vec3& a, const vec3& b) { - return vec3(a.x - b.x, a.y - b.y, a.z - b.z); -} - -template -inline vec3 operator/(const vec3& a, const T b) { - if (b == 0.0) { - AT_ERROR( - "denominator in vec3 division is 0"); // prevent divide by 0 errors. - } - return vec3(a.x / b, a.y / b, a.z / b); -} - -template -inline vec3 operator*(const T a, const vec3& b) { - return vec3(a * b.x, a * b.y, a * b.z); -} - -template -inline vec3 operator*(const vec3& a, const vec3& b) { - return vec3(a.x * b.x, a.y * b.y, a.z * b.z); -} - -template -inline T dot(const vec3& a, const vec3& b) { - return a.x * b.x + a.y * b.y + a.z * b.z; -} - -template -inline vec3 cross(const vec3& a, const vec3& b) { - return vec3( - a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); -} - -template -inline T norm(const vec3& a) { - return sqrt(dot(a, a)); -} - -template -std::ostream& operator<<(std::ostream& os, const vec3& v) { - os << "vec3(" << v.x << ", " << v.y << ", " << v.z << ")"; - return os; -} diff --git a/pytorch3d/pytorch3d/csrc/utils/warp_reduce.cuh b/pytorch3d/pytorch3d/csrc/utils/warp_reduce.cuh deleted file mode 100644 index 3c903019debf5db594a6c71e1296ccd764991736..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/csrc/utils/warp_reduce.cuh +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -// Helper functions WarpReduceMin and WarpReduceMax used in .cu files -// Starting in Volta, instructions are no longer synchronous within a warp. -// We need to call __syncwarp() to sync the 32 threads in the warp -// instead of all the threads in the block. 
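The comment above describes the pairwise, stride-halving comparisons that WarpReduceMin and WarpReduceMax perform within a warp. The sequential Python sketch below models the same sequence of comparisons (stride 32 down to 1 over 64 shared entries) and is only an illustration of the pattern; the helper name and random test data are invented, and it says nothing about the `__syncwarp()` synchronisation the device code needs.

```python
import random

def warp_reduce_min_ref(min_dists, min_idxs):
    """Sequential model of the per-warp min reduction over 64 entries."""
    for s in (32, 16, 8, 4, 2, 1):
        for tid in range(s):
            # Keep the smaller of the pair (tid, tid + s) at position tid.
            if min_dists[tid] > min_dists[tid + s]:
                min_dists[tid] = min_dists[tid + s]
                min_idxs[tid] = min_idxs[tid + s]
    return min_dists[0], min_idxs[0]


dists = [random.random() for _ in range(64)]
best_dist, best_idx = warp_reduce_min_ref(list(dists), list(range(64)))
assert best_dist == min(dists) and dists[best_idx] == best_dist
```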
- -template -__device__ void -WarpReduceMin(scalar_t* min_dists, int64_t* min_idxs, const size_t tid) { - // s = 32 - if (min_dists[tid] > min_dists[tid + 32]) { - min_idxs[tid] = min_idxs[tid + 32]; - min_dists[tid] = min_dists[tid + 32]; - } - __syncwarp(); - // s = 16 - if (min_dists[tid] > min_dists[tid + 16]) { - min_idxs[tid] = min_idxs[tid + 16]; - min_dists[tid] = min_dists[tid + 16]; - } - __syncwarp(); - // s = 8 - if (min_dists[tid] > min_dists[tid + 8]) { - min_idxs[tid] = min_idxs[tid + 8]; - min_dists[tid] = min_dists[tid + 8]; - } - __syncwarp(); - // s = 4 - if (min_dists[tid] > min_dists[tid + 4]) { - min_idxs[tid] = min_idxs[tid + 4]; - min_dists[tid] = min_dists[tid + 4]; - } - __syncwarp(); - // s = 2 - if (min_dists[tid] > min_dists[tid + 2]) { - min_idxs[tid] = min_idxs[tid + 2]; - min_dists[tid] = min_dists[tid + 2]; - } - __syncwarp(); - // s = 1 - if (min_dists[tid] > min_dists[tid + 1]) { - min_idxs[tid] = min_idxs[tid + 1]; - min_dists[tid] = min_dists[tid + 1]; - } - __syncwarp(); -} - -template -__device__ void WarpReduceMax( - volatile scalar_t* dists, - volatile int64_t* dists_idx, - const size_t tid) { - if (dists[tid] < dists[tid + 32]) { - dists[tid] = dists[tid + 32]; - dists_idx[tid] = dists_idx[tid + 32]; - } - __syncwarp(); - if (dists[tid] < dists[tid + 16]) { - dists[tid] = dists[tid + 16]; - dists_idx[tid] = dists_idx[tid + 16]; - } - __syncwarp(); - if (dists[tid] < dists[tid + 8]) { - dists[tid] = dists[tid + 8]; - dists_idx[tid] = dists_idx[tid + 8]; - } - __syncwarp(); - if (dists[tid] < dists[tid + 4]) { - dists[tid] = dists[tid + 4]; - dists_idx[tid] = dists_idx[tid + 4]; - } - __syncwarp(); - if (dists[tid] < dists[tid + 2]) { - dists[tid] = dists[tid + 2]; - dists_idx[tid] = dists_idx[tid + 2]; - } - __syncwarp(); - if (dists[tid] < dists[tid + 1]) { - dists[tid] = dists[tid + 1]; - dists_idx[tid] = dists_idx[tid + 1]; - } - __syncwarp(); -} diff --git a/pytorch3d/pytorch3d/datasets/__init__.py b/pytorch3d/pytorch3d/datasets/__init__.py deleted file mode 100644 index 3dbee1ebd4940b0671fe027a109d7e0704a659c5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .r2n2 import BlenderCamera, collate_batched_R2N2, R2N2, render_cubified_voxels -from .shapenet import ShapeNetCore -from .utils import collate_batched_meshes - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/datasets/r2n2/__init__.py b/pytorch3d/pytorch3d/datasets/r2n2/__init__.py deleted file mode 100644 index d2593d0440e2bfe463330c7e2551a114a00ed0d4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/r2n2/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from .r2n2 import R2N2 -from .utils import BlenderCamera, collate_batched_R2N2, render_cubified_voxels - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/datasets/r2n2/r2n2.py b/pytorch3d/pytorch3d/datasets/r2n2/r2n2.py deleted file mode 100644 index 6f93ad765b2c288aaf319931f49118fb5d58fc0c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/r2n2/r2n2.py +++ /dev/null @@ -1,425 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import json -import warnings -from os import path -from pathlib import Path -from typing import Dict, List, Optional - -import numpy as np -import torch -from PIL import Image -from pytorch3d.common.datatypes import Device -from pytorch3d.datasets.shapenet_base import ShapeNetBase -from pytorch3d.renderer import HardPhongShader -from tabulate import tabulate - -from .utils import ( - align_bbox, - BlenderCamera, - compute_extrinsic_matrix, - read_binvox_coords, - voxelize, -) - - -SYNSET_DICT_DIR = Path(__file__).resolve().parent -MAX_CAMERA_DISTANCE = 1.75 # Constant from R2N2. -VOXEL_SIZE = 128 -# Intrinsic matrix extracted from Blender. Taken from meshrcnn codebase: -# https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py -BLENDER_INTRINSIC = torch.tensor( - [ - [2.1875, 0.0, 0.0, 0.0], - [0.0, 2.1875, 0.0, 0.0], - [0.0, 0.0, -1.002002, -0.2002002], - [0.0, 0.0, -1.0, 0.0], - ] -) - - -class R2N2(ShapeNetBase): # pragma: no cover - """ - This class loads the R2N2 dataset from a given directory into a Dataset object. - The R2N2 dataset contains 13 categories that are a subset of the ShapeNetCore v.1 - dataset. The R2N2 dataset also contains its own 24 renderings of each object and - voxelized models. Most of the models have all 24 views in the same split, but there - are eight of them that divide their views between train and test splits. - - """ - - def __init__( - self, - split: str, - shapenet_dir: str, - r2n2_dir: str, - splits_file: str, - return_all_views: bool = True, - return_voxels: bool = False, - views_rel_path: str = "ShapeNetRendering", - voxels_rel_path: str = "ShapeNetVoxels", - load_textures: bool = True, - texture_resolution: int = 4, - ) -> None: - """ - Store each object's synset id and models id the given directories. - - Args: - split (str): One of (train, val, test). - shapenet_dir (str): Path to ShapeNet core v1. - r2n2_dir (str): Path to the R2N2 dataset. - splits_file (str): File containing the train/val/test splits. - return_all_views (bool): Indicator of whether or not to load all the views in - the split. If set to False, one of the views in the split will be randomly - selected and loaded. - return_voxels(bool): Indicator of whether or not to return voxels as a tensor - of shape (D, D, D) where D is the number of voxels along each dimension. - views_rel_path: path to rendered views within the r2n2_dir. If not specified, - the renderings are assumed to be at os.path.join(rn2n_dir, "ShapeNetRendering"). - voxels_rel_path: path to rendered views within the r2n2_dir. If not specified, - the renderings are assumed to be at os.path.join(rn2n_dir, "ShapeNetVoxels"). - load_textures: Boolean indicating whether textures should loaded for the model. - Textures will be of type TexturesAtlas i.e. a texture map per face. 
- texture_resolution: Int specifying the resolution of the texture map per face - created using the textures in the obj file. A - (texture_resolution, texture_resolution, 3) map is created per face. - - """ - super().__init__() - self.shapenet_dir = shapenet_dir - self.r2n2_dir = r2n2_dir - self.views_rel_path = views_rel_path - self.voxels_rel_path = voxels_rel_path - self.load_textures = load_textures - self.texture_resolution = texture_resolution - # Examine if split is valid. - if split not in ["train", "val", "test"]: - raise ValueError("split has to be one of (train, val, test).") - # Synset dictionary mapping synset offsets in R2N2 to corresponding labels. - with open( - path.join(SYNSET_DICT_DIR, "r2n2_synset_dict.json"), "r" - ) as read_dict: - self.synset_dict = json.load(read_dict) - # Inverse dictionary mapping synset labels to corresponding offsets. - self.synset_inv = {label: offset for offset, label in self.synset_dict.items()} - - # Store synset and model ids of objects mentioned in the splits_file. - with open(splits_file) as splits: - split_dict = json.load(splits)[split] - - self.return_images = True - # Check if the folder containing R2N2 renderings is included in r2n2_dir. - if not path.isdir(path.join(r2n2_dir, views_rel_path)): - self.return_images = False - msg = ( - "%s not found in %s. R2N2 renderings will " - "be skipped when returning models." - ) % (views_rel_path, r2n2_dir) - warnings.warn(msg) - - self.return_voxels = return_voxels - # Check if the folder containing voxel coordinates is included in r2n2_dir. - if not path.isdir(path.join(r2n2_dir, voxels_rel_path)): - self.return_voxels = False - msg = ( - "%s not found in %s. Voxel coordinates will " - "be skipped when returning models." - ) % (voxels_rel_path, r2n2_dir) - warnings.warn(msg) - - synset_set = set() - # Store lists of views of each model in a list. - self.views_per_model_list = [] - # Store tuples of synset label and total number of views in each category in a list. - synset_num_instances = [] - for synset in split_dict.keys(): - # Examine if the given synset is present in the ShapeNetCore dataset - # and is also part of the standard R2N2 dataset. - if not ( - path.isdir(path.join(shapenet_dir, synset)) - and synset in self.synset_dict - ): - msg = ( - "Synset category %s from the splits file is either not " - "present in %s or not part of the standard R2N2 dataset." - ) % (synset, shapenet_dir) - warnings.warn(msg) - continue - - synset_set.add(synset) - self.synset_start_idxs[synset] = len(self.synset_ids) - # Start counting total number of views in the current category. - synset_view_count = 0 - for model in split_dict[synset]: - # Examine if the given model is present in the ShapeNetCore path. - shapenet_path = path.join(shapenet_dir, synset, model) - if not path.isdir(shapenet_path): - msg = "Model %s from category %s is not present in %s." % ( - model, - synset, - shapenet_dir, - ) - warnings.warn(msg) - continue - self.synset_ids.append(synset) - self.model_ids.append(model) - - model_views = split_dict[synset][model] - # Randomly select a view index if return_all_views set to False. 
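For orientation, the splits file loaded earlier in this constructor maps each synset offset to its models, and each model to the list of R2N2 view indices that belong to the split; the dict below is a schematic example (the synset offsets come from the R2N2 mapping, the model ids are invented).

```python
# Schematic shape of split_dict = json.load(splits)[split]; model ids invented.
split_dict = {
    "03001627": {                      # chair
        "1a2b3c4d": [0, 1, 2, 5, 7],   # only these view indices are in the split
        "5e6f7a8b": list(range(24)),   # all 24 renderings in the same split
    },
    "02691156": {                      # airplane
        "9c0d1e2f": [3, 8, 12],
    },
}
```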
- if not return_all_views: - rand_idx = torch.randint(len(model_views), (1,)) - model_views = [model_views[rand_idx]] - self.views_per_model_list.append(model_views) - synset_view_count += len(model_views) - synset_num_instances.append((self.synset_dict[synset], synset_view_count)) - model_count = len(self.synset_ids) - self.synset_start_idxs[synset] - self.synset_num_models[synset] = model_count - headers = ["category", "#instances"] - synset_num_instances.append(("total", sum(n for _, n in synset_num_instances))) - print( - tabulate(synset_num_instances, headers, numalign="left", stralign="center") - ) - - # Examine if all the synsets in the standard R2N2 mapping are present. - # Update self.synset_inv so that it only includes the loaded categories. - synset_not_present = [ - self.synset_inv.pop(self.synset_dict[synset]) - for synset in self.synset_dict - if synset not in synset_set - ] - if len(synset_not_present) > 0: - msg = ( - "The following categories are included in R2N2's" - "official mapping but not found in the dataset location %s: %s" - ) % (shapenet_dir, ", ".join(synset_not_present)) - warnings.warn(msg) - - def __getitem__(self, model_idx, view_idxs: Optional[List[int]] = None) -> Dict: - """ - Read a model by the given index. - - Args: - model_idx: The idx of the model to be retrieved in the dataset. - view_idx: List of indices of the view to be returned. Each index needs to be - contained in the loaded split (always between 0 and 23, inclusive). If - an invalid index is supplied, view_idx will be ignored and all the loaded - views will be returned. - - Returns: - dictionary with following keys: - - verts: FloatTensor of shape (V, 3). - - faces: faces.verts_idx, LongTensor of shape (F, 3). - - synset_id (str): synset id. - - model_id (str): model id. - - label (str): synset label. - - images: FloatTensor of shape (V, H, W, C), where V is number of views - returned. Returns a batch of the renderings of the models from the R2N2 dataset. - - R: Rotation matrix of shape (V, 3, 3), where V is number of views returned. - - T: Translation matrix of shape (V, 3), where V is number of views returned. - - K: Intrinsic matrix of shape (V, 4, 4), where V is number of views returned. - - voxels: Voxels of shape (D, D, D), where D is the number of voxels along each - dimension. - """ - if isinstance(model_idx, tuple): - model_idx, view_idxs = model_idx - if view_idxs is not None: - if isinstance(view_idxs, int): - view_idxs = [view_idxs] - if not isinstance(view_idxs, list) and not torch.is_tensor(view_idxs): - raise TypeError( - "view_idxs is of type %s but it needs to be a list." - % type(view_idxs) - ) - - model_views = self.views_per_model_list[model_idx] - if view_idxs is not None and any( - idx not in self.views_per_model_list[model_idx] for idx in view_idxs - ): - msg = """At least one of the indices in view_idxs is not available. - Specified view of the model needs to be contained in the - loaded split. If return_all_views is set to False, only one - random view is loaded. Try accessing the specified view(s) - after loading the dataset with self.return_all_views set to True. 
- Now returning all view(s) in the loaded dataset.""" - warnings.warn(msg) - elif view_idxs is not None: - model_views = view_idxs - - model = self._get_item_ids(model_idx) - model_path = path.join( - self.shapenet_dir, model["synset_id"], model["model_id"], "model.obj" - ) - - verts, faces, textures = self._load_mesh(model_path) - model["verts"] = verts - model["faces"] = faces - model["textures"] = textures - model["label"] = self.synset_dict[model["synset_id"]] - - model["images"] = None - images, Rs, Ts, voxel_RTs = [], [], [], [] - # Retrieve R2N2's renderings if required. - if self.return_images: - rendering_path = path.join( - self.r2n2_dir, - self.views_rel_path, - model["synset_id"], - model["model_id"], - "rendering", - ) - # Read metadata file to obtain params for calibration matrices. - with open(path.join(rendering_path, "rendering_metadata.txt"), "r") as f: - metadata_lines = f.readlines() - for i in model_views: - # Read image. - image_path = path.join(rendering_path, "%02d.png" % i) - raw_img = Image.open(image_path) - image = torch.from_numpy(np.array(raw_img) / 255.0)[..., :3] - images.append(image.to(dtype=torch.float32)) - - # Get camera calibration. - azim, elev, yaw, dist_ratio, fov = [ - float(v) for v in metadata_lines[i].strip().split(" ") - ] - dist = dist_ratio * MAX_CAMERA_DISTANCE - # Extrinsic matrix before transformation to PyTorch3D world space. - RT = compute_extrinsic_matrix(azim, elev, dist) - R, T = self._compute_camera_calibration(RT) - Rs.append(R) - Ts.append(T) - voxel_RTs.append(RT) - - # Intrinsic matrix extracted from the Blender with slight modification to work with - # PyTorch3D world space. Taken from meshrcnn codebase: - # https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py - K = torch.tensor( - [ - [2.1875, 0.0, 0.0, 0.0], - [0.0, 2.1875, 0.0, 0.0], - [0.0, 0.0, -1.002002, -0.2002002], - [0.0, 0.0, 1.0, 0.0], - ] - ) - model["images"] = torch.stack(images) - model["R"] = torch.stack(Rs) - model["T"] = torch.stack(Ts) - model["K"] = K.expand(len(model_views), 4, 4) - - voxels_list = [] - - # Read voxels if required. - voxel_path = path.join( - self.r2n2_dir, - self.voxels_rel_path, - model["synset_id"], - model["model_id"], - "model.binvox", - ) - if self.return_voxels: - if not path.isfile(voxel_path): - msg = "Voxel file not found for model %s from category %s." - raise FileNotFoundError(msg % (model["model_id"], model["synset_id"])) - - with open(voxel_path, "rb") as f: - # Read voxel coordinates as a tensor of shape (N, 3). - voxel_coords = read_binvox_coords(f) - # Align voxels to the same coordinate system as mesh verts. - voxel_coords = align_bbox(voxel_coords, model["verts"]) - for RT in voxel_RTs: - # Compute projection matrix. - P = BLENDER_INTRINSIC.mm(RT) - # Convert voxel coordinates of shape (N, 3) to voxels of shape (D, D, D). - voxels = voxelize(voxel_coords, P, VOXEL_SIZE) - voxels_list.append(voxels) - model["voxels"] = torch.stack(voxels_list) - - return model - - def _compute_camera_calibration(self, RT): - """ - Helper function for calculating rotation and translation matrices from ShapeNet - to camera transformation and ShapeNet to PyTorch3D transformation. - - Args: - RT: Extrinsic matrix that performs ShapeNet world view to camera view - transformation. - - Returns: - R: Rotation matrix of shape (3, 3). - T: Translation matrix of shape (3). - """ - # Transform the mesh vertices from shapenet world to pytorch3d world. 
- shapenet_to_pytorch3d = torch.tensor( - [ - [-1.0, 0.0, 0.0, 0.0], - [0.0, 1.0, 0.0, 0.0], - [0.0, 0.0, -1.0, 0.0], - [0.0, 0.0, 0.0, 1.0], - ], - dtype=torch.float32, - ) - RT = torch.transpose(RT, 0, 1).mm(shapenet_to_pytorch3d) # (4, 4) - # Extract rotation and translation matrices from RT. - R = RT[:3, :3] - T = RT[3, :3] - return R, T - - def render( - self, - model_ids: Optional[List[str]] = None, - categories: Optional[List[str]] = None, - sample_nums: Optional[List[int]] = None, - idxs: Optional[List[int]] = None, - view_idxs: Optional[List[int]] = None, - shader_type=HardPhongShader, - device: Device = "cpu", - **kwargs, - ) -> torch.Tensor: - """ - Render models with BlenderCamera by default to achieve the same orientations as the - R2N2 renderings. Also accepts other types of cameras and any of the args that the - render function in the ShapeNetBase class accepts. - - Args: - view_idxs: each model will be rendered with the orientation(s) of the specified - views. Only render by view_idxs if no camera or args for BlenderCamera is - supplied. - Accepts any of the args of the render function in ShapeNetBase: - model_ids: List[str] of model_ids of models intended to be rendered. - categories: List[str] of categories intended to be rendered. categories - and sample_nums must be specified at the same time. categories can be given - in the form of synset offsets or labels, or a combination of both. - sample_nums: List[int] of number of models to be randomly sampled from - each category. Could also contain one single integer, in which case it - will be broadcasted for every category. - idxs: List[int] of indices of models to be rendered in the dataset. - shader_type: Shader to use for rendering. Examples include HardPhongShader - (default), SoftPhongShader etc or any other type of valid Shader class. - device: Device (as str or torch.device) on which the tensors should be located. - **kwargs: Accepts any of the kwargs that the renderer supports and any of the - args that BlenderCamera supports. - - Returns: - Batch of rendered images of shape (N, H, W, 3). - """ - idxs = self._handle_render_inputs(model_ids, categories, sample_nums, idxs) - r = torch.cat([self[idxs[i], view_idxs]["R"] for i in range(len(idxs))]) - t = torch.cat([self[idxs[i], view_idxs]["T"] for i in range(len(idxs))]) - k = torch.cat([self[idxs[i], view_idxs]["K"] for i in range(len(idxs))]) - # Initialize default camera using R, T, K from kwargs or R, T, K of the specified views. 
- blend_cameras = BlenderCamera( - R=kwargs.get("R", r), - T=kwargs.get("T", t), - K=kwargs.get("K", k), - device=device, - ) - cameras = kwargs.get("cameras", blend_cameras).to(device) - kwargs.pop("cameras", None) - # pass down all the same inputs - return super().render( - idxs=idxs, shader_type=shader_type, device=device, cameras=cameras, **kwargs - ) diff --git a/pytorch3d/pytorch3d/datasets/r2n2/r2n2_synset_dict.json b/pytorch3d/pytorch3d/datasets/r2n2/r2n2_synset_dict.json deleted file mode 100644 index b8cbae58173e58ea0607e95161e65944979aff23..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/r2n2/r2n2_synset_dict.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "04256520": "sofa", - "02933112": "cabinet", - "02828884": "bench", - "03001627": "chair", - "03211117": "display", - "04090263": "rifle", - "03691459": "loudspeaker", - "03636649": "lamp", - "04401088": "telephone", - "02691156": "airplane", - "04379243": "table", - "02958343": "car", - "04530566": "watercraft" -} diff --git a/pytorch3d/pytorch3d/datasets/r2n2/utils.py b/pytorch3d/pytorch3d/datasets/r2n2/utils.py deleted file mode 100644 index c7b80537fdf5bc443d90a48277e837d7abeeed62..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/r2n2/utils.py +++ /dev/null @@ -1,502 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Dict, List - -import numpy as np -import torch -from pytorch3d.common.datatypes import Device -from pytorch3d.datasets.utils import collate_batched_meshes -from pytorch3d.ops import cubify -from pytorch3d.renderer import ( - HardPhongShader, - MeshRasterizer, - MeshRenderer, - PointLights, - RasterizationSettings, - TexturesVertex, -) -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.transforms import Transform3d - - -# Empirical min and max over the dataset from meshrcnn. -# https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py#L9 -SHAPENET_MIN_ZMIN = 0.67 -SHAPENET_MAX_ZMAX = 0.92 -# Threshold for cubify from meshrcnn: -# https://github.com/facebookresearch/meshrcnn/blob/main/configs/shapenet/voxmesh_R50.yaml#L11 -CUBIFY_THRESH = 0.2 - -# Default values of rotation, translation and intrinsic matrices for BlenderCamera. -r = np.expand_dims(np.eye(3), axis=0) # (1, 3, 3) -t = np.expand_dims(np.zeros(3), axis=0) # (1, 3) -k = np.expand_dims(np.eye(4), axis=0) # (1, 4, 4) - - -def collate_batched_R2N2(batch: List[Dict]): # pragma: no cover - """ - Take a list of objects in the form of dictionaries and merge them - into a single dictionary. This function can be used with a Dataset - object to create a torch.utils.data.Dataloader which directly - returns Meshes objects. - TODO: Add support for textures. - - Args: - batch: List of dictionaries containing information about objects - in the dataset. - - Returns: - collated_dict: Dictionary of collated lists. If batch contains both - verts and faces, a collated mesh batch is also returned. - """ - collated_dict = collate_batched_meshes(batch) - - # If collate_batched_meshes receives R2N2 items with images and that - # all models have the same number of views V, stack the batches of - # views of each model into a new batch of shape (N, V, H, W, 3). - # Otherwise leave it as a list. 
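Tying the pieces above together, a typical way to consume R2N2 is with a torch DataLoader and the collate_batched_R2N2 function documented above, which stacks per-model views into batched tensors whenever every model contributes the same number of views. This is a usage sketch only: the directory paths and splits filename are placeholders for local data.

```python
import torch
from pytorch3d.datasets import R2N2, collate_batched_R2N2

# Placeholder paths; point them at local copies of ShapeNetCore v1 and R2N2.
dataset = R2N2(
    split="train",
    shapenet_dir="/data/ShapeNetCore.v1",
    r2n2_dir="/data/r2n2",
    splits_file="/data/r2n2/splits.json",
    return_voxels=False,
)

loader = torch.utils.data.DataLoader(
    dataset, batch_size=4, collate_fn=collate_batched_R2N2
)
batch = next(iter(loader))
# When all models have the same number of views V, batch["images"] is
# (N, V, H, W, 3) and batch["R"], batch["T"], batch["K"] are stacked likewise.

# Rendering a model with the stored Blender orientations of views 0 and 5:
images = dataset.render(idxs=[0], view_idxs=[0, 5])
```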
- if "images" in collated_dict: - try: - collated_dict["images"] = torch.stack(collated_dict["images"]) - except RuntimeError: - print( - "Models don't have the same number of views. Now returning " - "lists of images instead of batches." - ) - - # If collate_batched_meshes receives R2N2 items with camera calibration - # matrices and that all models have the same number of views V, stack each - # type of matrices into a new batch of shape (N, V, ...). - # Otherwise leave them as lists. - if all(x in collated_dict for x in ["R", "T", "K"]): - try: - collated_dict["R"] = torch.stack(collated_dict["R"]) # (N, V, 3, 3) - collated_dict["T"] = torch.stack(collated_dict["T"]) # (N, V, 3) - collated_dict["K"] = torch.stack(collated_dict["K"]) # (N, V, 4, 4) - except RuntimeError: - print( - "Models don't have the same number of views. Now returning " - "lists of calibration matrices instead of a batched tensor." - ) - - # If collate_batched_meshes receives voxels and all models have the same - # number of views V, stack the batches of voxels into a new batch of shape - # (N, V, S, S, S), where S is the voxel size. - if "voxels" in collated_dict: - try: - collated_dict["voxels"] = torch.stack(collated_dict["voxels"]) - except RuntimeError: - print( - "Models don't have the same number of views. Now returning " - "lists of voxels instead of a batched tensor." - ) - return collated_dict - - -def compute_extrinsic_matrix( - azimuth: float, elevation: float, distance: float -): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py#L96 - - Compute 4x4 extrinsic matrix that converts from homogeneous world coordinates - to homogeneous camera coordinates. We assume that the camera is looking at the - origin. - Used in R2N2 Dataset when computing calibration matrices. - - Args: - azimuth: Rotation about the z-axis, in degrees. - elevation: Rotation above the xy-plane, in degrees. - distance: Distance from the origin. - - Returns: - FloatTensor of shape (4, 4). - """ - azimuth, elevation, distance = float(azimuth), float(elevation), float(distance) - - az_rad = -math.pi * azimuth / 180.0 - el_rad = -math.pi * elevation / 180.0 - sa = math.sin(az_rad) - ca = math.cos(az_rad) - se = math.sin(el_rad) - ce = math.cos(el_rad) - R_world2obj = torch.tensor( - [[ca * ce, sa * ce, -se], [-sa, ca, 0], [ca * se, sa * se, ce]] - ) - R_obj2cam = torch.tensor([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]]) - R_world2cam = R_obj2cam.mm(R_world2obj) - cam_location = torch.tensor([[distance, 0, 0]]).t() - T_world2cam = -(R_obj2cam.mm(cam_location)) - RT = torch.cat([R_world2cam, T_world2cam], dim=1) - RT = torch.cat([RT, torch.tensor([[0.0, 0, 0, 1]])]) - - # Georgia: For some reason I cannot fathom, when Blender loads a .obj file it - # rotates the model 90 degrees about the x axis. To compensate for this quirk we - # roll that rotation into the extrinsic matrix here - rot = torch.tensor([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) - RT = RT.mm(rot.to(RT)) - - return RT - - -def read_binvox_coords( - f, - integer_division: bool = True, - dtype: torch.dtype = torch.float32, -): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/binvox_torch.py#L5 - - Read a binvox file and return the indices of all nonzero voxels. 
- - This matches the behavior of binvox_rw.read_as_coord_array - (https://github.com/dimatura/binvox-rw-py/blob/public/binvox_rw.py#L153) - but this implementation uses torch rather than numpy, and is more efficient - due to improved vectorization. - - Georgia: I think that binvox_rw.read_as_coord_array actually has a bug; when converting - linear indices into three-dimensional indices, they use floating-point - division instead of integer division. We can reproduce their incorrect - implementation by passing integer_division=False. - - Args: - f (str): A file pointer to the binvox file to read - integer_division (bool): If False, then match the buggy implementation from binvox_rw - dtype: Datatype of the output tensor. Use float64 to match binvox_rw - - Returns: - coords (tensor): A tensor of shape (N, 3) where N is the number of nonzero voxels, - and coords[i] = (x, y, z) gives the index of the ith nonzero voxel. If the - voxel grid has shape (V, V, V) then we have 0 <= x, y, z < V. - """ - size, translation, scale = _read_binvox_header(f) - storage = torch.ByteStorage.from_buffer(f.read()) - data = torch.tensor([], dtype=torch.uint8) - # pyre-fixme[28]: Unexpected keyword argument `source`. - data.set_(source=storage) - vals, counts = data[::2], data[1::2] - idxs = _compute_idxs(vals, counts) - if not integer_division: - idxs = idxs.to(dtype) - x_idxs = idxs // (size * size) - zy_idxs = idxs % (size * size) - z_idxs = zy_idxs // size - y_idxs = zy_idxs % size - coords = torch.stack([x_idxs, y_idxs, z_idxs], dim=1) - return coords.to(dtype) - - -def _compute_idxs(vals, counts): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/binvox_torch.py#L58 - - Fast vectorized version of index computation. - - Args: - vals: tensor of binary values indicating voxel presence in a dense format. - counts: tensor of number of occurrence of each value in vals. - - Returns: - idxs: A tensor of shape (N), where N is the number of nonzero voxels. - """ - # Consider an example where: - # vals = [0, 1, 0, 1, 1] - # counts = [2, 3, 3, 2, 1] - # - # These values of counts and vals mean that the dense binary grid is: - # [0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1] - # - # So the nonzero indices we want to return are: - # [2, 3, 4, 8, 9, 10] - - # After the cumsum we will have: - # end_idxs = [2, 5, 8, 10, 11] - end_idxs = counts.cumsum(dim=0) - - # After masking and computing start_idx we have: - # end_idxs = [5, 10, 11] - # counts = [3, 2, 1] - # start_idxs = [2, 8, 10] - mask = vals == 1 - end_idxs = end_idxs[mask] - counts = counts[mask].to(end_idxs) - start_idxs = end_idxs - counts - - # We initialize delta as: - # [2, 1, 1, 1, 1, 1] - delta = torch.ones(counts.sum().item(), dtype=torch.int64) - delta[0] = start_idxs[0] - - # We compute pos = [3, 5], val = [3, 0]; then delta is - # [2, 1, 1, 4, 1, 1] - pos = counts.cumsum(dim=0)[:-1] - val = start_idxs[1:] - end_idxs[:-1] - delta[pos] += val - - # A final cumsum gives the idx we want: [2, 3, 4, 8, 9, 10] - idxs = delta.cumsum(dim=0) - return idxs - - -def _read_binvox_header(f): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/binvox_torch.py#L99 - - Read binvox header and extract information regarding voxel sizes and translations - to original voxel coordinates. - - Args: - f (str): A file pointer to the binvox file to read. - - Returns: - size (int): size of voxel. 
- translation (tuple(float)): translation to original voxel coordinates. - scale (float): scale to original voxel coordinates. - """ - # First line of the header should be "#binvox 1" - line = f.readline().strip() - if line != b"#binvox 1": - raise ValueError("Invalid header (line 1)") - - # Second line of the header should be "dim [int] [int] [int]" - # and all three int should be the same - line = f.readline().strip() - if not line.startswith(b"dim "): - raise ValueError("Invalid header (line 2)") - dims = line.split(b" ") - try: - dims = [int(d) for d in dims[1:]] - except ValueError: - raise ValueError("Invalid header (line 2)") from None - if len(dims) != 3 or dims[0] != dims[1] or dims[0] != dims[2]: - raise ValueError("Invalid header (line 2)") - size = dims[0] - - # Third line of the header should be "translate [float] [float] [float]" - line = f.readline().strip() - if not line.startswith(b"translate "): - raise ValueError("Invalid header (line 3)") - translation = line.split(b" ") - if len(translation) != 4: - raise ValueError("Invalid header (line 3)") - try: - translation = tuple(float(t) for t in translation[1:]) - except ValueError: - raise ValueError("Invalid header (line 3)") from None - - # Fourth line of the header should be "scale [float]" - line = f.readline().strip() - if not line.startswith(b"scale "): - raise ValueError("Invalid header (line 4)") - line = line.split(b" ") - if not len(line) == 2: - raise ValueError("Invalid header (line 4)") - scale = float(line[1]) - - # Fifth line of the header should be "data" - line = f.readline().strip() - if not line == b"data": - raise ValueError("Invalid header (line 5)") - - return size, translation, scale - - -def align_bbox(src, tgt): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/tools/preprocess_shapenet.py#L263 - - Return a copy of src points in the coordinate system of tgt by applying a - scale and shift along each coordinate axis to make the min / max values align. - - Args: - src, tgt: Torch Tensor of shape (N, 3) - - Returns: - out: Torch Tensor of shape (N, 3) - """ - if src.ndim != 2 or tgt.ndim != 2: - raise ValueError("Both src and tgt need to have dimensions of 2.") - if src.shape[-1] != 3 or tgt.shape[-1] != 3: - raise ValueError( - "Both src and tgt need to have sizes of 3 along the second dimension." - ) - src_min = src.min(dim=0)[0] - src_max = src.max(dim=0)[0] - tgt_min = tgt.min(dim=0)[0] - tgt_max = tgt.max(dim=0)[0] - scale = (tgt_max - tgt_min) / (src_max - src_min) - shift = tgt_min - scale * src_min - out = scale * src + shift - return out - - -def voxelize(voxel_coords, P, V): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/tools/preprocess_shapenet.py#L284 - but changing flip y to flip x. - - Creating voxels of shape (D, D, D) from voxel_coords and projection matrix. - - Args: - voxel_coords: FloatTensor of shape (V, 3) giving voxel's coordinates aligned to - the vertices. - P: FloatTensor of shape (4, 4) giving the projection matrix. - V: Voxel size of the output. - - Returns: - voxels: Tensor of shape (D, D, D) giving the voxelized result. - """ - device = voxel_coords.device - voxel_coords = project_verts(voxel_coords, P) - - # Using the actual zmin and zmax of the model is bad because we need them - # to perform the inverse transform, which transform voxels back into world - # space for refinement or evaluation. 
Instead we use an empirical min and - # max over the dataset; that way it is consistent for all images. - zmin = SHAPENET_MIN_ZMIN - zmax = SHAPENET_MAX_ZMAX - - # Once we know zmin and zmax, we need to adjust the z coordinates so the - # range [zmin, zmax] instead runs from [-1, 1] - m = 2.0 / (zmax - zmin) - b = -2.0 * zmin / (zmax - zmin) - 1 - voxel_coords[:, 2].mul_(m).add_(b) - voxel_coords[:, 0].mul_(-1) # Flip x - - # Now voxels are in [-1, 1]^3; map to [0, V-1)^3 - voxel_coords = 0.5 * (V - 1) * (voxel_coords + 1.0) - voxel_coords = voxel_coords.round().to(torch.int64) - valid = (0 <= voxel_coords) * (voxel_coords < V) - valid = valid[:, 0] * valid[:, 1] * valid[:, 2] - x, y, z = voxel_coords.unbind(dim=1) - x, y, z = x[valid], y[valid], z[valid] - voxels = torch.zeros(V, V, V, dtype=torch.uint8, device=device) - voxels[z, y, x] = 1 - - return voxels - - -def project_verts(verts, P, eps: float = 1e-1): # pragma: no cover - """ - Copied from meshrcnn codebase: - https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py#L159 - - Project vertices using a 4x4 transformation matrix. - - Args: - verts: FloatTensor of shape (N, V, 3) giving a batch of vertex positions or of - shape (V, 3) giving a single set of vertex positions. - P: FloatTensor of shape (N, 4, 4) giving projection matrices or of shape (4, 4) - giving a single projection matrix. - - Returns: - verts_out: FloatTensor of shape (N, V, 3) giving vertex positions (x, y, z) - where verts_out[i] is the result of transforming verts[i] by P[i]. - """ - # Handle unbatched inputs - singleton = False - if verts.dim() == 2: - assert P.dim() == 2 - singleton = True - verts, P = verts[None], P[None] - - N, V = verts.shape[0], verts.shape[1] - dtype, device = verts.dtype, verts.device - - # Add an extra row of ones to the world-space coordinates of verts before - # multiplying by the projection matrix. We could avoid this allocation by - # instead multiplying by a 4x3 submatrix of the projection matrix, then - # adding the remaining 4x1 vector. Not sure whether there will be much - # performance difference between the two. - ones = torch.ones(N, V, 1, dtype=dtype, device=device) - verts_hom = torch.cat([verts, ones], dim=2) - verts_cam_hom = torch.bmm(verts_hom, P.transpose(1, 2)) - - # Avoid division by zero by clamping the absolute value - w = verts_cam_hom[:, :, 3:] - w_sign = w.sign() - w_sign[w == 0] = 1 - w = w_sign * w.abs().clamp(min=eps) - - verts_proj = verts_cam_hom[:, :, :3] / w - - if singleton: - return verts_proj[0] - return verts_proj - - -class BlenderCamera(CamerasBase): # pragma: no cover - """ - Camera for rendering objects with calibration matrices from the R2N2 dataset - (which uses Blender for rendering the views for each model). - """ - - def __init__(self, R=r, T=t, K=k, device: Device = "cpu") -> None: - """ - Args: - R: Rotation matrix of shape (N, 3, 3). - T: Translation matrix of shape (N, 3). - K: Intrinsic matrix of shape (N, 4, 4). - device: Device (as str or torch.device). - """ - # The initializer formats all inputs to torch tensors and broadcasts - # all the inputs to have the same batch dimension where necessary. 
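The project_verts helper above performs a standard homogeneous projection: append a column of ones, multiply by the transposed projection matrix, then divide by the (clamped) w coordinate. The standalone sketch below repeats that computation for unbatched input under the simplifying assumption that w is positive (the original clamps |w| while keeping its sign); the function name and test values are invented.

```python
import torch

def project_verts_ref(verts, P, eps=1e-1):
    """Project (V, 3) vertices with a (4, 4) matrix, assuming w > 0."""
    ones = torch.ones(verts.shape[0], 1, dtype=verts.dtype)
    verts_hom = torch.cat([verts, ones], dim=1)   # (V, 4) homogeneous coords
    verts_cam_hom = verts_hom @ P.t()             # apply the projection
    w = verts_cam_hom[:, 3:].clamp(min=eps)       # guard against divide-by-zero
    return verts_cam_hom[:, :3] / w


verts = torch.rand(8, 3)
assert torch.allclose(project_verts_ref(verts, torch.eye(4)), verts)
```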
- super().__init__(device=device, R=R, T=T, K=K) - - def get_projection_transform(self, **kwargs) -> Transform3d: - transform = Transform3d(device=self.device) - transform._matrix = self.K.transpose(1, 2).contiguous() - return transform - - def is_perspective(self): - return False - - def in_ndc(self): - return True - - -def render_cubified_voxels( - voxels: torch.Tensor, shader_type=HardPhongShader, device: Device = "cpu", **kwargs -): # pragma: no cover - """ - Use the Cubify operator to convert inputs voxels to a mesh and then render that mesh. - - Args: - voxels: FloatTensor of shape (N, D, D, D) where N is the batch size and - D is the number of voxels along each dimension. - shader_type: shader_type: shader_type: Shader to use for rendering. Examples - include HardPhongShader (default), SoftPhongShader etc or any other type - of valid Shader class. - device: Device (as str or torch.device) on which the tensors should be located. - **kwargs: Accepts any of the kwargs that the renderer supports. - Returns: - Batch of rendered images of shape (N, H, W, 3). - """ - cubified_voxels = cubify(voxels, CUBIFY_THRESH).to(device) - cubified_voxels.textures = TexturesVertex( - verts_features=torch.ones_like(cubified_voxels.verts_padded(), device=device) - ) - cameras = BlenderCamera(device=device) - renderer = MeshRenderer( - rasterizer=MeshRasterizer( - cameras=cameras, - raster_settings=kwargs.get("raster_settings", RasterizationSettings()), - ), - shader=shader_type( - device=device, - cameras=cameras, - lights=kwargs.get("lights", PointLights()).to(device), - ), - ) - return renderer(cubified_voxels) diff --git a/pytorch3d/pytorch3d/datasets/shapenet/__init__.py b/pytorch3d/pytorch3d/datasets/shapenet/__init__.py deleted file mode 100644 index 295ec79312e17f92baf722667f5a2d727bc703fa..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/shapenet/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .shapenet_core import ShapeNetCore - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_core.py b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_core.py deleted file mode 100644 index 61908414ea6a8a91e679cabc699f542f50deae62..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_core.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import json -import os -import warnings -from os import path -from pathlib import Path -from typing import Dict - -from pytorch3d.datasets.shapenet_base import ShapeNetBase - - -SYNSET_DICT_DIR = Path(__file__).resolve().parent - - -class ShapeNetCore(ShapeNetBase): # pragma: no cover - """ - This class loads ShapeNetCore from a given directory into a Dataset object. - ShapeNetCore is a subset of the ShapeNet dataset and can be downloaded from - https://www.shapenet.org/. - """ - - def __init__( - self, - data_dir, - synsets=None, - version: int = 1, - load_textures: bool = True, - texture_resolution: int = 4, - ) -> None: - """ - Store each object's synset id and models id from data_dir. 
- - Args: - data_dir: Path to ShapeNetCore data. - synsets: List of synset categories to load from ShapeNetCore in the form of - synset offsets or labels. A combination of both is also accepted. - When no category is specified, all categories in data_dir are loaded. - version: (int) version of ShapeNetCore data in data_dir, 1 or 2. - Default is set to be 1. Version 1 has 57 categories and version 2 has 55 - categories. - Note: version 1 has two categories 02858304(boat) and 02992529(cellphone) - that are hyponyms of categories 04530566(watercraft) and 04401088(telephone) - respectively. You can combine the categories manually if needed. - Version 2 doesn't have 02858304(boat) or 02834778(bicycle) compared to - version 1. - load_textures: Boolean indicating whether textures should loaded for the model. - Textures will be of type TexturesAtlas i.e. a texture map per face. - texture_resolution: Int specifying the resolution of the texture map per face - created using the textures in the obj file. A - (texture_resolution, texture_resolution, 3) map is created per face. - """ - super().__init__() - self.shapenet_dir = data_dir - self.load_textures = load_textures - self.texture_resolution = texture_resolution - - if version not in [1, 2]: - raise ValueError("Version number must be either 1 or 2.") - self.model_dir = "model.obj" if version == 1 else "models/model_normalized.obj" - - # Synset dictionary mapping synset offsets to corresponding labels. - dict_file = "shapenet_synset_dict_v%d.json" % version - with open(path.join(SYNSET_DICT_DIR, dict_file), "r") as read_dict: - self.synset_dict = json.load(read_dict) - # Inverse dictionary mapping synset labels to corresponding offsets. - self.synset_inv = {label: offset for offset, label in self.synset_dict.items()} - - # If categories are specified, check if each category is in the form of either - # synset offset or synset label, and if the category exists in the given directory. - if synsets is not None: - # Set of categories to load in the form of synset offsets. - synset_set = set() - for synset in synsets: - if (synset in self.synset_dict.keys()) and ( - path.isdir(path.join(data_dir, synset)) - ): - synset_set.add(synset) - elif (synset in self.synset_inv.keys()) and ( - (path.isdir(path.join(data_dir, self.synset_inv[synset]))) - ): - synset_set.add(self.synset_inv[synset]) - else: - msg = ( - "Synset category %s either not part of ShapeNetCore dataset " - "or cannot be found in %s." - ) % (synset, data_dir) - warnings.warn(msg) - # If no category is given, load every category in the given directory. - # Ignore synset folders not included in the official mapping. - else: - synset_set = { - synset - for synset in os.listdir(data_dir) - if path.isdir(path.join(data_dir, synset)) - and synset in self.synset_dict - } - - # Check if there are any categories in the official mapping that are not loaded. - # Update self.synset_inv so that it only includes the loaded categories. - synset_not_present = set(self.synset_dict.keys()).difference(synset_set) - [self.synset_inv.pop(self.synset_dict[synset]) for synset in synset_not_present] - - if len(synset_not_present) > 0: - msg = ( - "The following categories are included in ShapeNetCore ver.%d's " - "official mapping but not found in the dataset location %s: %s" - "" - ) % (version, data_dir, ", ".join(synset_not_present)) - warnings.warn(msg) - - # Extract model_id of each object from directory names. 
- # Each grandchildren directory of data_dir contains an object, and the name - # of the directory is the object's model_id. - for synset in synset_set: - self.synset_start_idxs[synset] = len(self.synset_ids) - for model in os.listdir(path.join(data_dir, synset)): - if not path.exists(path.join(data_dir, synset, model, self.model_dir)): - msg = ( - "Object file not found in the model directory %s " - "under synset directory %s." - ) % (model, synset) - warnings.warn(msg) - continue - self.synset_ids.append(synset) - self.model_ids.append(model) - model_count = len(self.synset_ids) - self.synset_start_idxs[synset] - self.synset_num_models[synset] = model_count - - def __getitem__(self, idx: int) -> Dict: - """ - Read a model by the given index. - - Args: - idx: The idx of the model to be retrieved in the dataset. - - Returns: - dictionary with following keys: - - verts: FloatTensor of shape (V, 3). - - faces: LongTensor of shape (F, 3) which indexes into the verts tensor. - - synset_id (str): synset id - - model_id (str): model id - - label (str): synset label. - """ - model = self._get_item_ids(idx) - model_path = path.join( - self.shapenet_dir, model["synset_id"], model["model_id"], self.model_dir - ) - verts, faces, textures = self._load_mesh(model_path) - model["verts"] = verts - model["faces"] = faces - model["textures"] = textures - model["label"] = self.synset_dict[model["synset_id"]] - return model diff --git a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v1.json b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v1.json deleted file mode 100644 index b2fc62ae62107a81e078ec02432fb554ae8f1b41..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v1.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "04379243": "table", - "02958343": "car", - "03001627": "chair", - "02691156": "airplane", - "04256520": "sofa", - "04090263": "rifle", - "03636649": "lamp", - "04530566": "watercraft", - "02828884": "bench", - "03691459": "loudspeaker", - "02933112": "cabinet", - "03211117": "display", - "04401088": "telephone", - "02924116": "bus", - "02808440": "bathtub", - "03467517": "guitar", - "03325088": "faucet", - "03046257": "clock", - "03991062": "flowerpot", - "03593526": "jar", - "02876657": "bottle", - "02871439": "bookshelf", - "03642806": "laptop", - "03624134": "knife", - "04468005": "train", - "02747177": "trash bin", - "03790512": "motorbike", - "03948459": "pistol", - "03337140": "file cabinet", - "02818832": "bed", - "03928116": "piano", - "04330267": "stove", - "03797390": "mug", - "02880940": "bowl", - "04554684": "washer", - "04004475": "printer", - "03513137": "helmet", - "03761084": "microwaves", - "04225987": "skateboard", - "04460130": "tower", - "02942699": "camera", - "02801938": "basket", - "02946921": "can", - "03938244": "pillow", - "03710193": "mailbox", - "03207941": "dishwasher", - "04099429": "rocket", - "02773838": "bag", - "02843684": "birdhouse", - "03261776": "earphone", - "03759954": "microphone", - "04074963": "remote", - "03085013": "keyboard", - "02834778": "bicycle", - "02954340": "cap", - "02858304": "boat", - "02992529": "mobile phone" -} diff --git a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v2.json b/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v2.json deleted file mode 100644 index f0107c93c3535e2454070be1dcb622ac66899c90..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/shapenet/shapenet_synset_dict_v2.json +++ /dev/null @@ -1,57 
+0,0 @@ -{ - "02691156": "airplane", - "02747177": "trash bin", - "02773838": "bag", - "02801938": "basket", - "02808440": "bathtub", - "02818832": "bed", - "02828884": "bench", - "02843684": "birdhouse", - "02871439": "bookshelf", - "02876657": "bottle", - "02880940": "bowl", - "02924116": "bus", - "02933112": "cabinet", - "02942699": "camera", - "02946921": "can", - "02954340": "cap", - "02958343": "car", - "02992529": "cellphone", - "03001627": "chair", - "03046257": "clock", - "03085013": "keyboard", - "03207941": "dishwasher", - "03211117": "display", - "03261776": "earphone", - "03325088": "faucet", - "03337140": "file cabinet", - "03467517": "guitar", - "03513137": "helmet", - "03593526": "jar", - "03624134": "knife", - "03636649": "lamp", - "03642806": "laptop", - "03691459": "loudspeaker", - "03710193": "mailbox", - "03759954": "microphone", - "03761084": "microwaves", - "03790512": "motorbike", - "03797390": "mug", - "03928116": "piano", - "03938244": "pillow", - "03948459": "pistol", - "03991062": "flowerpot", - "04004475": "printer", - "04074963": "remote", - "04090263": "rifle", - "04099429": "rocket", - "04225987": "skateboard", - "04256520": "sofa", - "04330267": "stove", - "04379243": "table", - "04401088": "telephone", - "04460130": "tower", - "04468005": "train", - "04530566": "watercraft", - "04554684": "washer" -} diff --git a/pytorch3d/pytorch3d/datasets/shapenet_base.py b/pytorch3d/pytorch3d/datasets/shapenet_base.py deleted file mode 100644 index 7160ca6cbec680faeffd5de65b077b3c13f13057..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/shapenet_base.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from typing import Dict, List, Optional, Tuple - -import torch -from pytorch3d.common.datatypes import Device -from pytorch3d.io import load_obj -from pytorch3d.renderer import ( - FoVPerspectiveCameras, - HardPhongShader, - MeshRasterizer, - MeshRenderer, - PointLights, - RasterizationSettings, - TexturesVertex, -) - -from .utils import collate_batched_meshes - - -class ShapeNetBase(torch.utils.data.Dataset): # pragma: no cover - """ - 'ShapeNetBase' implements a base Dataset for ShapeNet and R2N2 with helper methods. - It is not intended to be used on its own as a Dataset for a Dataloader. Both __init__ - and __getitem__ need to be implemented. - """ - - def __init__(self) -> None: - """ - Set up lists of synset_ids and model_ids. - """ - self.synset_ids = [] - self.model_ids = [] - self.synset_inv = {} - self.synset_start_idxs = {} - self.synset_num_models = {} - self.shapenet_dir = "" - self.model_dir = "model.obj" - self.load_textures = True - self.texture_resolution = 4 - - def __len__(self) -> int: - """ - Return number of total models in the loaded dataset. - """ - return len(self.model_ids) - - def __getitem__(self, idx) -> Dict: - """ - Read a model by the given index. Need to be implemented for every child class - of ShapeNetBase. - - Args: - idx: The idx of the model to be retrieved in the dataset. - - Returns: - dictionary containing information about the model. - """ - raise NotImplementedError( - "__getitem__ should be implemented in the child class of ShapeNetBase" - ) - - def _get_item_ids(self, idx) -> Dict: - """ - Read a model by the given index. 
- - Args: - idx: The idx of the model to be retrieved in the dataset. - - Returns: - dictionary with following keys: - - synset_id (str): synset id - - model_id (str): model id - """ - model = {} - model["synset_id"] = self.synset_ids[idx] - model["model_id"] = self.model_ids[idx] - return model - - def _load_mesh(self, model_path) -> Tuple: - verts, faces, aux = load_obj( - model_path, - create_texture_atlas=self.load_textures, - load_textures=self.load_textures, - texture_atlas_size=self.texture_resolution, - ) - if self.load_textures: - textures = aux.texture_atlas - # Some meshes don't have textures. In this case - # create a white texture map - if textures is None: - textures = verts.new_ones( - faces.verts_idx.shape[0], - self.texture_resolution, - self.texture_resolution, - 3, - ) - else: - textures = None - - return verts, faces.verts_idx, textures - - def render( - self, - model_ids: Optional[List[str]] = None, - categories: Optional[List[str]] = None, - sample_nums: Optional[List[int]] = None, - idxs: Optional[List[int]] = None, - shader_type=HardPhongShader, - device: Device = "cpu", - **kwargs, - ) -> torch.Tensor: - """ - If a list of model_ids are supplied, render all the objects by the given model_ids. - If no model_ids are supplied, but categories and sample_nums are specified, randomly - select a number of objects (number specified in sample_nums) in the given categories - and render these objects. If instead a list of idxs is specified, check if the idxs - are all valid and render models by the given idxs. Otherwise, randomly select a number - (first number in sample_nums, default is set to be 1) of models from the loaded dataset - and render these models. - - Args: - model_ids: List[str] of model_ids of models intended to be rendered. - categories: List[str] of categories intended to be rendered. categories - and sample_nums must be specified at the same time. categories can be given - in the form of synset offsets or labels, or a combination of both. - sample_nums: List[int] of number of models to be randomly sampled from - each category. Could also contain one single integer, in which case it - will be broadcasted for every category. - idxs: List[int] of indices of models to be rendered in the dataset. - shader_type: Select shading. Valid options include HardPhongShader (default), - SoftPhongShader, HardGouraudShader, SoftGouraudShader, HardFlatShader, - SoftSilhouetteShader. - device: Device (as str or torch.device) on which the tensors should be located. - **kwargs: Accepts any of the kwargs that the renderer supports. - - Returns: - Batch of rendered images of shape (N, H, W, 3). - """ - idxs = self._handle_render_inputs(model_ids, categories, sample_nums, idxs) - # Use the getitem method which loads mesh + texture - models = [self[idx] for idx in idxs] - meshes = collate_batched_meshes(models)["mesh"] - if meshes.textures is None: - meshes.textures = TexturesVertex( - verts_features=torch.ones_like(meshes.verts_padded(), device=device) - ) - - meshes = meshes.to(device) - cameras = kwargs.get("cameras", FoVPerspectiveCameras()).to(device) - if len(cameras) != 1 and len(cameras) % len(meshes) != 0: - raise ValueError("Mismatch between batch dims of cameras and meshes.") - if len(cameras) > 1: - # When rendering R2N2 models, if more than one views are provided, broadcast - # the meshes so that each mesh can be rendered for each of the views. 
- meshes = meshes.extend(len(cameras) // len(meshes)) - renderer = MeshRenderer( - rasterizer=MeshRasterizer( - cameras=cameras, - raster_settings=kwargs.get("raster_settings", RasterizationSettings()), - ), - shader=shader_type( - device=device, - cameras=cameras, - lights=kwargs.get("lights", PointLights()).to(device), - ), - ) - return renderer(meshes) - - def _handle_render_inputs( - self, - model_ids: Optional[List[str]] = None, - categories: Optional[List[str]] = None, - sample_nums: Optional[List[int]] = None, - idxs: Optional[List[int]] = None, - ) -> List[int]: - """ - Helper function for converting user provided model_ids, categories and sample_nums - to indices of models in the loaded dataset. If model idxs are provided, we check if - the idxs are valid. If no models are specified, the first model in the loaded dataset - is chosen. The function returns the file paths to the selected models. - - Args: - model_ids: List[str] of model_ids of models to be rendered. - categories: List[str] of categories to be rendered. - sample_nums: List[int] of number of models to be randomly sampled from - each category. - idxs: List[int] of indices of models to be rendered in the dataset. - - Returns: - List of paths of models to be rendered. - """ - # Get corresponding indices if model_ids are supplied. - if model_ids is not None and len(model_ids) > 0: - idxs = [] - for model_id in model_ids: - if model_id not in self.model_ids: - raise ValueError( - "model_id %s not found in the loaded dataset." % model_id - ) - idxs.append(self.model_ids.index(model_id)) - - # Sample random models if categories and sample_nums are supplied and get - # the corresponding indices. - elif categories is not None and len(categories) > 0: - sample_nums = [1] if sample_nums is None else sample_nums - if len(categories) != len(sample_nums) and len(sample_nums) != 1: - raise ValueError( - "categories and sample_nums needs to be of the same length or " - "sample_nums needs to be of length 1." - ) - - idxs_tensor = torch.empty(0, dtype=torch.int32) - for i in range(len(categories)): - category = self.synset_inv.get(categories[i], categories[i]) - if category not in self.synset_inv.values(): - raise ValueError( - "Category %s is not in the loaded dataset." % category - ) - # Broadcast if sample_nums has length of 1. - sample_num = sample_nums[i] if len(sample_nums) > 1 else sample_nums[0] - sampled_idxs = self._sample_idxs_from_category( - sample_num=sample_num, category=category - ) - # pyre-fixme[6]: For 1st param expected `Union[List[Tensor], - # typing.Tuple[Tensor, ...]]` but got `Tuple[Tensor, List[int]]`. - idxs_tensor = torch.cat((idxs_tensor, sampled_idxs)) - idxs = idxs_tensor.tolist() - # Check if the indices are valid if idxs are supplied. - elif idxs is not None and len(idxs) > 0: - if any(idx < 0 or idx >= len(self.model_ids) for idx in idxs): - raise IndexError( - "One or more idx values are out of bounds. Indices need to be" - "between 0 and %s." % (len(self.model_ids) - 1) - ) - # Check if sample_nums is specified, if so sample sample_nums[0] number - # of indices from the entire loaded dataset. Otherwise randomly select one - # index from the dataset. - else: - sample_nums = [1] if sample_nums is None else sample_nums - if len(sample_nums) > 1: - msg = ( - "More than one sample sizes specified, now sampling " - "%d models from the dataset." 
% sample_nums[0] - ) - warnings.warn(msg) - idxs = self._sample_idxs_from_category(sample_nums[0]) - return idxs - - def _sample_idxs_from_category( - self, sample_num: int = 1, category: Optional[str] = None - ) -> List[int]: - """ - Helper function for sampling a number of indices from the given category. - - Args: - sample_num: number of indices to be sampled from the given category. - category: category synset of the category to be sampled from. If not - specified, sample from all models in the loaded dataset. - """ - start = self.synset_start_idxs[category] if category is not None else 0 - range_len = ( - self.synset_num_models[category] if category is not None else self.__len__() - ) - replacement = sample_num > range_len - sampled_idxs = ( - torch.multinomial( - torch.ones((range_len), dtype=torch.float32), - sample_num, - replacement=replacement, - ) - + start - ) - if replacement: - msg = ( - "Sample size %d is larger than the number of objects in %s, " - "values sampled with replacement." - ) % ( - sample_num, - "category " + category if category is not None else "all categories", - ) - warnings.warn(msg) - # pyre-fixme[7]: Expected `List[int]` but got `Tensor`. - return sampled_idxs diff --git a/pytorch3d/pytorch3d/datasets/utils.py b/pytorch3d/pytorch3d/datasets/utils.py deleted file mode 100644 index 88aafac1df8de57992a6fee5c8ab8f10a33639d8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/datasets/utils.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Dict, List - -from pytorch3d.renderer.mesh import TexturesAtlas -from pytorch3d.structures import Meshes - - -def collate_batched_meshes(batch: List[Dict]): # pragma: no cover - """ - Take a list of objects in the form of dictionaries and merge them - into a single dictionary. This function can be used with a Dataset - object to create a torch.utils.data.Dataloader which directly - returns Meshes objects. - TODO: Add support for textures. - - Args: - batch: List of dictionaries containing information about objects - in the dataset. - - Returns: - collated_dict: Dictionary of collated lists. If batch contains both - verts and faces, a collated mesh batch is also returned. - """ - if batch is None or len(batch) == 0: - return None - collated_dict = {} - for k in batch[0].keys(): - collated_dict[k] = [d[k] for d in batch] - - collated_dict["mesh"] = None - if {"verts", "faces"}.issubset(collated_dict.keys()): - - textures = None - if "textures" in collated_dict: - textures = TexturesAtlas(atlas=collated_dict["textures"]) - - collated_dict["mesh"] = Meshes( - verts=collated_dict["verts"], - faces=collated_dict["faces"], - textures=textures, - ) - - return collated_dict diff --git a/pytorch3d/pytorch3d/implicitron/__init__.py b/pytorch3d/pytorch3d/implicitron/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
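For reference, the ShapeNet dataset helpers whose deletion ends above (ShapeNetBase plus collate_batched_meshes) were typically driven as in the following sketch. This is illustrative only: the ShapeNet root path and the "chair" category are placeholder assumptions, and ShapeNetCore is the concrete PyTorch3D subclass of ShapeNetBase used here to stand in for the base class.

    import torch
    from torch.utils.data import DataLoader
    from pytorch3d.datasets import ShapeNetCore, collate_batched_meshes

    # Hypothetical local copy of ShapeNetCore v2; adjust to your environment.
    SHAPENET_PATH = "/path/to/ShapeNetCore.v2"
    dataset = ShapeNetCore(SHAPENET_PATH, synsets=["chair"], version=2)

    # collate_batched_meshes merges the per-model dicts returned by __getitem__
    # and, when verts/faces are present, also builds a batched Meshes object.
    loader = DataLoader(dataset, batch_size=4, collate_fn=collate_batched_meshes)
    batch = next(iter(loader))
    meshes = batch["mesh"]  # Meshes batch holding 4 models

    # ShapeNetBase.render samples models (here two random chairs) and renders
    # them with the default FoVPerspectiveCameras / HardPhongShader setup.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    images = dataset.render(categories=["chair"], sample_nums=[2], device=device)

As the deleted render() docstring notes, categories may be given as synset offsets (e.g. "03001627") or labels, and a length-1 sample_nums is broadcast across all requested categories.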
diff --git a/pytorch3d/pytorch3d/implicitron/dataset/__init__.py b/pytorch3d/pytorch3d/implicitron/dataset/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py deleted file mode 100644 index 2eab25602659b802bdb2e411b2a4cf63b9542591..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -from pytorch3d.implicitron.tools.config import registry - -from .load_blender import load_blender_data -from .single_sequence_dataset import ( - _interpret_blender_cameras, - SingleSceneDatasetMapProviderBase, -) - - -@registry.register -class BlenderDatasetMapProvider(SingleSceneDatasetMapProviderBase): - """ - Provides data for one scene from Blender synthetic dataset. - Uses the code in load_blender.py - - Members: - base_dir: directory holding the data for the scene. - object_name: The name of the scene (e.g. "lego"). This is just used as a label. - It will typically be equal to the name of the directory self.base_dir. - path_manager_factory: Creates path manager which may be used for - interpreting paths. - n_known_frames_for_test: If set, training frames are included in the val - and test datasets, and this many random training frames are added to - each test batch. If not set, test batches each contain just a single - testing frame. - """ - - def _load_data(self) -> None: - path_manager = self.path_manager_factory.get() - images, poses, _, hwf, i_split = load_blender_data( - self.base_dir, - testskip=1, - path_manager=path_manager, - ) - H, W, focal = hwf - images_masks = torch.from_numpy(images).permute(0, 3, 1, 2) - - # pyre-ignore[16] - self.poses = _interpret_blender_cameras(poses, focal) - # pyre-ignore[16] - self.images = images_masks[:, :3] - # pyre-ignore[16] - self.fg_probabilities = images_masks[:, 3:4] - # pyre-ignore[16] - self.i_split = i_split diff --git a/pytorch3d/pytorch3d/implicitron/dataset/data_loader_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/data_loader_map_provider.py deleted file mode 100644 index 6c0436adf5b9551d735a46bf6f7c52d7d0660cab..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/data_loader_map_provider.py +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from dataclasses import dataclass -from enum import Enum -from typing import Iterator, List, Optional, Tuple - -import torch -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from torch.utils.data import ( - BatchSampler, - ConcatDataset, - DataLoader, - RandomSampler, - Sampler, -) - -from .dataset_base import DatasetBase -from .dataset_map_provider import DatasetMap -from .frame_data import FrameData -from .scene_batch_sampler import SceneBatchSampler -from .utils import is_known_frame_scalar - - -@dataclass -class DataLoaderMap: - """ - A collection of data loaders for Implicitron. - - Members: - - train: a data loader for training - val: a data loader for validating during training - test: a data loader for final evaluation - """ - - train: Optional[DataLoader[FrameData]] - val: Optional[DataLoader[FrameData]] - test: Optional[DataLoader[FrameData]] - - def __getitem__(self, split: str) -> Optional[DataLoader[FrameData]]: - """ - Get one of the data loaders by key (name of data split) - """ - if split not in ["train", "val", "test"]: - raise ValueError(f"{split} was not a valid split name (train/val/test)") - return getattr(self, split) - - -class DataLoaderMapProviderBase(ReplaceableBase): - """ - Provider of a collection of data loaders for a given collection of datasets. - """ - - def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap: - """ - Returns a collection of data loaders for a given collection of datasets. - """ - raise NotImplementedError() - - -@registry.register -class SimpleDataLoaderMapProvider(DataLoaderMapProviderBase): - """ - Trivial implementation of DataLoaderMapProviderBase. - - If a dataset returns batches from get_eval_batches(), then - they will be what the corresponding dataloader returns, - independently of any of the fields on this class. - - Otherwise, returns shuffled batches. - """ - - batch_size: int = 1 - num_workers: int = 0 - dataset_length_train: int = 0 - dataset_length_val: int = 0 - dataset_length_test: int = 0 - - def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap: - """ - Returns a collection of data loaders for a given collection of datasets. - """ - return DataLoaderMap( - train=self._make_data_loader( - datasets.train, - self.dataset_length_train, - ), - val=self._make_data_loader( - datasets.val, - self.dataset_length_val, - ), - test=self._make_data_loader( - datasets.test, - self.dataset_length_test, - ), - ) - - def _make_data_loader( - self, - dataset: Optional[DatasetBase], - num_batches: int, - ) -> Optional[DataLoader[FrameData]]: - """ - Returns the dataloader for a dataset. 
- - Args: - dataset: the dataset - num_batches: possible ceiling on number of batches per epoch - """ - if dataset is None: - return None - - data_loader_kwargs = { - "num_workers": self.num_workers, - "collate_fn": dataset.frame_data_type.collate, - } - - eval_batches = dataset.get_eval_batches() - if eval_batches is not None: - return DataLoader( - dataset, - batch_sampler=eval_batches, - **data_loader_kwargs, - ) - - if num_batches > 0: - num_samples = self.batch_size * num_batches - else: - num_samples = None - - # sample with replacement only if a custom number of samples is specified - sampler = RandomSampler( - dataset, - replacement=num_samples is not None, - num_samples=num_samples, - ) - - batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True) - return DataLoader( - dataset, - batch_sampler=batch_sampler, - **data_loader_kwargs, - ) - - -class DoublePoolBatchSampler(Sampler[List[int]]): - """ - Batch sampler for making random batches of a single frame - from one list and a number of known frames from another list. - """ - - def __init__( - self, - first_indices: List[int], - rest_indices: List[int], - batch_size: int, - replacement: bool, - num_batches: Optional[int] = None, - ) -> None: - """ - Args: - first_indices: indexes of dataset items to use as the first element - of each batch. - rest_indices: indexes of dataset items to use as the subsequent - elements of each batch. Not used if batch_size==1. - batch_size: The common size of any batch. - replacement: Whether the sampling of first items is with replacement. - num_batches: The number of batches in an epoch. If 0 or None, - one epoch is the length of `first_indices`. - """ - self.first_indices = first_indices - self.rest_indices = rest_indices - self.batch_size = batch_size - self.replacement = replacement - self.num_batches = None if num_batches == 0 else num_batches - - if batch_size - 1 > len(rest_indices): - raise ValueError( - f"Cannot make up ({batch_size})-batches from {len(self.rest_indices)}" - ) - - # copied from RandomSampler - seed = int(torch.empty((), dtype=torch.int64).random_().item()) - self.generator = torch.Generator() - self.generator.manual_seed(seed) - - def __len__(self) -> int: - if self.num_batches is not None: - return self.num_batches - return len(self.first_indices) - - def __iter__(self) -> Iterator[List[int]]: - num_batches = self.num_batches - if self.replacement: - i_first = torch.randint( - len(self.first_indices), - size=(len(self),), - generator=self.generator, - ) - elif num_batches is not None: - n_copies = 1 + (num_batches - 1) // len(self.first_indices) - raw_indices = [ - torch.randperm(len(self.first_indices), generator=self.generator) - for _ in range(n_copies) - ] - i_first = torch.cat(raw_indices)[:num_batches] - else: - i_first = torch.randperm(len(self.first_indices), generator=self.generator) - first_indices = [self.first_indices[i] for i in i_first] - - if self.batch_size == 1: - for first_index in first_indices: - yield [first_index] - return - - for first_index in first_indices: - # Consider using this class in a program which sets the seed. This use - # of randperm means that rerunning with a higher batch_size - # results in batches whose first elements as the first run. - i_rest = torch.randperm( - len(self.rest_indices), - generator=self.generator, - )[: self.batch_size - 1] - yield [first_index] + [self.rest_indices[i] for i in i_rest] - - -class BatchConditioningType(Enum): - """ - Ways to add conditioning frames for the val and test batches. 
- - SAME: Use the corresponding dataset for all elements of val batches - without regard to frame type. - TRAIN: Use the corresponding dataset for the first element of each - batch, and the training dataset for the extra conditioning - elements. No regard to frame type. - KNOWN: Use frames from the corresponding dataset but separate them - according to their frame_type. Each batch will contain one UNSEEN - frame followed by many KNOWN frames. - """ - - SAME = "same" - TRAIN = "train" - KNOWN = "known" - - -@registry.register -class SequenceDataLoaderMapProvider(DataLoaderMapProviderBase): - """ - Default implementation of DataLoaderMapProviderBase. - - If a dataset returns batches from get_eval_batches(), then - they will be what the corresponding dataloader returns, - independently of any of the fields on this class. - - If conditioning is not required, then the batch size should - be set as 1, and most of the fields do not matter. - - If conditioning is required, each batch will contain one main - frame first to predict and the, rest of the elements are for - conditioning. - - If images_per_seq_options is left empty, the conditioning - frames are picked according to the conditioning type given. - This does not have regard to the order of frames in a - scene, or which frames belong to what scene. - - If images_per_seq_options is given, then the conditioning types - must be SAME and the remaining fields are used. - - Members: - batch_size: The size of the batch of the data loader. - num_workers: Number of data-loading threads in each data loader. - dataset_length_train: The number of batches in a training epoch. Or 0 to mean - an epoch is the length of the training set. - dataset_length_val: The number of batches in a validation epoch. Or 0 to mean - an epoch is the length of the validation set. - dataset_length_test: The number of batches in a testing epoch. Or 0 to mean - an epoch is the length of the test set. - train_conditioning_type: Whether the train data loader should use - only known frames for conditioning. - Only used if batch_size>1 and train dataset is - present and does not return eval_batches. - val_conditioning_type: Whether the val data loader should use - training frames or known frames for conditioning. - Only used if batch_size>1 and val dataset is - present and does not return eval_batches. - test_conditioning_type: Whether the test data loader should use - training frames or known frames for conditioning. - Only used if batch_size>1 and test dataset is - present and does not return eval_batches. - images_per_seq_options: Possible numbers of frames sampled per sequence in a batch. - If a conditioning_type is KNOWN or TRAIN, then this must be left at its initial - value. Empty (the default) means that we are not careful about which frames - come from which scene. - sample_consecutive_frames: if True, will sample a contiguous interval of frames - in the sequence. It first sorts the frames by timestimps when available, - otherwise by frame numbers, finds the connected segments within the sequence - of sufficient length, then samples a random pivot element among them and - ideally uses it as a middle of the temporal window, shifting the borders - where necessary. This strategy mitigates the bias against shorter segments - and their boundaries. 
- consecutive_frames_max_gap: if a number > 0, then used to define the maximum - difference in frame_number of neighbouring frames when forming connected - segments; if both this and consecutive_frames_max_gap_seconds are 0s, - the whole sequence is considered a segment regardless of frame numbers. - consecutive_frames_max_gap_seconds: if a number > 0.0, then used to define the - maximum difference in frame_timestamp of neighbouring frames when forming - connected segments; if both this and consecutive_frames_max_gap are 0s, - the whole sequence is considered a segment regardless of frame timestamps. - """ - - batch_size: int = 1 - num_workers: int = 0 - dataset_length_train: int = 0 - dataset_length_val: int = 0 - dataset_length_test: int = 0 - train_conditioning_type: BatchConditioningType = BatchConditioningType.SAME - val_conditioning_type: BatchConditioningType = BatchConditioningType.SAME - test_conditioning_type: BatchConditioningType = BatchConditioningType.KNOWN - images_per_seq_options: Tuple[int, ...] = () - sample_consecutive_frames: bool = False - consecutive_frames_max_gap: int = 0 - consecutive_frames_max_gap_seconds: float = 0.1 - - def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap: - """ - Returns a collection of data loaders for a given collection of datasets. - """ - return DataLoaderMap( - train=self._make_data_loader( - datasets.train, - self.dataset_length_train, - datasets.train, - self.train_conditioning_type, - ), - val=self._make_data_loader( - datasets.val, - self.dataset_length_val, - datasets.train, - self.val_conditioning_type, - ), - test=self._make_data_loader( - datasets.test, - self.dataset_length_test, - datasets.train, - self.test_conditioning_type, - ), - ) - - def _make_data_loader( - self, - dataset: Optional[DatasetBase], - num_batches: int, - train_dataset: Optional[DatasetBase], - conditioning_type: BatchConditioningType, - ) -> Optional[DataLoader[FrameData]]: - """ - Returns the dataloader for a dataset. 
- - Args: - dataset: the dataset - num_batches: possible ceiling on number of batches per epoch - train_dataset: the training dataset, used if conditioning_type==TRAIN - conditioning_type: source for padding of batches - """ - if dataset is None: - return None - - data_loader_kwargs = { - "num_workers": self.num_workers, - "collate_fn": dataset.frame_data_type.collate, - } - - eval_batches = dataset.get_eval_batches() - if eval_batches is not None: - return DataLoader( - dataset, - batch_sampler=eval_batches, - **data_loader_kwargs, - ) - - scenes_matter = len(self.images_per_seq_options) > 0 - if scenes_matter and conditioning_type != BatchConditioningType.SAME: - raise ValueError( - f"{conditioning_type} cannot be used with images_per_seq " - + str(self.images_per_seq_options) - ) - - if self.batch_size == 1 or ( - not scenes_matter and conditioning_type == BatchConditioningType.SAME - ): - return self._simple_loader(dataset, num_batches, data_loader_kwargs) - - if scenes_matter: - assert conditioning_type == BatchConditioningType.SAME - batch_sampler = SceneBatchSampler( - dataset, - self.batch_size, - num_batches=len(dataset) if num_batches <= 0 else num_batches, - images_per_seq_options=self.images_per_seq_options, - sample_consecutive_frames=self.sample_consecutive_frames, - consecutive_frames_max_gap=self.consecutive_frames_max_gap, - consecutive_frames_max_gap_seconds=self.consecutive_frames_max_gap_seconds, - ) - return DataLoader( - dataset, - batch_sampler=batch_sampler, - **data_loader_kwargs, - ) - - if conditioning_type == BatchConditioningType.TRAIN: - return self._train_loader( - dataset, train_dataset, num_batches, data_loader_kwargs - ) - - assert conditioning_type == BatchConditioningType.KNOWN - return self._known_loader(dataset, num_batches, data_loader_kwargs) - - def _simple_loader( - self, - dataset: DatasetBase, - num_batches: int, - data_loader_kwargs: dict, - ) -> DataLoader[FrameData]: - """ - Return a simple loader for frames in the dataset. - - This is equivalent to - Dataloader(dataset, batch_size=self.batch_size, **data_loader_kwargs) - except that num_batches is fixed. - - Args: - dataset: the dataset - num_batches: possible ceiling on number of batches per epoch - data_loader_kwargs: common args for dataloader - """ - if num_batches > 0: - num_samples = self.batch_size * num_batches - replacement = True - else: - num_samples = None - replacement = False - sampler = RandomSampler( - dataset, replacement=replacement, num_samples=num_samples - ) - batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True) - return DataLoader( - dataset, - batch_sampler=batch_sampler, - **data_loader_kwargs, - ) - - def _train_loader( - self, - dataset: DatasetBase, - train_dataset: Optional[DatasetBase], - num_batches: int, - data_loader_kwargs: dict, - ) -> DataLoader[FrameData]: - """ - Return the loader for TRAIN conditioning. 
- - Args: - dataset: the dataset - train_dataset: the training dataset - num_batches: possible ceiling on number of batches per epoch - data_loader_kwargs: common args for dataloader - """ - if train_dataset is None: - raise ValueError("No training data for conditioning.") - length = len(dataset) - first_indices = list(range(length)) - rest_indices = list(range(length, length + len(train_dataset))) - sampler = DoublePoolBatchSampler( - first_indices=first_indices, - rest_indices=rest_indices, - batch_size=self.batch_size, - replacement=True, - num_batches=num_batches, - ) - return DataLoader( - ConcatDataset([dataset, train_dataset]), - batch_sampler=sampler, - **data_loader_kwargs, - ) - - def _known_loader( - self, - dataset: DatasetBase, - num_batches: int, - data_loader_kwargs: dict, - ) -> DataLoader[FrameData]: - """ - Return the loader for KNOWN conditioning. - - Args: - dataset: the dataset - num_batches: possible ceiling on number of batches per epoch - data_loader_kwargs: common args for dataloader - """ - first_indices, rest_indices = [], [] - for idx in range(len(dataset)): - frame_type = dataset[idx].frame_type - assert isinstance(frame_type, str) - if is_known_frame_scalar(frame_type): - rest_indices.append(idx) - else: - first_indices.append(idx) - sampler = DoublePoolBatchSampler( - first_indices=first_indices, - rest_indices=rest_indices, - batch_size=self.batch_size, - replacement=True, - num_batches=num_batches, - ) - return DataLoader( - dataset, - batch_sampler=sampler, - **data_loader_kwargs, - ) diff --git a/pytorch3d/pytorch3d/implicitron/dataset/data_source.py b/pytorch3d/pytorch3d/implicitron/dataset/data_source.py deleted file mode 100644 index a7989ac900daff4a8eb9f7c724829df0e0d4dc6b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/data_source.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional, Tuple - -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) -from pytorch3d.renderer.cameras import CamerasBase - -from .data_loader_map_provider import DataLoaderMap, DataLoaderMapProviderBase -from .dataset_map_provider import DatasetMap, DatasetMapProviderBase - - -class DataSourceBase(ReplaceableBase): - """ - Base class for a data source in Implicitron. It encapsulates Dataset - and DataLoader configuration. - """ - - def get_datasets_and_dataloaders(self) -> Tuple[DatasetMap, DataLoaderMap]: - raise NotImplementedError() - - @property - def all_train_cameras(self) -> Optional[CamerasBase]: - """ - DEPRECATED! The property will be removed in future versions. - If the data is all for a single scene, a list - of the known training cameras for that scene, which is - used for evaluating the viewpoint difficulty of the - unseen cameras. - """ - raise NotImplementedError() - - -@registry.register -class ImplicitronDataSource(DataSourceBase): # pyre-ignore[13] - """ - Represents the data used in Implicitron. This is the only implementation - of DataSourceBase provided. - - Members: - dataset_map_provider_class_type: identifies type for dataset_map_provider. - e.g. JsonIndexDatasetMapProvider for Co3D. - data_loader_map_provider_class_type: identifies type for data_loader_map_provider. 
- """ - - dataset_map_provider: DatasetMapProviderBase - dataset_map_provider_class_type: str - data_loader_map_provider: DataLoaderMapProviderBase - data_loader_map_provider_class_type: str = "SequenceDataLoaderMapProvider" - - @classmethod - def pre_expand(cls) -> None: - # use try/finally to bypass cinder's lazy imports - try: - from .blender_dataset_map_provider import ( # noqa: F401 - BlenderDatasetMapProvider, - ) - from .json_index_dataset_map_provider import ( # noqa: F401 - JsonIndexDatasetMapProvider, - ) - from .json_index_dataset_map_provider_v2 import ( # noqa: F401 - JsonIndexDatasetMapProviderV2, - ) - from .llff_dataset_map_provider import LlffDatasetMapProvider # noqa: F401 - from .rendered_mesh_dataset_map_provider import ( # noqa: F401 - RenderedMeshDatasetMapProvider, - ) - from .train_eval_data_loader_provider import ( # noqa: F401 - TrainEvalDataLoaderMapProvider, - ) - - try: - from .sql_dataset_provider import ( # noqa: F401 # pyre-ignore - SqlIndexDatasetMapProvider, - ) - except ModuleNotFoundError: - pass # environment without SQL dataset - finally: - pass - - def __post_init__(self): - run_auto_creation(self) - self._all_train_cameras_cache: Optional[Tuple[Optional[CamerasBase]]] = None - - def get_datasets_and_dataloaders(self) -> Tuple[DatasetMap, DataLoaderMap]: - datasets = self.dataset_map_provider.get_dataset_map() - dataloaders = self.data_loader_map_provider.get_data_loader_map(datasets) - return datasets, dataloaders - - @property - def all_train_cameras(self) -> Optional[CamerasBase]: - """ - DEPRECATED! The property will be removed in future versions. - """ - if self._all_train_cameras_cache is None: # pyre-ignore[16] - all_train_cameras = self.dataset_map_provider.get_all_train_cameras() - self._all_train_cameras_cache = (all_train_cameras,) - - return self._all_train_cameras_cache[0] diff --git a/pytorch3d/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/pytorch3d/implicitron/dataset/dataset_base.py deleted file mode 100644 index 033b170c0969c9220fe8b6246ff5cfe768ab79e1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/dataset_base.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from collections import defaultdict -from dataclasses import dataclass -from typing import ( - ClassVar, - Dict, - Iterable, - Iterator, - List, - Optional, - Sequence, - Tuple, - Type, -) - -import torch - -from pytorch3d.implicitron.dataset.frame_data import FrameData -from pytorch3d.implicitron.dataset.utils import GenericWorkaround - - -@dataclass(eq=False) -class DatasetBase(GenericWorkaround, torch.utils.data.Dataset[FrameData]): - """ - Base class to describe a dataset to be used with Implicitron. - - The dataset is made up of frames, and the frames are grouped into sequences. - Each sequence has a name (a string). - (A sequence could be a video, or a set of images of one scene.) - - This means they have a __getitem__ which returns an instance of a FrameData, - which will describe one frame in one sequence. - """ - - # _seq_to_idx is a member which implementations can define. - # It maps sequence name to the sequence's global frame indices. - # It is used for the default implementations of some functions in this class. - # Implementations which override them are free to ignore it. 
- # _seq_to_idx: Dict[str, List[int]] = field(init=False) - - def __len__(self) -> int: - raise NotImplementedError() - - def get_frame_numbers_and_timestamps( - self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None - ) -> List[Tuple[int, float]]: - """ - If the sequences in the dataset are videos rather than - unordered views, then the dataset should override this method to - return the index and timestamp in their videos of the frames whose - indices are given in `idxs`. In addition, - the values in _seq_to_idx should be in ascending order. - If timestamps are absent, they should be replaced with a constant. - - This is used for letting SceneBatchSampler identify consecutive - frames. - - Args: - idxs: frame index in self - subset_filter: If given, an index in idxs is ignored if the - corresponding frame is not in any of the named subsets. - - Returns: - tuple of - - frame index in video - - timestamp of frame in video - """ - raise ValueError("This dataset does not contain videos.") - - def join(self, other_datasets: Iterable["DatasetBase"]) -> None: - """ - Joins the current dataset with a list of other datasets of the same type. - """ - raise NotImplementedError() - - def get_eval_batches(self) -> Optional[List[List[int]]]: - return None - - def sequence_names(self) -> Iterable[str]: - """Returns an iterator over sequence names in the dataset.""" - # pyre-ignore[16] - return self._seq_to_idx.keys() - - def category_to_sequence_names(self) -> Dict[str, List[str]]: - """ - Returns a dict mapping from each dataset category to a list of its - sequence names. - - Returns: - category_to_sequence_names: Dict {category_i: [..., sequence_name_j, ...]} - """ - c2seq = defaultdict(list) - for sequence_name in self.sequence_names(): - first_frame_idx = next(self.sequence_indices_in_order(sequence_name)) - # crashes without overriding __getitem__ - sequence_category = self[first_frame_idx].sequence_category - c2seq[sequence_category].append(sequence_name) - return dict(c2seq) - - def sequence_frames_in_order( - self, seq_name: str, subset_filter: Optional[Sequence[str]] = None - ) -> Iterator[Tuple[float, int, int]]: - """Returns an iterator over the frame indices in a given sequence. - We attempt to first sort by timestamp (if they are available), - then by frame number. - - Args: - seq_name: the name of the sequence. - - Returns: - an iterator over triplets `(timestamp, frame_no, dataset_idx)`, - where `frame_no` is the index within the sequence, and - `dataset_idx` is the index within the dataset. - `None` timestamps are replaced with 0s. - """ - # pyre-ignore[16] - seq_frame_indices = self._seq_to_idx[seq_name] - nos_timestamps = self.get_frame_numbers_and_timestamps( - seq_frame_indices, subset_filter - ) - - yield from sorted( - [ - (timestamp, frame_no, idx) - for idx, (frame_no, timestamp) in zip(seq_frame_indices, nos_timestamps) - ] - ) - - def sequence_indices_in_order( - self, seq_name: str, subset_filter: Optional[Sequence[str]] = None - ) -> Iterator[int]: - """Same as `sequence_frames_in_order` but returns the iterator over - only dataset indices. - """ - for _, _, idx in self.sequence_frames_in_order(seq_name, subset_filter): - yield idx - - # frame_data_type is the actual type of frames returned by the dataset. 
- # Collation uses its classmethod `collate` - frame_data_type: ClassVar[Type[FrameData]] = FrameData diff --git a/pytorch3d/pytorch3d/implicitron/dataset/dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/dataset_map_provider.py deleted file mode 100644 index 91274f18542a42831309ba0230597036e21c4c64..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/dataset_map_provider.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -import os -from dataclasses import dataclass -from typing import Iterable, Iterator, Optional - -from iopath.common.file_io import PathManager -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.renderer.cameras import CamerasBase - -from .dataset_base import DatasetBase - - -@dataclass -class DatasetMap: - """ - A collection of datasets for implicitron. - - Members: - - train: a dataset for training - val: a dataset for validating during training - test: a dataset for final evaluation - """ - - train: Optional[DatasetBase] - val: Optional[DatasetBase] - test: Optional[DatasetBase] - - def __getitem__(self, split: str) -> Optional[DatasetBase]: - """ - Get one of the datasets by key (name of data split) - """ - if split not in ["train", "val", "test"]: - raise ValueError(f"{split} was not a valid split name (train/val/test)") - return getattr(self, split) - - def iter_datasets(self) -> Iterator[DatasetBase]: - """ - Iterator over all datasets. - """ - if self.train is not None: - yield self.train - if self.val is not None: - yield self.val - if self.test is not None: - yield self.test - - def join(self, other_dataset_maps: Iterable["DatasetMap"]) -> None: - """ - Joins the current DatasetMap with other dataset maps from the input list. - - For each subset of each dataset map (train/val/test), the function - omits joining the subsets that are None. - - Note the train/val/test datasets of the current dataset map will be - modified in-place. - - Args: - other_dataset_maps: The list of dataset maps to be joined into the - current dataset map. - """ - for set_ in ["train", "val", "test"]: - dataset_list = [ - getattr(self, set_), - *[getattr(dmap, set_) for dmap in other_dataset_maps], - ] - dataset_list = [d for d in dataset_list if d is not None] - if len(dataset_list) == 0: - setattr(self, set_, None) - continue - d0 = dataset_list[0] - if len(dataset_list) > 1: - d0.join(dataset_list[1:]) - setattr(self, set_, d0) - - -class DatasetMapProviderBase(ReplaceableBase): - """ - Base class for a provider of training / validation and testing - dataset objects. - """ - - def get_dataset_map(self) -> DatasetMap: - """ - Returns: - An object containing the torch.Dataset objects in train/val/test fields. - """ - raise NotImplementedError() - - def get_all_train_cameras(self) -> Optional[CamerasBase]: - """ - DEPRECATED! The function will be removed in future versions. - If the data is all for a single scene, returns a list - of the known training cameras for that scene, which is - used for evaluating the difficulty of the unknown - cameras. Otherwise return None. 
- """ - raise NotImplementedError() - - -@registry.register -class PathManagerFactory(ReplaceableBase): - """ - Base class and default implementation of a tool which dataset_map_provider implementations - may use to construct a path manager if needed. - - Args: - silence_logs: Whether to reduce log output from iopath library. - """ - - silence_logs: bool = True - - def get(self) -> Optional[PathManager]: - """ - Makes a PathManager if needed. - For open source users, this function should always return None. - Internally, this allows manifold access. - """ - if os.environ.get("INSIDE_RE_WORKER", False): - return None - - try: - from iopath.fb.manifold import ManifoldPathHandler - except ImportError: - return None - - if self.silence_logs: - logging.getLogger("iopath.fb.manifold").setLevel(logging.CRITICAL) - logging.getLogger("iopath.common.file_io").setLevel(logging.CRITICAL) - - path_manager = PathManager() - path_manager.register_handler(ManifoldPathHandler()) - - return path_manager diff --git a/pytorch3d/pytorch3d/implicitron/dataset/frame_data.py b/pytorch3d/pytorch3d/implicitron/dataset/frame_data.py deleted file mode 100644 index e32c086401eb7845815793d1223ef8c0ef7c5306..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/frame_data.py +++ /dev/null @@ -1,777 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -from abc import ABC, abstractmethod -from collections import defaultdict -from dataclasses import dataclass, field, fields -from typing import ( - Any, - ClassVar, - Generic, - List, - Mapping, - Optional, - Tuple, - Type, - TypeVar, - Union, -) - -import numpy as np -import torch - -from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.utils import ( - adjust_camera_to_bbox_crop_, - adjust_camera_to_image_scale_, - bbox_xyxy_to_xywh, - clamp_box_to_image_bounds_and_round, - crop_around_box, - GenericWorkaround, - get_bbox_from_mask, - get_clamp_bbox, - load_depth, - load_depth_mask, - load_image, - load_mask, - load_pointcloud, - rescale_bbox, - resize_image, - safe_as_tensor, -) -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.renderer.camera_utils import join_cameras_as_batch -from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras -from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds - - -@dataclass -class FrameData(Mapping[str, Any]): - """ - A type of the elements returned by indexing the dataset object. - It can represent both individual frames and batches of thereof; - in this documentation, the sizes of tensors refer to single frames; - add the first batch dimension for the collation result. - - Args: - frame_number: The number of the frame within its sequence. - 0-based continuous integers. - sequence_name: The unique name of the frame's sequence. - sequence_category: The object category of the sequence. - frame_timestamp: The time elapsed since the start of a sequence in sec. - image_size_hw: The size of the original image in pixels; (height, width) - tensor of shape (2,). Note that it is optional, e.g. it can be `None` - if the frame annotation has no size ans image_rgb has not [yet] been - loaded. Image-less FrameData is valid but mutators like crop/resize - may fail if the original image size cannot be deduced. 
- effective_image_size_hw: The size of the image after mutations such as - crop/resize in pixels; (height, width). if the image has not been mutated, - it is equal to `image_size_hw`. Note that it is also optional, for the - same reason as `image_size_hw`. - image_path: The qualified path to the loaded image (with dataset_root). - image_rgb: A Tensor of shape `(3, H, W)` holding the RGB image - of the frame; elements are floats in [0, 1]. - mask_crop: A binary mask of shape `(1, H, W)` denoting the valid image - regions. Regions can be invalid (mask_crop[i,j]=0) in case they - are a result of zero-padding of the image after cropping around - the object bounding box; elements are floats in {0.0, 1.0}. - depth_path: The qualified path to the frame's depth map. - depth_map: A float Tensor of shape `(1, H, W)` holding the depth map - of the frame; values correspond to distances from the camera; - use `depth_mask` and `mask_crop` to filter for valid pixels. - depth_mask: A binary mask of shape `(1, H, W)` denoting pixels of the - depth map that are valid for evaluation, they have been checked for - consistency across views; elements are floats in {0.0, 1.0}. - mask_path: A qualified path to the foreground probability mask. - fg_probability: A Tensor of `(1, H, W)` denoting the probability of the - pixels belonging to the captured object; elements are floats - in [0, 1]. - bbox_xywh: The bounding box tightly enclosing the foreground object in the - format (x0, y0, width, height). The convention assumes that - `x0+width` and `y0+height` includes the boundary of the box. - I.e., to slice out the corresponding crop from an image tensor `I` - we execute `crop = I[..., y0:y0+height, x0:x0+width]` - crop_bbox_xywh: The bounding box denoting the boundaries of `image_rgb` - in the original image coordinates in the format (x0, y0, width, height). - The convention is the same as for `bbox_xywh`. `crop_bbox_xywh` differs - from `bbox_xywh` due to padding (which can happen e.g. due to - setting `JsonIndexDataset.box_crop_context > 0`) - camera: A PyTorch3D camera object corresponding the frame's viewpoint, - corrected for cropping if it happened. - camera_quality_score: The score proportional to the confidence of the - frame's camera estimation (the higher the more accurate). - point_cloud_quality_score: The score proportional to the accuracy of the - frame's sequence point cloud (the higher the more accurate). - sequence_point_cloud_path: The path to the sequence's point cloud. - sequence_point_cloud: A PyTorch3D Pointclouds object holding the - point cloud corresponding to the frame's sequence. When the object - represents a batch of frames, point clouds may be deduplicated; - see `sequence_point_cloud_idx`. - sequence_point_cloud_idx: Integer indices mapping frame indices to the - corresponding point clouds in `sequence_point_cloud`; to get the - corresponding point cloud to `image_rgb[i]`, use - `sequence_point_cloud[sequence_point_cloud_idx[i]]`. - frame_type: The type of the loaded frame specified in - `subset_lists_file`, if provided. - meta: A dict for storing additional frame information. 
- """ - - frame_number: Optional[torch.LongTensor] - sequence_name: Union[str, List[str]] - sequence_category: Union[str, List[str]] - frame_timestamp: Optional[torch.Tensor] = None - image_size_hw: Optional[torch.LongTensor] = None - effective_image_size_hw: Optional[torch.LongTensor] = None - image_path: Union[str, List[str], None] = None - image_rgb: Optional[torch.Tensor] = None - # masks out padding added due to cropping the square bit - mask_crop: Optional[torch.Tensor] = None - depth_path: Union[str, List[str], None] = None - depth_map: Optional[torch.Tensor] = None - depth_mask: Optional[torch.Tensor] = None - mask_path: Union[str, List[str], None] = None - fg_probability: Optional[torch.Tensor] = None - bbox_xywh: Optional[torch.Tensor] = None - crop_bbox_xywh: Optional[torch.Tensor] = None - camera: Optional[PerspectiveCameras] = None - camera_quality_score: Optional[torch.Tensor] = None - point_cloud_quality_score: Optional[torch.Tensor] = None - sequence_point_cloud_path: Union[str, List[str], None] = None - sequence_point_cloud: Optional[Pointclouds] = None - sequence_point_cloud_idx: Optional[torch.Tensor] = None - frame_type: Union[str, List[str], None] = None # known | unseen - meta: dict = field(default_factory=lambda: {}) - - # NOTE that batching resets this attribute - _uncropped: bool = field(init=False, default=True) - - def to(self, *args, **kwargs): - new_params = {} - for field_name in iter(self): - value = getattr(self, field_name) - if isinstance(value, (torch.Tensor, Pointclouds, CamerasBase)): - new_params[field_name] = value.to(*args, **kwargs) - else: - new_params[field_name] = value - frame_data = type(self)(**new_params) - frame_data._uncropped = self._uncropped - return frame_data - - def cpu(self): - return self.to(device=torch.device("cpu")) - - def cuda(self): - return self.to(device=torch.device("cuda")) - - # the following functions make sure **frame_data can be passed to functions - def __iter__(self): - for f in fields(self): - if f.name.startswith("_"): - continue - - yield f.name - - def __getitem__(self, key): - return getattr(self, key) - - def __len__(self): - return sum(1 for f in iter(self)) - - def crop_by_metadata_bbox_( - self, - box_crop_context: float, - ) -> None: - """Crops the frame data in-place by (possibly expanded) bounding box. - The bounding box is taken from the object state (usually taken from - the frame annotation or estimated from the foregroubnd mask). - If the expanded bounding box does not fit the image, it is clamped, - i.e. the image is *not* padded. - - Args: - box_crop_context: rate of expansion for bbox; 0 means no expansion, - - Raises: - ValueError: If the object does not contain a bounding box (usually when no - mask annotation is provided) - ValueError: If the frame data have been cropped or resized, thus the intrinsic - bounding box is not valid for the current image size. - ValueError: If the frame does not have an image size (usually a corner case - when no image has been loaded) - """ - if self.bbox_xywh is None: - raise ValueError( - "Attempted cropping by metadata with empty bounding box. Consider either" - " to remove_empty_masks or turn off box_crop in the dataset config." - ) - - if not self._uncropped: - raise ValueError( - "Trying to apply the metadata bounding box to already cropped " - "or resized image; coordinates have changed." 
- ) - - self._crop_by_bbox_( - box_crop_context, - self.bbox_xywh, - ) - - def crop_by_given_bbox_( - self, - box_crop_context: float, - bbox_xywh: torch.Tensor, - ) -> None: - """Crops the frame data in-place by (possibly expanded) bounding box. - If the expanded bounding box does not fit the image, it is clamped, - i.e. the image is *not* padded. - - Args: - box_crop_context: rate of expansion for bbox; 0 means no expansion, - bbox_xywh: bounding box in [x0, y0, width, height] format. If float - tensor, values are floored (after converting to [x0, y0, x1, y1]). - - Raises: - ValueError: If the frame does not have an image size (usually a corner case - when no image has been loaded) - """ - self._crop_by_bbox_( - box_crop_context, - bbox_xywh, - ) - - def _crop_by_bbox_( - self, - box_crop_context: float, - bbox_xywh: torch.Tensor, - ) -> None: - """Crops the frame data in-place by (possibly expanded) bounding box. - If the expanded bounding box does not fit the image, it is clamped, - i.e. the image is *not* padded. - - Args: - box_crop_context: rate of expansion for bbox; 0 means no expansion, - bbox_xywh: bounding box in [x0, y0, width, height] format. If float - tensor, values are floored (after converting to [x0, y0, x1, y1]). - - Raises: - ValueError: If the frame does not have an image size (usually a corner case - when no image has been loaded) - """ - effective_image_size_hw = self.effective_image_size_hw - if effective_image_size_hw is None: - raise ValueError("Calling crop on image-less FrameData") - - bbox_xyxy = get_clamp_bbox( - bbox_xywh, - image_path=self.image_path, # pyre-ignore - box_crop_context=box_crop_context, - ) - clamp_bbox_xyxy = clamp_box_to_image_bounds_and_round( - bbox_xyxy, - image_size_hw=tuple(self.effective_image_size_hw), # pyre-ignore - ) - crop_bbox_xywh = bbox_xyxy_to_xywh(clamp_bbox_xyxy) - - if self.fg_probability is not None: - self.fg_probability = crop_around_box( - self.fg_probability, - clamp_bbox_xyxy, - self.mask_path, # pyre-ignore - ) - if self.image_rgb is not None: - self.image_rgb = crop_around_box( - self.image_rgb, - clamp_bbox_xyxy, - self.image_path, # pyre-ignore - ) - - depth_map = self.depth_map - if depth_map is not None: - clamp_bbox_xyxy_depth = rescale_bbox( - clamp_bbox_xyxy, tuple(depth_map.shape[-2:]), effective_image_size_hw - ).long() - self.depth_map = crop_around_box( - depth_map, - clamp_bbox_xyxy_depth, - self.depth_path, # pyre-ignore - ) - - depth_mask = self.depth_mask - if depth_mask is not None: - clamp_bbox_xyxy_depth = rescale_bbox( - clamp_bbox_xyxy, tuple(depth_mask.shape[-2:]), effective_image_size_hw - ).long() - self.depth_mask = crop_around_box( - depth_mask, - clamp_bbox_xyxy_depth, - self.mask_path, # pyre-ignore - ) - - # changing principal_point according to bbox_crop - if self.camera is not None: - adjust_camera_to_bbox_crop_( - camera=self.camera, - image_size_wh=effective_image_size_hw.flip(dims=[-1]), - clamp_bbox_xywh=crop_bbox_xywh, - ) - - # pyre-ignore - self.effective_image_size_hw = crop_bbox_xywh[..., 2:].flip(dims=[-1]) - self._uncropped = False - - def resize_frame_(self, new_size_hw: torch.LongTensor) -> None: - """Resizes frame data in-place according to given dimensions. 
- - Args: - new_size_hw: target image size [height, width], a LongTensor of shape (2,) - - Raises: - ValueError: If the frame does not have an image size (usually a corner case - when no image has been loaded) - """ - - effective_image_size_hw = self.effective_image_size_hw - if effective_image_size_hw is None: - raise ValueError("Calling resize on image-less FrameData") - - image_height, image_width = new_size_hw.tolist() - - if self.fg_probability is not None: - self.fg_probability, _, _ = resize_image( - self.fg_probability, - image_height=image_height, - image_width=image_width, - mode="nearest", - ) - - if self.image_rgb is not None: - self.image_rgb, _, self.mask_crop = resize_image( - self.image_rgb, image_height=image_height, image_width=image_width - ) - - if self.depth_map is not None: - self.depth_map, _, _ = resize_image( - self.depth_map, - image_height=image_height, - image_width=image_width, - mode="nearest", - ) - - if self.depth_mask is not None: - self.depth_mask, _, _ = resize_image( - self.depth_mask, - image_height=image_height, - image_width=image_width, - mode="nearest", - ) - - if self.camera is not None: - if self.image_size_hw is None: - raise ValueError( - "image_size_hw has to be defined for resizing FrameData with cameras." - ) - adjust_camera_to_image_scale_( - camera=self.camera, - original_size_wh=effective_image_size_hw.flip(dims=[-1]), - new_size_wh=new_size_hw.flip(dims=[-1]), # pyre-ignore - ) - - self.effective_image_size_hw = new_size_hw - self._uncropped = False - - @classmethod - def collate(cls, batch): - """ - Given a list objects `batch` of class `cls`, collates them into a batched - representation suitable for processing with deep networks. - """ - - elem = batch[0] - - if isinstance(elem, cls): - pointcloud_ids = [id(el.sequence_point_cloud) for el in batch] - id_to_idx = defaultdict(list) - for i, pc_id in enumerate(pointcloud_ids): - id_to_idx[pc_id].append(i) - - sequence_point_cloud = [] - sequence_point_cloud_idx = -np.ones((len(batch),)) - for i, ind in enumerate(id_to_idx.values()): - sequence_point_cloud_idx[ind] = i - sequence_point_cloud.append(batch[ind[0]].sequence_point_cloud) - assert (sequence_point_cloud_idx >= 0).all() - - override_fields = { - "sequence_point_cloud": sequence_point_cloud, - "sequence_point_cloud_idx": sequence_point_cloud_idx.tolist(), - } - # note that the pre-collate value of sequence_point_cloud_idx is unused - - collated = {} - for f in fields(elem): - if not f.init: - continue - - list_values = override_fields.get( - f.name, [getattr(d, f.name) for d in batch] - ) - collated[f.name] = ( - cls.collate(list_values) - if all(list_value is not None for list_value in list_values) - else None - ) - return cls(**collated) - - elif isinstance(elem, Pointclouds): - return join_pointclouds_as_batch(batch) - - elif isinstance(elem, CamerasBase): - # TODO: don't store K; enforce working in NDC space - return join_cameras_as_batch(batch) - else: - return torch.utils.data._utils.collate.default_collate(batch) - - -FrameDataSubtype = TypeVar("FrameDataSubtype", bound=FrameData) - - -class FrameDataBuilderBase(ReplaceableBase, Generic[FrameDataSubtype], ABC): - """A base class for FrameDataBuilders that build a FrameData object, load and - process the binary data (crop and resize). Implementations should parametrize - the class with a subtype of FrameData and set frame_data_type class variable to - that type. They have to also implement `build` method. 
- """ - - # To be initialised to FrameDataSubtype - frame_data_type: ClassVar[Type[FrameDataSubtype]] - - @abstractmethod - def build( - self, - frame_annotation: types.FrameAnnotation, - sequence_annotation: types.SequenceAnnotation, - *, - load_blobs: bool = True, - **kwargs, - ) -> FrameDataSubtype: - """An abstract method to build the frame data based on raw frame/sequence - annotations, load the binary data and adjust them according to the metadata. - """ - raise NotImplementedError() - - -class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC): - """ - A class to build a FrameData object, load and process the binary data (crop and - resize). This is an abstract class for extending to build FrameData subtypes. Most - users need to use concrete `FrameDataBuilder` class instead. - Beware that modifications of frame data are done in-place. - - Args: - dataset_root: The root folder of the dataset; all paths in frame / sequence - annotations are defined w.r.t. this root. Has to be set if any of the - load_* flabs below is true. - load_images: Enable loading the frame RGB data. - load_depths: Enable loading the frame depth maps. - load_depth_masks: Enable loading the frame depth map masks denoting the - depth values used for evaluation (the points consistent across views). - load_masks: Enable loading frame foreground masks. - load_point_clouds: Enable loading sequence-level point clouds. - max_points: Cap on the number of loaded points in the point cloud; - if reached, they are randomly sampled without replacement. - mask_images: Whether to mask the images with the loaded foreground masks; - 0 value is used for background. - mask_depths: Whether to mask the depth maps with the loaded foreground - masks; 0 value is used for background. - image_height: The height of the returned images, masks, and depth maps; - aspect ratio is preserved during cropping/resizing. - image_width: The width of the returned images, masks, and depth maps; - aspect ratio is preserved during cropping/resizing. - box_crop: Enable cropping of the image around the bounding box inferred - from the foreground region of the loaded segmentation mask; masks - and depth maps are cropped accordingly; cameras are corrected. - box_crop_mask_thr: The threshold used to separate pixels into foreground - and background based on the foreground_probability mask; if no value - is greater than this threshold, the loader lowers it and repeats. - box_crop_context: The amount of additional padding added to each - dimension of the cropping bounding box, relative to box size. - path_manager: Optionally a PathManager for interpreting paths in a special way. - """ - - dataset_root: Optional[str] = None - load_images: bool = True - load_depths: bool = True - load_depth_masks: bool = True - load_masks: bool = True - load_point_clouds: bool = False - max_points: int = 0 - mask_images: bool = False - mask_depths: bool = False - image_height: Optional[int] = 800 - image_width: Optional[int] = 800 - box_crop: bool = True - box_crop_mask_thr: float = 0.4 - box_crop_context: float = 0.3 - path_manager: Any = None - - def __post_init__(self) -> None: - load_any_blob = ( - self.load_images - or self.load_depths - or self.load_depth_masks - or self.load_masks - or self.load_point_clouds - ) - if load_any_blob and self.dataset_root is None: - raise ValueError( - "dataset_root must be set to load any blob data. " - "Make sure it is set in either FrameDataBuilder or Dataset params." 
- ) - - if load_any_blob and not self._exists_in_dataset_root(""): - raise ValueError( - f"dataset_root is passed but {self.dataset_root} does not exist." - ) - - def build( - self, - frame_annotation: types.FrameAnnotation, - sequence_annotation: types.SequenceAnnotation, - *, - load_blobs: bool = True, - **kwargs, - ) -> FrameDataSubtype: - """Builds the frame data based on raw frame/sequence annotations, loads the - binary data and adjust them according to the metadata. The processing includes: - * if box_crop is set, the image/mask/depth are cropped with the bounding - box provided or estimated from MaskAnnotation, - * if image_height/image_width are set, the image/mask/depth are resized to - fit that resolution. Note that the aspect ratio is preserved, and the - (possibly cropped) image is pasted into the top-left corner. In the - resulting frame_data, mask_crop field corresponds to the mask of the - pasted image. - - Args: - frame_annotation: frame annotation - sequence_annotation: sequence annotation - load_blobs: if the function should attempt loading the image, depth map - and mask, and foreground mask - - Returns: - The constructed FrameData object. - """ - - point_cloud = sequence_annotation.point_cloud - - frame_data = self.frame_data_type( - frame_number=safe_as_tensor(frame_annotation.frame_number, torch.long), - frame_timestamp=safe_as_tensor( - frame_annotation.frame_timestamp, torch.float - ), - sequence_name=frame_annotation.sequence_name, - sequence_category=sequence_annotation.category, - camera_quality_score=safe_as_tensor( - sequence_annotation.viewpoint_quality_score, torch.float - ), - point_cloud_quality_score=safe_as_tensor( - point_cloud.quality_score, torch.float - ) - if point_cloud is not None - else None, - ) - - fg_mask_np: Optional[np.ndarray] = None - mask_annotation = frame_annotation.mask - if mask_annotation is not None: - if load_blobs and self.load_masks: - fg_mask_np, mask_path = self._load_fg_probability(frame_annotation) - frame_data.mask_path = mask_path - frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float) - - bbox_xywh = mask_annotation.bounding_box_xywh - if bbox_xywh is None and fg_mask_np is not None: - bbox_xywh = get_bbox_from_mask(fg_mask_np, self.box_crop_mask_thr) - - frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.float) - - if frame_annotation.image is not None: - image_size_hw = safe_as_tensor(frame_annotation.image.size, torch.long) - frame_data.image_size_hw = image_size_hw # original image size - # image size after crop/resize - frame_data.effective_image_size_hw = image_size_hw - image_path = None - dataset_root = self.dataset_root - if frame_annotation.image.path is not None and dataset_root is not None: - image_path = os.path.join(dataset_root, frame_annotation.image.path) - frame_data.image_path = image_path - - if load_blobs and self.load_images: - if image_path is None: - raise ValueError("Image path is required to load images.") - - image_np = load_image(self._local_path(image_path)) - frame_data.image_rgb = self._postprocess_image( - image_np, frame_annotation.image.size, frame_data.fg_probability - ) - - if ( - load_blobs - and self.load_depths - and frame_annotation.depth is not None - and frame_annotation.depth.path is not None - ): - ( - frame_data.depth_map, - frame_data.depth_path, - frame_data.depth_mask, - ) = self._load_mask_depth(frame_annotation, fg_mask_np) - - if load_blobs and self.load_point_clouds and point_cloud is not None: - pcl_path = self._fix_point_cloud_path(point_cloud.path) - 
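# [Editor's note, not part of the original file] The next statement loads the
# sequence-level point cloud; `max_points` (see the class docstring above) caps
# its size, with random subsampling without replacement once the cap is reached.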
frame_data.sequence_point_cloud = load_pointcloud( - self._local_path(pcl_path), max_points=self.max_points - ) - frame_data.sequence_point_cloud_path = pcl_path - - if frame_annotation.viewpoint is not None: - frame_data.camera = self._get_pytorch3d_camera(frame_annotation) - - if self.box_crop: - frame_data.crop_by_metadata_bbox_(self.box_crop_context) - - if self.image_height is not None and self.image_width is not None: - new_size = (self.image_height, self.image_width) - frame_data.resize_frame_( - new_size_hw=torch.tensor(new_size, dtype=torch.long), # pyre-ignore - ) - - return frame_data - - def _load_fg_probability( - self, entry: types.FrameAnnotation - ) -> Tuple[np.ndarray, str]: - assert self.dataset_root is not None and entry.mask is not None - full_path = os.path.join(self.dataset_root, entry.mask.path) - fg_probability = load_mask(self._local_path(full_path)) - if fg_probability.shape[-2:] != entry.image.size: - raise ValueError( - f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!" - ) - - return fg_probability, full_path - - def _postprocess_image( - self, - image_np: np.ndarray, - image_size: Tuple[int, int], - fg_probability: Optional[torch.Tensor], - ) -> torch.Tensor: - image_rgb = safe_as_tensor(image_np, torch.float) - - if image_rgb.shape[-2:] != image_size: - raise ValueError(f"bad image size: {image_rgb.shape[-2:]} vs {image_size}!") - - if self.mask_images: - assert fg_probability is not None - image_rgb *= fg_probability - - return image_rgb - - def _load_mask_depth( - self, - entry: types.FrameAnnotation, - fg_mask: Optional[np.ndarray], - ) -> Tuple[torch.Tensor, str, torch.Tensor]: - entry_depth = entry.depth - dataset_root = self.dataset_root - assert dataset_root is not None - assert entry_depth is not None and entry_depth.path is not None - path = os.path.join(dataset_root, entry_depth.path) - depth_map = load_depth(self._local_path(path), entry_depth.scale_adjustment) - - if self.mask_depths: - assert fg_mask is not None - depth_map *= fg_mask - - mask_path = entry_depth.mask_path - if self.load_depth_masks and mask_path is not None: - mask_path = os.path.join(dataset_root, mask_path) - depth_mask = load_depth_mask(self._local_path(mask_path)) - else: - depth_mask = (depth_map > 0.0).astype(np.float32) - - return torch.tensor(depth_map), path, torch.tensor(depth_mask) - - def _get_pytorch3d_camera( - self, - entry: types.FrameAnnotation, - ) -> PerspectiveCameras: - entry_viewpoint = entry.viewpoint - assert entry_viewpoint is not None - # principal point and focal length - principal_point = torch.tensor( - entry_viewpoint.principal_point, dtype=torch.float - ) - focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) - - format = entry_viewpoint.intrinsics_format - if entry_viewpoint.intrinsics_format == "ndc_norm_image_bounds": - # legacy PyTorch3D NDC format - # convert to pixels unequally and convert to ndc equally - image_size_as_list = list(reversed(entry.image.size)) - image_size_wh = torch.tensor(image_size_as_list, dtype=torch.float) - per_axis_scale = image_size_wh / image_size_wh.min() - focal_length = focal_length * per_axis_scale - principal_point = principal_point * per_axis_scale - elif entry_viewpoint.intrinsics_format != "ndc_isotropic": - raise ValueError(f"Unknown intrinsics format: {format}") - - return PerspectiveCameras( - focal_length=focal_length[None], - principal_point=principal_point[None], - R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], - T=torch.tensor(entry_viewpoint.T, 
dtype=torch.float)[None], - ) - - def _fix_point_cloud_path(self, path: str) -> str: - """ - Fix up a point cloud path from the dataset. - Some files in Co3Dv2 have an accidental absolute path stored. - """ - unwanted_prefix = ( - "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" - ) - if path.startswith(unwanted_prefix): - path = path[len(unwanted_prefix) :] - assert self.dataset_root is not None - return os.path.join(self.dataset_root, path) - - def _local_path(self, path: str) -> str: - if self.path_manager is None: - return path - return self.path_manager.get_local_path(path) - - def _exists_in_dataset_root(self, relpath) -> bool: - if not self.dataset_root: - return False - - full_path = os.path.join(self.dataset_root, relpath) - if self.path_manager is None: - return os.path.exists(full_path) - else: - return self.path_manager.exists(full_path) - - -@registry.register -class FrameDataBuilder(GenericWorkaround, GenericFrameDataBuilder[FrameData]): - """ - A concrete class to build a FrameData object, load and process the binary data (crop - and resize). Beware that modifications of frame data are done in-place. Please see - the documentation for `GenericFrameDataBuilder` for the description of parameters - and methods. - """ - - frame_data_type: ClassVar[Type[FrameData]] = FrameData diff --git a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset.py deleted file mode 100644 index 8caf581dfae6511acd41da01ecf835ec06c43c10..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset.py +++ /dev/null @@ -1,669 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import copy -import functools -import gzip -import hashlib -import json -import logging -import os -import random -import warnings -from collections import defaultdict -from itertools import islice -from typing import ( - Any, - ClassVar, - Dict, - Iterable, - List, - Optional, - Sequence, - Tuple, - Type, - TYPE_CHECKING, - Union, -) - -from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase -from pytorch3d.implicitron.dataset.frame_data import FrameData, FrameDataBuilder -from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.renderer.camera_utils import join_cameras_as_batch -from pytorch3d.renderer.cameras import CamerasBase - -from tqdm import tqdm - - -logger = logging.getLogger(__name__) - - -if TYPE_CHECKING: - from typing import TypedDict - - class FrameAnnotsEntry(TypedDict): - subset: Optional[str] - frame_annotation: types.FrameAnnotation - -else: - FrameAnnotsEntry = dict - - -@registry.register -class JsonIndexDataset(DatasetBase, ReplaceableBase): - """ - A dataset with annotations in json files like the Common Objects in 3D - (CO3D) dataset. - - Metadata-related args:: - frame_annotations_file: A zipped json file containing metadata of the - frames in the dataset, serialized List[types.FrameAnnotation]. - sequence_annotations_file: A zipped json file containing metadata of the - sequences in the dataset, serialized List[types.SequenceAnnotation]. 
- subset_lists_file: A json file containing the lists of frames corresponding - corresponding to different subsets (e.g. train/val/test) of the dataset; - format: {subset: (sequence_name, frame_id, file_path)}. - subsets: Restrict frames/sequences only to the given list of subsets - as defined in subset_lists_file (see above). - limit_to: Limit the dataset to the first #limit_to frames (after other - filters have been applied). - limit_sequences_to: Limit the dataset to the first - #limit_sequences_to sequences (after other sequence filters have been - applied but before frame-based filters). - pick_sequence: A list of sequence names to restrict the dataset to. - exclude_sequence: A list of the names of the sequences to exclude. - limit_category_to: Restrict the dataset to the given list of categories. - remove_empty_masks: Removes the frames with no active foreground pixels - in the segmentation mask after thresholding (see box_crop_mask_thr). - n_frames_per_sequence: If > 0, randomly samples #n_frames_per_sequence - frames in each sequences uniformly without replacement if it has - more frames than that; applied before other frame-level filters. - seed: The seed of the random generator sampling #n_frames_per_sequence - random frames per sequence. - sort_frames: Enable frame annotations sorting to group frames from the - same sequences together and order them by timestamps - eval_batches: A list of batches that form the evaluation set; - list of batch-sized lists of indices corresponding to __getitem__ - of this class, thus it can be used directly as a batch sampler. - eval_batch_index: - ( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] ) - A list of batches of frames described as (sequence_name, frame_idx) - that can form the evaluation set, `eval_batches` will be set from this. - - Blob-loading parameters: - dataset_root: The root folder of the dataset; all the paths in jsons are - specified relative to this root (but not json paths themselves). - load_images: Enable loading the frame RGB data. - load_depths: Enable loading the frame depth maps. - load_depth_masks: Enable loading the frame depth map masks denoting the - depth values used for evaluation (the points consistent across views). - load_masks: Enable loading frame foreground masks. - load_point_clouds: Enable loading sequence-level point clouds. - max_points: Cap on the number of loaded points in the point cloud; - if reached, they are randomly sampled without replacement. - mask_images: Whether to mask the images with the loaded foreground masks; - 0 value is used for background. - mask_depths: Whether to mask the depth maps with the loaded foreground - masks; 0 value is used for background. - image_height: The height of the returned images, masks, and depth maps; - aspect ratio is preserved during cropping/resizing. - image_width: The width of the returned images, masks, and depth maps; - aspect ratio is preserved during cropping/resizing. - box_crop: Enable cropping of the image around the bounding box inferred - from the foreground region of the loaded segmentation mask; masks - and depth maps are cropped accordingly; cameras are corrected. - box_crop_mask_thr: The threshold used to separate pixels into foreground - and background based on the foreground_probability mask; if no value - is greater than this threshold, the loader lowers it and repeats. - box_crop_context: The amount of additional padding added to each - dimension of the cropping bounding box, relative to box size. 
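# [Editor's illustration, not part of the original file] A minimal sketch of
# supplying the arguments documented above, mirroring how the dataset map
# providers later in this diff construct the dataset; all paths and the
# "train_known" subset name are placeholders for a CO3D-style category folder.
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.tools.config import expand_args_fields

expand_args_fields(JsonIndexDataset)  # make the configurable class instantiable
dataset = JsonIndexDataset(
    dataset_root="/path/to/co3d",
    frame_annotations_file="/path/to/co3d/apple/frame_annotations.jgz",
    sequence_annotations_file="/path/to/co3d/apple/sequence_annotations.jgz",
    subset_lists_file="/path/to/co3d/apple/set_lists.json",
    subsets=["train_known"],
    n_frames_per_sequence=50,
)
frame_data = dataset[0]  # a FrameData with cropped/resized image, mask and depth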
- """ - - frame_annotations_type: ClassVar[ - Type[types.FrameAnnotation] - ] = types.FrameAnnotation - - path_manager: Any = None - frame_annotations_file: str = "" - sequence_annotations_file: str = "" - subset_lists_file: str = "" - subsets: Optional[List[str]] = None - limit_to: int = 0 - limit_sequences_to: int = 0 - pick_sequence: Tuple[str, ...] = () - exclude_sequence: Tuple[str, ...] = () - limit_category_to: Tuple[int, ...] = () - dataset_root: str = "" - load_images: bool = True - load_depths: bool = True - load_depth_masks: bool = True - load_masks: bool = True - load_point_clouds: bool = False - max_points: int = 0 - mask_images: bool = False - mask_depths: bool = False - image_height: Optional[int] = 800 - image_width: Optional[int] = 800 - box_crop: bool = True - box_crop_mask_thr: float = 0.4 - box_crop_context: float = 0.3 - remove_empty_masks: bool = True - n_frames_per_sequence: int = -1 - seed: int = 0 - sort_frames: bool = False - eval_batches: Any = None - eval_batch_index: Any = None - # initialised in __post_init__ - # commented because of OmegaConf (for tests to pass) - # _frame_data_builder: FrameDataBuilder = field(init=False) - # frame_annots: List[FrameAnnotsEntry] = field(init=False) - # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) - # _seq_to_idx: Dict[str, List[int]] = field(init=False) - - def __post_init__(self) -> None: - self._load_frames() - self._load_sequences() - if self.sort_frames: - self._sort_frames() - self._load_subset_lists() - self._filter_db() # also computes sequence indices - self._extract_and_set_eval_batches() - - # pyre-ignore - self._frame_data_builder = FrameDataBuilder( - dataset_root=self.dataset_root, - load_images=self.load_images, - load_depths=self.load_depths, - load_depth_masks=self.load_depth_masks, - load_masks=self.load_masks, - load_point_clouds=self.load_point_clouds, - max_points=self.max_points, - mask_images=self.mask_images, - mask_depths=self.mask_depths, - image_height=self.image_height, - image_width=self.image_width, - box_crop=self.box_crop, - box_crop_mask_thr=self.box_crop_mask_thr, - box_crop_context=self.box_crop_context, - path_manager=self.path_manager, - ) - logger.info(str(self)) - - def _extract_and_set_eval_batches(self) -> None: - """ - Sets eval_batches based on input eval_batch_index. - """ - if self.eval_batch_index is not None: - if self.eval_batches is not None: - raise ValueError( - "Cannot define both eval_batch_index and eval_batches." - ) - self.eval_batches = self.seq_frame_index_to_dataset_index( - self.eval_batch_index - ) - - def join(self, other_datasets: Iterable[DatasetBase]) -> None: - """ - Join the dataset with other JsonIndexDataset objects. - - Args: - other_datasets: A list of JsonIndexDataset objects to be joined - into the current dataset. 
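# [Editor's illustration, not part of the original file] Assuming `dataset_apple`
# and `dataset_car` are two JsonIndexDataset objects built as sketched above,
# joining them yields a single dataset covering both categories:
#     dataset_apple.join([dataset_car])
#     len(dataset_apple)  # now counts the frames of both categories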
- """ - if not all(isinstance(d, JsonIndexDataset) for d in other_datasets): - raise ValueError("This function can only join a list of JsonIndexDataset") - # pyre-ignore[16] - self.frame_annots.extend([fa for d in other_datasets for fa in d.frame_annots]) - # pyre-ignore[16] - self.seq_annots.update( - # https://gist.github.com/treyhunner/f35292e676efa0be1728 - functools.reduce( - lambda a, b: {**a, **b}, - # pyre-ignore[16] - [d.seq_annots for d in other_datasets], - ) - ) - all_eval_batches = [ - self.eval_batches, - *[d.eval_batches for d in other_datasets], # pyre-ignore[16] - ] - if not ( - all(ba is None for ba in all_eval_batches) - or all(ba is not None for ba in all_eval_batches) - ): - raise ValueError( - "When joining datasets, either all joined datasets have to have their" - " eval_batches defined, or all should have their eval batches undefined." - ) - if self.eval_batches is not None: - self.eval_batches = sum(all_eval_batches, []) - self._invalidate_indexes(filter_seq_annots=True) - - def is_filtered(self) -> bool: - """ - Returns `True` in case the dataset has been filtered and thus some frame annotations - stored on the disk might be missing in the dataset object. - - Returns: - is_filtered: `True` if the dataset has been filtered, else `False`. - """ - return ( - self.remove_empty_masks - or self.limit_to > 0 - or self.limit_sequences_to > 0 - or len(self.pick_sequence) > 0 - or len(self.exclude_sequence) > 0 - or len(self.limit_category_to) > 0 - or self.n_frames_per_sequence > 0 - ) - - def seq_frame_index_to_dataset_index( - self, - seq_frame_index: List[List[Union[Tuple[str, int, str], Tuple[str, int]]]], - allow_missing_indices: bool = False, - remove_missing_indices: bool = False, - suppress_missing_index_warning: bool = True, - ) -> Union[List[List[Optional[int]]], List[List[int]]]: - """ - Obtain indices into the dataset object given a list of frame ids. - - Args: - seq_frame_index: The list of frame ids specified as - `List[List[Tuple[sequence_name:str, frame_number:int]]]`. Optionally, - Image paths relative to the dataset_root can be stored specified as well: - `List[List[Tuple[sequence_name:str, frame_number:int, image_path:str]]]` - allow_missing_indices: If `False`, throws an IndexError upon reaching the first - entry from `seq_frame_index` which is missing in the dataset. - Otherwise, depending on `remove_missing_indices`, either returns `None` - in place of missing entries or removes the indices of missing entries. - remove_missing_indices: Active when `allow_missing_indices=True`. - If `False`, returns `None` in place of `seq_frame_index` entries that - are not present in the dataset. - If `True` removes missing indices from the returned indices. - suppress_missing_index_warning: - Active if `allow_missing_indices==True`. Suppressess a warning message - in case an entry from `seq_frame_index` is missing in the dataset - (expected in certain cases - e.g. when setting - `self.remove_empty_masks=True`). - - Returns: - dataset_idx: Indices of dataset entries corresponding to`seq_frame_index`. 
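# [Editor's illustration, not part of the original file] Example of the expected
# input, assuming a sequence named "110_13051_23361" containing frames 5 and 10:
#     batches = [[("110_13051_23361", 5), ("110_13051_23361", 10)]]
#     idx = dataset.seq_frame_index_to_dataset_index(batches)
#     # idx == [[i_5, i_10]] -- dataset indices usable with __getitem__,
#     # or directly as one evaluation batch.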
- """ - _dataset_seq_frame_n_index = { - seq: { - # pyre-ignore[16] - self.frame_annots[idx]["frame_annotation"].frame_number: idx - for idx in seq_idx - } - # pyre-ignore[16] - for seq, seq_idx in self._seq_to_idx.items() - } - - def _get_dataset_idx( - seq_name: str, frame_no: int, path: Optional[str] = None - ) -> Optional[int]: - idx_seq = _dataset_seq_frame_n_index.get(seq_name, None) - idx = idx_seq.get(frame_no, None) if idx_seq is not None else None - if idx is None: - msg = ( - f"sequence_name={seq_name} / frame_number={frame_no}" - " not in the dataset!" - ) - if not allow_missing_indices: - raise IndexError(msg) - if not suppress_missing_index_warning: - warnings.warn(msg) - return idx - if path is not None: - # Check that the loaded frame path is consistent - # with the one stored in self.frame_annots. - assert os.path.normpath( - # pyre-ignore[16] - self.frame_annots[idx]["frame_annotation"].image.path - ) == os.path.normpath( - path - ), f"Inconsistent frame indices {seq_name, frame_no, path}." - return idx - - dataset_idx = [ - [_get_dataset_idx(*b) for b in batch] # pyre-ignore [6] - for batch in seq_frame_index - ] - - if allow_missing_indices and remove_missing_indices: - # remove all None indices, and also batches with only None entries - valid_dataset_idx = [ - [b for b in batch if b is not None] for batch in dataset_idx - ] - return [batch for batch in valid_dataset_idx if len(batch) > 0] - - return dataset_idx - - def subset_from_frame_index( - self, - frame_index: List[Union[Tuple[str, int], Tuple[str, int, str]]], - allow_missing_indices: bool = True, - ) -> "JsonIndexDataset": - """ - Generate a dataset subset given the list of frames specified in `frame_index`. - - Args: - frame_index: The list of frame indentifiers (as stored in the metadata) - specified as `List[Tuple[sequence_name:str, frame_number:int]]`. Optionally, - Image paths relative to the dataset_root can be stored specified as well: - `List[Tuple[sequence_name:str, frame_number:int, image_path:str]]`, - in the latter case, if imaga_path do not match the stored paths, an error - is raised. - allow_missing_indices: If `False`, throws an IndexError upon reaching the first - entry from `frame_index` which is missing in the dataset. - Otherwise, generates a subset consisting of frames entries that actually - exist in the dataset. - """ - # Get the indices into the frame annots. - dataset_indices = self.seq_frame_index_to_dataset_index( - [frame_index], - allow_missing_indices=self.is_filtered() and allow_missing_indices, - )[0] - valid_dataset_indices = [i for i in dataset_indices if i is not None] - - # Deep copy the whole dataset except frame_annots, which are large so we - # deep copy only the requested subset of frame_annots. - memo = {id(self.frame_annots): None} # pyre-ignore[16] - dataset_new = copy.deepcopy(self, memo) - dataset_new.frame_annots = copy.deepcopy( - [self.frame_annots[i] for i in valid_dataset_indices] - ) - - # This will kill all unneeded sequence annotations. - dataset_new._invalidate_indexes(filter_seq_annots=True) - - # Finally annotate the frame annotations with the name of the subset - # stored in meta. - for frame_annot in dataset_new.frame_annots: - frame_annotation = frame_annot["frame_annotation"] - if frame_annotation.meta is not None: - frame_annot["subset"] = frame_annotation.meta.get("frame_type", None) - - # A sanity check - this will crash in case some entries from frame_index are missing - # in dataset_new. 
- valid_frame_index = [ - fi for fi, di in zip(frame_index, dataset_indices) if di is not None - ] - dataset_new.seq_frame_index_to_dataset_index( - [valid_frame_index], allow_missing_indices=False - ) - - return dataset_new - - def __str__(self) -> str: - # pyre-ignore[16] - return f"JsonIndexDataset #frames={len(self.frame_annots)}" - - def __len__(self) -> int: - # pyre-ignore[16] - return len(self.frame_annots) - - def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: - return entry["subset"] - - def get_all_train_cameras(self) -> CamerasBase: - """ - Returns the cameras corresponding to all the known frames. - """ - logger.info("Loading all train cameras.") - cameras = [] - # pyre-ignore[16] - for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)): - frame_type = self._get_frame_type(frame_annot) - if frame_type is None: - raise ValueError("subsets not loaded") - if is_known_frame_scalar(frame_type): - cameras.append(self[frame_idx].camera) - return join_cameras_as_batch(cameras) - - def __getitem__(self, index) -> FrameData: - # pyre-ignore[16] - if index >= len(self.frame_annots): - raise IndexError(f"index {index} out of range {len(self.frame_annots)}") - - entry = self.frame_annots[index]["frame_annotation"] - - # pyre-ignore - frame_data = self._frame_data_builder.build( - entry, - # pyre-ignore - self.seq_annots[entry.sequence_name], - ) - # Optional field - frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - - return frame_data - - def _load_frames(self) -> None: - logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") - local_file = self._local_path(self.frame_annotations_file) - with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - frame_annots_list = types.load_dataclass( - zipfile, List[self.frame_annotations_type] - ) - if not frame_annots_list: - raise ValueError("Empty dataset!") - # pyre-ignore[16] - self.frame_annots = [ - FrameAnnotsEntry(frame_annotation=a, subset=None) for a in frame_annots_list - ] - - def _load_sequences(self) -> None: - logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") - local_file = self._local_path(self.sequence_annotations_file) - with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) - if not seq_annots: - raise ValueError("Empty sequences file!") - # pyre-ignore[16] - self.seq_annots = {entry.sequence_name: entry for entry in seq_annots} - - def _load_subset_lists(self) -> None: - logger.info(f"Loading Co3D subset lists from {self.subset_lists_file}.") - if not self.subset_lists_file: - return - - with open(self._local_path(self.subset_lists_file), "r") as f: - subset_to_seq_frame = json.load(f) - - frame_path_to_subset = { - path: subset - for subset, frames in subset_to_seq_frame.items() - for _, _, path in frames - } - # pyre-ignore[16] - for frame in self.frame_annots: - frame["subset"] = frame_path_to_subset.get( - frame["frame_annotation"].image.path, None - ) - if frame["subset"] is None: - warnings.warn( - "Subset lists are given but don't include " - + frame["frame_annotation"].image.path - ) - - def _sort_frames(self) -> None: - # Sort frames to have them grouped by sequence, ordered by timestamp - # pyre-ignore[16] - self.frame_annots = sorted( - self.frame_annots, - key=lambda f: ( - f["frame_annotation"].sequence_name, - f["frame_annotation"].frame_timestamp or 0, - ), - ) - - def _filter_db(self) -> None: - if self.remove_empty_masks: - 
logger.info("Removing images with empty masks.") - # pyre-ignore[16] - old_len = len(self.frame_annots) - - msg = "remove_empty_masks needs every MaskAnnotation.mass to be set." - - def positive_mass(frame_annot: types.FrameAnnotation) -> bool: - mask = frame_annot.mask - if mask is None: - return False - if mask.mass is None: - raise ValueError(msg) - return mask.mass > 1 - - self.frame_annots = [ - frame - for frame in self.frame_annots - if positive_mass(frame["frame_annotation"]) - ] - logger.info("... filtered %d -> %d" % (old_len, len(self.frame_annots))) - - # this has to be called after joining with categories!! - subsets = self.subsets - if subsets: - if not self.subset_lists_file: - raise ValueError( - "Subset filter is on but subset_lists_file was not given" - ) - - logger.info(f"Limiting Co3D dataset to the '{subsets}' subsets.") - - # truncate the list of subsets to the valid one - self.frame_annots = [ - entry for entry in self.frame_annots if entry["subset"] in subsets - ] - if len(self.frame_annots) == 0: - raise ValueError(f"There are no frames in the '{subsets}' subsets!") - - self._invalidate_indexes(filter_seq_annots=True) - - if len(self.limit_category_to) > 0: - logger.info(f"Limiting dataset to categories: {self.limit_category_to}") - # pyre-ignore[16] - self.seq_annots = { - name: entry - for name, entry in self.seq_annots.items() - if entry.category in self.limit_category_to - } - - # sequence filters - for prefix in ("pick", "exclude"): - orig_len = len(self.seq_annots) - attr = f"{prefix}_sequence" - arr = getattr(self, attr) - if len(arr) > 0: - logger.info(f"{attr}: {str(arr)}") - self.seq_annots = { - name: entry - for name, entry in self.seq_annots.items() - if (name in arr) == (prefix == "pick") - } - logger.info("... filtered %d -> %d" % (orig_len, len(self.seq_annots))) - - if self.limit_sequences_to > 0: - self.seq_annots = dict( - islice(self.seq_annots.items(), self.limit_sequences_to) - ) - - # retain only frames from retained sequences - self.frame_annots = [ - f - for f in self.frame_annots - if f["frame_annotation"].sequence_name in self.seq_annots - ] - - self._invalidate_indexes() - - if self.n_frames_per_sequence > 0: - logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.") - keep_idx = [] - # pyre-ignore[16] - for seq, seq_indices in self._seq_to_idx.items(): - # infer the seed from the sequence name, this is reproducible - # and makes the selection differ for different sequences - seed = _seq_name_to_seed(seq) + self.seed - seq_idx_shuffled = random.Random(seed).sample( - sorted(seq_indices), len(seq_indices) - ) - keep_idx.extend(seq_idx_shuffled[: self.n_frames_per_sequence]) - - logger.info( - "... 
filtered %d -> %d" % (len(self.frame_annots), len(keep_idx)) - ) - self.frame_annots = [self.frame_annots[i] for i in keep_idx] - self._invalidate_indexes(filter_seq_annots=False) - # sequences are not decimated, so self.seq_annots is valid - - if self.limit_to > 0 and self.limit_to < len(self.frame_annots): - logger.info( - "limit_to: filtered %d -> %d" % (len(self.frame_annots), self.limit_to) - ) - self.frame_annots = self.frame_annots[: self.limit_to] - self._invalidate_indexes(filter_seq_annots=True) - - def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None: - # update _seq_to_idx and filter seq_meta according to frame_annots change - # if filter_seq_annots, also uldates seq_annots based on the changed _seq_to_idx - self._invalidate_seq_to_idx() - - if filter_seq_annots: - # pyre-ignore[16] - self.seq_annots = { - k: v - for k, v in self.seq_annots.items() - # pyre-ignore[16] - if k in self._seq_to_idx - } - - def _invalidate_seq_to_idx(self) -> None: - seq_to_idx = defaultdict(list) - # pyre-ignore[16] - for idx, entry in enumerate(self.frame_annots): - seq_to_idx[entry["frame_annotation"].sequence_name].append(idx) - # pyre-ignore[16] - self._seq_to_idx = seq_to_idx - - def _local_path(self, path: str) -> str: - if self.path_manager is None: - return path - return self.path_manager.get_local_path(path) - - def get_frame_numbers_and_timestamps( - self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None - ) -> List[Tuple[int, float]]: - out: List[Tuple[int, float]] = [] - for idx in idxs: - if ( - subset_filter is not None - # pyre-fixme[16]: `JsonIndexDataset` has no attribute `frame_annots`. - and self.frame_annots[idx]["subset"] not in subset_filter - ): - continue - - frame_annotation = self.frame_annots[idx]["frame_annotation"] - out.append( - (frame_annotation.frame_number, frame_annotation.frame_timestamp) - ) - return out - - def category_to_sequence_names(self) -> Dict[str, List[str]]: - c2seq = defaultdict(list) - # pyre-ignore - for sequence_name, sa in self.seq_annots.items(): - c2seq[sa.category].append(sequence_name) - return dict(c2seq) - - def get_eval_batches(self) -> Optional[List[List[int]]]: - return self.eval_batches - - -def _seq_name_to_seed(seq_name) -> int: - return int(hashlib.sha1(seq_name.encode("utf-8")).hexdigest(), 16) diff --git a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py deleted file mode 100644 index 53170871b3f19cf3e0380ad89a688552ea090072..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
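# [Editor's illustration, not part of either original file] A minimal sketch of
# batching a JsonIndexDataset (defined in the file above) with torch's DataLoader,
# using FrameData.collate (defined earlier in this diff) as the collate function.
# `dataset` is assumed to be an already-constructed JsonIndexDataset whose
# eval_batches are set; otherwise pass batch_size/shuffle instead of batch_sampler.
from torch.utils.data import DataLoader
from pytorch3d.implicitron.dataset.frame_data import FrameData

loader = DataLoader(
    dataset,
    batch_sampler=dataset.get_eval_batches(),
    collate_fn=FrameData.collate,
)
for frame_batch in loader:
    print(frame_batch.image_rgb.shape)  # (batch_size, 3, image_height, image_width)
    break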
- - -import json -import os -from typing import Dict, List, Optional, Tuple, Type - -from omegaconf import DictConfig -from pytorch3d.implicitron.tools.config import ( - expand_args_fields, - registry, - run_auto_creation, -) -from pytorch3d.renderer.cameras import CamerasBase - -from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory -from .json_index_dataset import JsonIndexDataset - -from .utils import ( - DATASET_TYPE_KNOWN, - DATASET_TYPE_TEST, - DATASET_TYPE_TRAIN, - DATASET_TYPE_UNKNOWN, -) - - -# fmt: off -CO3D_CATEGORIES: List[str] = list(reversed([ - "baseballbat", "banana", "bicycle", "microwave", "tv", - "cellphone", "toilet", "hairdryer", "couch", "kite", "pizza", - "umbrella", "wineglass", "laptop", - "hotdog", "stopsign", "frisbee", "baseballglove", - "cup", "parkingmeter", "backpack", "toyplane", "toybus", - "handbag", "chair", "keyboard", "car", "motorcycle", - "carrot", "bottle", "sandwich", "remote", "bowl", "skateboard", - "toaster", "mouse", "toytrain", "book", "toytruck", - "orange", "broccoli", "plant", "teddybear", - "suitcase", "bench", "ball", "cake", - "vase", "hydrant", "apple", "donut", -])) -# fmt: on - -_CO3D_DATASET_ROOT: str = os.getenv("CO3D_DATASET_ROOT", "") - -# _NEED_CONTROL is a list of those elements of JsonIndexDataset which -# are not directly specified for it in the config but come from the -# DatasetMapProvider. -_NEED_CONTROL: Tuple[str, ...] = ( - "dataset_root", - "eval_batches", - "eval_batch_index", - "n_frames_per_sequence", - "path_manager", - "pick_sequence", - "subsets", - "frame_annotations_file", - "sequence_annotations_file", - "subset_lists_file", -) - - -@registry.register -class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13] - """ - Generates the training / validation and testing dataset objects for - a dataset laid out on disk like Co3D, with annotations in json files. - - Args: - category: The object category of the dataset. - task_str: "multisequence" or "singlesequence". - dataset_root: The root folder of the dataset. - n_frames_per_sequence: Randomly sample #n_frames_per_sequence frames - in each sequence. - test_on_train: Construct validation and test datasets from - the training subset. - restrict_sequence_name: Restrict the dataset sequences to the ones - present in the given list of names. - test_restrict_sequence_id: The ID of the loaded sequence. - Active for task_str='singlesequence'. - assert_single_seq: Assert that only frames from a single sequence - are present in all generated datasets. - only_test_set: Load only the test set. - dataset_class_type: name of class (JsonIndexDataset or a subclass) - to use for the dataset. - dataset_X_args (e.g. dataset_JsonIndexDataset_args): arguments passed - to all the dataset constructors. - path_manager_factory: (Optional) An object that generates an instance of - PathManager that can translate provided file paths. - path_manager_factory_class_type: The class type of `path_manager_factory`. - """ - - category: str - task_str: str = "singlesequence" - dataset_root: str = _CO3D_DATASET_ROOT - n_frames_per_sequence: int = -1 - test_on_train: bool = False - restrict_sequence_name: Tuple[str, ...] 
= () - test_restrict_sequence_id: int = -1 - assert_single_seq: bool = False - only_test_set: bool = False - dataset: JsonIndexDataset - dataset_class_type: str = "JsonIndexDataset" - path_manager_factory: PathManagerFactory - path_manager_factory_class_type: str = "PathManagerFactory" - - @classmethod - def dataset_tweak_args(cls, type, args: DictConfig) -> None: - """ - Called by get_default_args(JsonIndexDatasetMapProvider) to - not expose certain fields of each dataset class. - """ - for key in _NEED_CONTROL: - del args[key] - - def create_dataset(self): - """ - Prevent the member named dataset from being created. - """ - return - - def __post_init__(self): - super().__init__() - run_auto_creation(self) - if self.only_test_set and self.test_on_train: - raise ValueError("Cannot have only_test_set and test_on_train") - - path_manager = self.path_manager_factory.get() - - # TODO: - # - implement loading multiple categories - - frame_file = os.path.join( - self.dataset_root, self.category, "frame_annotations.jgz" - ) - sequence_file = os.path.join( - self.dataset_root, self.category, "sequence_annotations.jgz" - ) - subset_lists_file = os.path.join( - self.dataset_root, self.category, "set_lists.json" - ) - common_kwargs = { - "dataset_root": self.dataset_root, - "path_manager": path_manager, - "frame_annotations_file": frame_file, - "sequence_annotations_file": sequence_file, - "subset_lists_file": subset_lists_file, - **getattr(self, f"dataset_{self.dataset_class_type}_args"), - } - - # This maps the common names of the dataset subsets ("train"/"val"/"test") - # to the names of the subsets in the CO3D dataset. - set_names_mapping = _get_co3d_set_names_mapping( - self.task_str, - self.test_on_train, - self.only_test_set, - ) - - # load the evaluation batches - batch_indices_path = os.path.join( - self.dataset_root, - self.category, - f"eval_batches_{self.task_str}.json", - ) - if path_manager is not None: - batch_indices_path = path_manager.get_local_path(batch_indices_path) - if not os.path.isfile(batch_indices_path): - # The batch indices file does not exist. - # Most probably the user has not specified the root folder. - raise ValueError( - f"Looking for batch indices in {batch_indices_path}. " - + "Please specify a correct dataset_root folder." - ) - - with open(batch_indices_path, "r") as f: - eval_batch_index = json.load(f) - restrict_sequence_name = self.restrict_sequence_name - - if self.task_str == "singlesequence": - if ( - self.test_restrict_sequence_id is None - or self.test_restrict_sequence_id < 0 - ): - raise ValueError( - "Please specify an integer id 'test_restrict_sequence_id'" - + " of the sequence considered for 'singlesequence'" - + " training and evaluation." - ) - if len(self.restrict_sequence_name) > 0: - raise ValueError( - "For the 'singlesequence' task, the restrict_sequence_name has" - " to be unset while test_restrict_sequence_id has to be set to an" - " integer defining the order of the evaluation sequence." 
- ) - # a sort-stable set() equivalent: - eval_batches_sequence_names = list( - {b[0][0]: None for b in eval_batch_index}.keys() - ) - eval_sequence_name = eval_batches_sequence_names[ - self.test_restrict_sequence_id - ] - eval_batch_index = [ - b for b in eval_batch_index if b[0][0] == eval_sequence_name - ] - # overwrite the restrict_sequence_name - restrict_sequence_name = [eval_sequence_name] - if len(restrict_sequence_name) > 0: - eval_batch_index = [ - b for b in eval_batch_index if b[0][0] in restrict_sequence_name - ] - - dataset_type: Type[JsonIndexDataset] = registry.get( - JsonIndexDataset, self.dataset_class_type - ) - expand_args_fields(dataset_type) - train_dataset = None - if not self.only_test_set: - train_dataset = dataset_type( - n_frames_per_sequence=self.n_frames_per_sequence, - subsets=set_names_mapping["train"], - pick_sequence=restrict_sequence_name, - **common_kwargs, - ) - if self.test_on_train: - assert train_dataset is not None - val_dataset = test_dataset = train_dataset - else: - val_dataset = dataset_type( - n_frames_per_sequence=-1, - subsets=set_names_mapping["val"], - pick_sequence=restrict_sequence_name, - **common_kwargs, - ) - test_dataset = dataset_type( - n_frames_per_sequence=-1, - subsets=set_names_mapping["test"], - pick_sequence=restrict_sequence_name, - eval_batch_index=eval_batch_index, - **common_kwargs, - ) - dataset_map = DatasetMap( - train=train_dataset, val=val_dataset, test=test_dataset - ) - - if self.assert_single_seq: - # check there's only one sequence in all datasets - sequence_names = { - sequence_name - for dset in dataset_map.iter_datasets() - for sequence_name in dset.sequence_names() - } - if len(sequence_names) > 1: - raise ValueError("Multiple sequences loaded but expected one") - - self.dataset_map = dataset_map - - def get_dataset_map(self) -> DatasetMap: - # pyre-ignore[16] - return self.dataset_map - - def get_all_train_cameras(self) -> Optional[CamerasBase]: - if self.task_str == "multisequence": - return None - - assert self.task_str == "singlesequence" - - # pyre-ignore[16] - train_dataset = self.dataset_map.train - assert isinstance(train_dataset, JsonIndexDataset) - return train_dataset.get_all_train_cameras() - - -def _get_co3d_set_names_mapping( - task_str: str, - test_on_train: bool, - only_test: bool, -) -> Dict[str, List[str]]: - """ - Returns the mapping of the common dataset subset names ("train"/"val"/"test") - to the names of the corresponding subsets in the CO3D dataset - ("test_known"/"test_unseen"/"train_known"/"train_unseen"). 
- - The keys returned will be - - train (if not only_test) - - val (if not test_on_train) - - test (if not test_on_train) - """ - single_seq = task_str == "singlesequence" - - if only_test: - set_names_mapping = {} - else: - set_names_mapping = { - "train": [ - (DATASET_TYPE_TEST if single_seq else DATASET_TYPE_TRAIN) - + "_" - + DATASET_TYPE_KNOWN - ] - } - if not test_on_train: - prefixes = [DATASET_TYPE_TEST] - if not single_seq: - prefixes.append(DATASET_TYPE_TRAIN) - set_names_mapping.update( - { - dset: [ - p + "_" + t - for p in prefixes - for t in [DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN] - ] - for dset in ["val", "test"] - } - ) - - return set_names_mapping diff --git a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py b/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py deleted file mode 100644 index d8790d35bcc8f05f128730c9ac0ebc39e324854a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import copy -import json -import logging -import multiprocessing -import os -import warnings -from collections import defaultdict -from typing import Dict, List, Optional, Tuple, Type, Union - -import numpy as np -from iopath.common.file_io import PathManager - -from omegaconf import DictConfig -from pytorch3d.implicitron.dataset.dataset_map_provider import ( - DatasetMap, - DatasetMapProviderBase, - PathManagerFactory, -) -from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset -from pytorch3d.implicitron.tools.config import ( - expand_args_fields, - registry, - run_auto_creation, -) - -from pytorch3d.renderer.cameras import CamerasBase -from tqdm import tqdm - - -_CO3DV2_DATASET_ROOT: str = os.getenv("CO3DV2_DATASET_ROOT", "") - -# _NEED_CONTROL is a list of those elements of JsonIndexDataset which -# are not directly specified for it in the config but come from the -# DatasetMapProvider. -_NEED_CONTROL: Tuple[str, ...] = ( - "dataset_root", - "eval_batches", - "eval_batch_index", - "path_manager", - "subsets", - "frame_annotations_file", - "sequence_annotations_file", - "subset_lists_file", -) - -logger = logging.getLogger(__name__) - - -@registry.register -class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13] - """ - Generates the training, validation, and testing dataset objects for - a dataset laid out on disk like CO3Dv2, with annotations in gzipped json files. - - The dataset is organized in the filesystem as follows:: - - self.dataset_root - β”œβ”€β”€ - β”‚ β”œβ”€β”€ - β”‚ β”‚ β”œβ”€β”€ depth_masks - β”‚ β”‚ β”œβ”€β”€ depths - β”‚ β”‚ β”œβ”€β”€ images - β”‚ β”‚ β”œβ”€β”€ masks - β”‚ β”‚ └── pointcloud.ply - β”‚ β”œβ”€β”€ - β”‚ β”‚ β”œβ”€β”€ depth_masks - β”‚ β”‚ β”œβ”€β”€ depths - β”‚ β”‚ β”œβ”€β”€ images - β”‚ β”‚ β”œβ”€β”€ masks - β”‚ β”‚ └── pointcloud.ply - β”‚ β”œβ”€β”€ ... - β”‚ β”œβ”€β”€ - β”‚ β”œβ”€β”€ set_lists - β”‚ β”œβ”€β”€ set_lists_.json - β”‚ β”œβ”€β”€ set_lists_.json - β”‚ β”œβ”€β”€ ... - β”‚ β”œβ”€β”€ set_lists_.json - β”‚ β”œβ”€β”€ eval_batches - β”‚ β”‚ β”œβ”€β”€ eval_batches_.json - β”‚ β”‚ β”œβ”€β”€ eval_batches_.json - β”‚ β”‚ β”œβ”€β”€ ... 
- β”‚ β”‚ β”œβ”€β”€ eval_batches_.json - β”‚ β”œβ”€β”€ frame_annotations.jgz - β”‚ β”œβ”€β”€ sequence_annotations.jgz - β”œβ”€β”€ - β”œβ”€β”€ ... - β”œβ”€β”€ - - The dataset contains sequences named `` from `K` categories with - names ``. Each category comprises sequence folders - `/` containing the list of sequence images, depth maps, - foreground masks, and valid-depth masks `images`, `depths`, `masks`, and `depth_masks` - respectively. Furthermore, `//set_lists/` stores `M` - json files `set_lists_.json`, each describing a certain sequence subset. - - Users specify the loaded dataset subset by setting `self.subset_name` to one of the - available subset names ``. - - `frame_annotations.jgz` and `sequence_annotations.jgz` are gzipped json files containing - the list of all frames and sequences of the given category stored as lists of - `FrameAnnotation` and `SequenceAnnotation` objects respectivelly. - - Each `set_lists_.json` file contains the following dictionary:: - - { - "train": [ - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - "val": [ - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - "test": [ - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - ] - - defining the list of frames (identified with their `sequence_name` and `frame_number`) - in the "train", "val", and "test" subsets of the dataset. - Note that `frame_number` can be obtained only from `frame_annotations.jgz` and - does not necesarrily correspond to the numeric suffix of the corresponding image - file name (e.g. a file `//images/frame00005.jpg` can - have its frame number set to `20`, not 5). - - Each `eval_batches_.json` file contains a list of evaluation examples - in the following form:: - - [ - [ # batch 1 - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - [ # batch 1 - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - ] - - Note that the evaluation examples always come from the `"test"` subset of the dataset. - (test frames can repeat across batches). - - Args: - category: Dataset categories to load expressed as a string of comma-separated - category names (e.g. `"apple,car,orange"`). - subset_name: The name of the dataset subset. For CO3Dv2, these include - e.g. "manyview_dev_0", "fewview_test", ... - dataset_root: The root folder of the dataset. - test_on_train: Construct validation and test datasets from - the training subset. - only_test_set: Load only the test set. Incompatible with `test_on_train`. - load_eval_batches: Load the file containing eval batches pointing to the - test dataset. - n_known_frames_for_test: Add a certain number of known frames to each - eval batch. Useful for evaluating models that require - source views as input (e.g. NeRF-WCE / PixelNeRF). - dataset_args: Specifies additional arguments to the - JsonIndexDataset constructor call. - path_manager_factory: (Optional) An object that generates an instance of - PathManager that can translate provided file paths. - path_manager_factory_class_type: The class type of `path_manager_factory`. 
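# [Editor's illustration, not part of the original file] A minimal sketch of
# constructing the provider, assuming the CO3DV2_DATASET_ROOT environment variable
# points at the dataset and that the "apple" category with the "manyview_dev_0"
# subset exists on disk.
from pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2 import (
    JsonIndexDatasetMapProviderV2,
)
from pytorch3d.implicitron.tools.config import expand_args_fields

expand_args_fields(JsonIndexDatasetMapProviderV2)
provider = JsonIndexDatasetMapProviderV2(
    category="apple",
    subset_name="manyview_dev_0",
)
dataset_map = provider.get_dataset_map()  # DatasetMap with .train/.val/.test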
- """ - - category: str - subset_name: str - dataset_root: str = _CO3DV2_DATASET_ROOT - - test_on_train: bool = False - only_test_set: bool = False - load_eval_batches: bool = True - num_load_workers: int = 4 - - n_known_frames_for_test: int = 0 - - dataset_class_type: str = "JsonIndexDataset" - dataset: JsonIndexDataset - - path_manager_factory: PathManagerFactory - path_manager_factory_class_type: str = "PathManagerFactory" - - def __post_init__(self): - super().__init__() - run_auto_creation(self) - - if self.only_test_set and self.test_on_train: - raise ValueError("Cannot have only_test_set and test_on_train") - - if "," in self.category: - # a comma-separated list of categories to load - categories = [c.strip() for c in self.category.split(",")] - logger.info(f"Loading a list of categories: {str(categories)}.") - with multiprocessing.Pool( - processes=min(self.num_load_workers, len(categories)) - ) as pool: - category_dataset_maps = list( - tqdm( - pool.imap(self._load_category, categories), - total=len(categories), - ) - ) - dataset_map = category_dataset_maps[0] - dataset_map.join(category_dataset_maps[1:]) - - else: - # one category to load - dataset_map = self._load_category(self.category) - - self.dataset_map = dataset_map - - def _load_category(self, category: str) -> DatasetMap: - - frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join( - self.dataset_root, category, "sequence_annotations.jgz" - ) - - path_manager = self.path_manager_factory.get() - - if path_manager is not None: - path_managed_frame_file = path_manager.get_local_path(frame_file) - else: - path_managed_frame_file = frame_file - if not os.path.isfile(path_managed_frame_file): - # The frame_file does not exist. - # Most probably the user has not specified the root folder. - raise ValueError( - f"Looking for frame annotations in {path_managed_frame_file}." - + " Please specify a correct dataset_root folder." - + " Note: By default the root folder is taken from the" - + " CO3DV2_DATASET_ROOT environment variable." - ) - - # setup the common dataset arguments - common_dataset_kwargs = getattr(self, f"dataset_{self.dataset_class_type}_args") - common_dataset_kwargs = { - **common_dataset_kwargs, - "dataset_root": self.dataset_root, - "frame_annotations_file": frame_file, - "sequence_annotations_file": sequence_file, - "subsets": None, - "subset_lists_file": "", - "path_manager": path_manager, - } - - # get the used dataset type - dataset_type: Type[JsonIndexDataset] = registry.get( - JsonIndexDataset, self.dataset_class_type - ) - expand_args_fields(dataset_type) - - dataset = dataset_type(**common_dataset_kwargs) - - available_subset_names = self._get_available_subset_names(category) - logger.debug(f"Available subset names: {str(available_subset_names)}.") - if self.subset_name not in available_subset_names: - raise ValueError( - f"Unknown subset name {self.subset_name}." - + f" Choose one of available subsets: {str(available_subset_names)}." 
- ) - - # load the list of train/val/test frames - subset_mapping = self._load_annotation_json( - os.path.join(category, "set_lists", f"set_lists_{self.subset_name}.json") - ) - - # load the evaluation batches - if self.load_eval_batches: - eval_batch_index = self._load_annotation_json( - os.path.join( - category, - "eval_batches", - f"eval_batches_{self.subset_name}.json", - ) - ) - else: - eval_batch_index = None - - train_dataset = None - if not self.only_test_set: - # load the training set - logger.debug("Extracting train dataset.") - train_dataset = dataset.subset_from_frame_index(subset_mapping["train"]) - logger.info(f"Train dataset: {str(train_dataset)}") - - if self.test_on_train: - assert train_dataset is not None - val_dataset = test_dataset = train_dataset - else: - # load the val and test sets - logger.debug("Extracting val dataset.") - val_dataset = dataset.subset_from_frame_index(subset_mapping["val"]) - logger.info(f"Val dataset: {str(val_dataset)}") - logger.debug("Extracting test dataset.") - - if (self.n_known_frames_for_test > 0) and self.load_eval_batches: - # extend the test subset mapping and the dataset with additional - # known views from the train dataset - ( - eval_batch_index, - subset_mapping["test"], - ) = self._extend_test_data_with_known_views( - subset_mapping, - eval_batch_index, - ) - - test_dataset = dataset.subset_from_frame_index(subset_mapping["test"]) - logger.info(f"Test dataset: {str(test_dataset)}") - if self.load_eval_batches: - # load the eval batches - logger.debug("Extracting eval batches.") - try: - test_dataset.eval_batches = ( - test_dataset.seq_frame_index_to_dataset_index( - eval_batch_index, - ) - ) - except IndexError: - warnings.warn( - "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n" - + "Some eval batches are missing from the test dataset.\n" - + "The evaluation results will be incomparable to the\n" - + "evaluation results calculated on the original dataset.\n" - + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" - ) - test_dataset.eval_batches = ( - test_dataset.seq_frame_index_to_dataset_index( - eval_batch_index, - allow_missing_indices=True, - remove_missing_indices=True, - ) - ) - logger.info(f"# eval batches: {len(test_dataset.eval_batches)}") - - return DatasetMap(train=train_dataset, val=val_dataset, test=test_dataset) - - @classmethod - def dataset_tweak_args(cls, type, args: DictConfig) -> None: - """ - Called by get_default_args(JsonIndexDatasetMapProviderV2) to - not expose certain fields of each dataset class. - """ - for key in _NEED_CONTROL: - del args[key] - - def create_dataset(self): - # The dataset object is created inside `self.get_dataset_map` - pass - - def get_dataset_map(self) -> DatasetMap: - return self.dataset_map # pyre-ignore [16] - - def get_category_to_subset_name_list(self) -> Dict[str, List[str]]: - """ - Returns a global dataset index containing the available subset names per category - as a dictionary. - - Returns: - category_to_subset_name_list: A dictionary containing subset names available - per category of the following form:: - - { - category_0: [category_0_subset_name_0, category_0_subset_name_1, ...], - category_1: [category_1_subset_name_0, category_1_subset_name_1, ...], - ... 
- } - - """ - category_to_subset_name_list_json = "category_to_subset_name_list.json" - category_to_subset_name_list = self._load_annotation_json( - category_to_subset_name_list_json - ) - return category_to_subset_name_list - - def get_all_train_cameras(self) -> Optional[CamerasBase]: - # pyre-ignore[16] - train_dataset = self.dataset_map.train - assert isinstance(train_dataset, JsonIndexDataset) - return train_dataset.get_all_train_cameras() - - def _load_annotation_json(self, json_filename: str): - full_path = os.path.join( - self.dataset_root, - json_filename, - ) - logger.info(f"Loading frame index json from {full_path}.") - path_manager = self.path_manager_factory.get() - if path_manager is not None: - full_path = path_manager.get_local_path(full_path) - if not os.path.isfile(full_path): - # The batch indices file does not exist. - # Most probably the user has not specified the root folder. - raise ValueError( - f"Looking for dataset json file in {full_path}. " - + "Please specify a correct dataset_root folder." - ) - with open(full_path, "r") as f: - data = json.load(f) - return data - - def _get_available_subset_names(self, category: str): - return get_available_subset_names( - self.dataset_root, - category, - path_manager=self.path_manager_factory.get(), - ) - - def _extend_test_data_with_known_views( - self, - subset_mapping: Dict[str, List[Union[Tuple[str, int], Tuple[str, int, str]]]], - eval_batch_index: List[List[Union[Tuple[str, int, str], Tuple[str, int]]]], - ): - # convert the train subset mapping to a dict: - # sequence_to_train_frames: {sequence_name: frame_index} - sequence_to_train_frames = defaultdict(list) - for frame_entry in subset_mapping["train"]: - sequence_name = frame_entry[0] - sequence_to_train_frames[sequence_name].append(frame_entry) - sequence_to_train_frames = dict(sequence_to_train_frames) - test_subset_mapping_set = {tuple(s) for s in subset_mapping["test"]} - - # extend the eval batches / subset mapping with the additional examples - eval_batch_index_out = copy.deepcopy(eval_batch_index) - generator = np.random.default_rng(seed=0) - for batch in eval_batch_index_out: - sequence_name = batch[0][0] - sequence_known_entries = sequence_to_train_frames[sequence_name] - idx_to_add = generator.permutation(len(sequence_known_entries))[ - : self.n_known_frames_for_test - ] - entries_to_add = [sequence_known_entries[a] for a in idx_to_add] - assert all(e in subset_mapping["train"] for e in entries_to_add) - - # extend the eval batch with the known views - batch.extend(entries_to_add) - - # also add these new entries to the test subset mapping - test_subset_mapping_set.update(tuple(e) for e in entries_to_add) - - return eval_batch_index_out, list(test_subset_mapping_set) - - -def get_available_subset_names( - dataset_root: str, - category: str, - path_manager: Optional[PathManager] = None, -) -> List[str]: - """ - Get the available subset names for a given category folder inside a root dataset - folder `dataset_root`. - """ - category_dir = os.path.join(dataset_root, category) - category_dir_exists = ( - (path_manager is not None) and path_manager.isdir(category_dir) - ) or os.path.isdir(category_dir) - if not category_dir_exists: - raise ValueError( - f"Looking for dataset files in {category_dir}. " - + "Please specify a correct dataset_root folder." 
- ) - - set_list_dir = os.path.join(category_dir, "set_lists") - set_list_jsons = (os.listdir if path_manager is None else path_manager.ls)( - set_list_dir - ) - - return [ - json_file.replace("set_lists_", "").replace(".json", "") - for json_file in set_list_jsons - ] diff --git a/pytorch3d/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py deleted file mode 100644 index 8a4993e1391e284c83d5d82714531521022e32e8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import numpy as np -import torch -from pytorch3d.implicitron.tools.config import registry - -from .load_llff import load_llff_data - -from .single_sequence_dataset import ( - _interpret_blender_cameras, - SingleSceneDatasetMapProviderBase, -) - - -@registry.register -class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase): - """ - Provides data for one scene from the LLFF dataset. - - Members: - base_dir: directory holding the data for the scene. - object_name: The name of the scene (e.g. "fern"). This is just used as a label. - It will typically be equal to the name of the directory self.base_dir. - path_manager_factory: Creates path manager which may be used for - interpreting paths. - n_known_frames_for_test: If set, training frames are included in the val - and test datasets, and this many random training frames are added to - each test batch. If not set, test batches each contain just a single - testing frame. - downscale_factor: determines image sizes. 
- """ - - downscale_factor: int = 4 - - def _load_data(self) -> None: - path_manager = self.path_manager_factory.get() - images, poses, _ = load_llff_data( - self.base_dir, factor=self.downscale_factor, path_manager=path_manager - ) - hwf = poses[0, :3, -1] - poses = poses[:, :3, :4] - - llffhold = 8 - i_test = np.arange(images.shape[0])[::llffhold] - i_test_index = set(i_test.tolist()) - i_train = np.array( - [i for i in np.arange(images.shape[0]) if i not in i_test_index] - ) - i_split = (i_train, i_test, i_test) - H, W, focal = hwf - focal_ndc = 2 * focal / min(H, W) - images = torch.from_numpy(images).permute(0, 3, 1, 2) - poses = torch.from_numpy(poses) - - # pyre-ignore[16] - self.poses = _interpret_blender_cameras(poses, focal_ndc) - # pyre-ignore[16] - self.images = images - # pyre-ignore[16] - self.fg_probabilities = None - # pyre-ignore[16] - self.i_split = i_split diff --git a/pytorch3d/pytorch3d/implicitron/dataset/load_blender.py b/pytorch3d/pytorch3d/implicitron/dataset/load_blender.py deleted file mode 100644 index 42b9cb530baf2981407bfc7d7a914cfcfccecc18..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/load_blender.py +++ /dev/null @@ -1,141 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from https://github.com/bmild/nerf/blob/master/load_blender.py -# Copyright (c) 2020 bmild -import json -import os - -import numpy as np -import torch -from PIL import Image - - -def translate_by_t_along_z(t): - tform = np.eye(4).astype(np.float32) - tform[2][3] = t - return tform - - -def rotate_by_phi_along_x(phi): - tform = np.eye(4).astype(np.float32) - tform[1, 1] = tform[2, 2] = np.cos(phi) - tform[1, 2] = -np.sin(phi) - tform[2, 1] = -tform[1, 2] - return tform - - -def rotate_by_theta_along_y(theta): - tform = np.eye(4).astype(np.float32) - tform[0, 0] = tform[2, 2] = np.cos(theta) - tform[0, 2] = -np.sin(theta) - tform[2, 0] = -tform[0, 2] - return tform - - -def pose_spherical(theta, phi, radius): - c2w = translate_by_t_along_z(radius) - c2w = rotate_by_phi_along_x(phi / 180.0 * np.pi) @ c2w - c2w = rotate_by_theta_along_y(theta / 180 * np.pi) @ c2w - c2w = np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) @ c2w - return c2w - - -def _local_path(path_manager, path): - if path_manager is None: - return path - return path_manager.get_local_path(path) - - -def load_blender_data( - basedir, - half_res=False, - testskip=1, - debug=False, - path_manager=None, - focal_length_in_screen_space=False, -): - splits = ["train", "val", "test"] - metas = {} - for s in splits: - path = os.path.join(basedir, f"transforms_{s}.json") - with open(_local_path(path_manager, path)) as fp: - metas[s] = json.load(fp) - - all_imgs = [] - all_poses = [] - counts = [0] - for s in splits: - meta = metas[s] - imgs = [] - poses = [] - if s == "train" or testskip == 0: - skip = 1 - else: - skip = testskip - - for frame in meta["frames"][::skip]: - fname = os.path.join(basedir, frame["file_path"] + ".png") - imgs.append(np.array(Image.open(_local_path(path_manager, fname)))) - poses.append(np.array(frame["transform_matrix"])) - imgs = (np.array(imgs) / 255.0).astype(np.float32) - poses = np.array(poses).astype(np.float32) - counts.append(counts[-1] + imgs.shape[0]) - all_imgs.append(imgs) - all_poses.append(poses) - - i_split = [np.arange(counts[i], counts[i + 1]) for i in range(3)] - - imgs = np.concatenate(all_imgs, 0) - poses = np.concatenate(all_poses, 0) - - H, W = imgs[0].shape[:2] - camera_angle_x = float(meta["camera_angle_x"]) - if 
focal_length_in_screen_space: - focal = 0.5 * W / np.tan(0.5 * camera_angle_x) - else: - focal = 1 / np.tan(0.5 * camera_angle_x) - - render_poses = torch.stack( - [ - torch.from_numpy(pose_spherical(angle, -30.0, 4.0)) - for angle in np.linspace(-180, 180, 40 + 1)[:-1] - ], - 0, - ) - - # In debug mode, return extremely tiny images - if debug: - import cv2 - - H = H // 32 - W = W // 32 - if focal_length_in_screen_space: - focal = focal / 32.0 - imgs = [ - torch.from_numpy( - cv2.resize(imgs[i], dsize=(25, 25), interpolation=cv2.INTER_AREA) - ) - for i in range(imgs.shape[0]) - ] - imgs = torch.stack(imgs, 0) - poses = torch.from_numpy(poses) - return imgs, poses, render_poses, [H, W, focal], i_split - - if half_res: - import cv2 - - # TODO: resize images using INTER_AREA (cv2) - H = H // 2 - W = W // 2 - if focal_length_in_screen_space: - focal = focal / 2.0 - imgs = [ - torch.from_numpy( - cv2.resize(imgs[i], dsize=(400, 400), interpolation=cv2.INTER_AREA) - ) - for i in range(imgs.shape[0]) - ] - imgs = torch.stack(imgs, 0) - - poses = torch.from_numpy(poses) - - return imgs, poses, render_poses, [H, W, focal], i_split diff --git a/pytorch3d/pytorch3d/implicitron/dataset/load_llff.py b/pytorch3d/pytorch3d/implicitron/dataset/load_llff.py deleted file mode 100644 index d19337a9151916c01951f45bcb3b10c2e59a1873..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/load_llff.py +++ /dev/null @@ -1,339 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from https://github.com/bmild/nerf/blob/master/load_llff.py -# Copyright (c) 2020 bmild -import logging -import os -import warnings - -import numpy as np - -from PIL import Image - - -# Slightly modified version of LLFF data loading code -# see https://github.com/Fyusion/LLFF for original - -logger = logging.getLogger(__name__) - - -def _minify(basedir, path_manager, factors=(), resolutions=()): - needtoload = False - for r in factors: - imgdir = os.path.join(basedir, "images_{}".format(r)) - if not _exists(path_manager, imgdir): - needtoload = True - for r in resolutions: - imgdir = os.path.join(basedir, "images_{}x{}".format(r[1], r[0])) - if not _exists(path_manager, imgdir): - needtoload = True - if not needtoload: - return - assert path_manager is None - - from subprocess import check_output - - imgdir = os.path.join(basedir, "images") - imgs = [os.path.join(imgdir, f) for f in sorted(_ls(path_manager, imgdir))] - imgs = [f for f in imgs if f.endswith("JPG", "jpg", "png", "jpeg", "PNG")] - imgdir_orig = imgdir - - wd = os.getcwd() - - for r in factors + resolutions: - if isinstance(r, int): - name = "images_{}".format(r) - resizearg = "{}%".format(100.0 / r) - else: - name = "images_{}x{}".format(r[1], r[0]) - resizearg = "{}x{}".format(r[1], r[0]) - imgdir = os.path.join(basedir, name) - if os.path.exists(imgdir): - continue - - logger.info(f"Minifying {r}, {basedir}") - - os.makedirs(imgdir) - check_output("cp {}/* {}".format(imgdir_orig, imgdir), shell=True) - - ext = imgs[0].split(".")[-1] - args = " ".join( - ["mogrify", "-resize", resizearg, "-format", "png", "*.{}".format(ext)] - ) - logger.info(args) - os.chdir(imgdir) - check_output(args, shell=True) - os.chdir(wd) - - if ext != "png": - check_output("rm {}/*.{}".format(imgdir, ext), shell=True) - logger.info("Removed duplicates") - logger.info("Done") - - -def _load_data( - basedir, factor=None, width=None, height=None, load_imgs=True, path_manager=None -): - - poses_arr = np.load( - _local_path(path_manager, os.path.join(basedir, 
"poses_bounds.npy")) - ) - poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0]) - bds = poses_arr[:, -2:].transpose([1, 0]) - - img0 = [ - os.path.join(basedir, "images", f) - for f in sorted(_ls(path_manager, os.path.join(basedir, "images"))) - if f.endswith("JPG") or f.endswith("jpg") or f.endswith("png") - ][0] - - def imread(f): - return np.array(Image.open(f)) - - sh = imread(_local_path(path_manager, img0)).shape - - sfx = "" - - if factor is not None: - sfx = "_{}".format(factor) - _minify(basedir, path_manager, factors=[factor]) - factor = factor - elif height is not None: - factor = sh[0] / float(height) - width = int(sh[1] / factor) - _minify(basedir, path_manager, resolutions=[[height, width]]) - sfx = "_{}x{}".format(width, height) - elif width is not None: - factor = sh[1] / float(width) - height = int(sh[0] / factor) - _minify(basedir, path_manager, resolutions=[[height, width]]) - sfx = "_{}x{}".format(width, height) - else: - factor = 1 - - imgdir = os.path.join(basedir, "images" + sfx) - if not _exists(path_manager, imgdir): - raise ValueError(f"{imgdir} does not exist, returning") - - imgfiles = [ - _local_path(path_manager, os.path.join(imgdir, f)) - for f in sorted(_ls(path_manager, imgdir)) - if f.endswith("JPG") or f.endswith("jpg") or f.endswith("png") - ] - if poses.shape[-1] != len(imgfiles): - raise ValueError( - "Mismatch between imgs {} and poses {} !!!!".format( - len(imgfiles), poses.shape[-1] - ) - ) - - sh = imread(imgfiles[0]).shape - poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1]) - poses[2, 4, :] = poses[2, 4, :] * 1.0 / factor - - if not load_imgs: - return poses, bds - - imgs = imgs = [imread(f)[..., :3] / 255.0 for f in imgfiles] - imgs = np.stack(imgs, -1) - - logger.info(f"Loaded image data, shape {imgs.shape}") - return poses, bds, imgs - - -def normalize(x): - denom = np.linalg.norm(x) - if denom < 0.001: - warnings.warn("unsafe normalize()") - return x / denom - - -def viewmatrix(z, up, pos): - vec2 = normalize(z) - vec1_avg = up - vec0 = normalize(np.cross(vec1_avg, vec2)) - vec1 = normalize(np.cross(vec2, vec0)) - m = np.stack([vec0, vec1, vec2, pos], 1) - return m - - -def ptstocam(pts, c2w): - tt = np.matmul(c2w[:3, :3].T, (pts - c2w[:3, 3])[..., np.newaxis])[..., 0] - return tt - - -def poses_avg(poses): - - hwf = poses[0, :3, -1:] - - center = poses[:, :3, 3].mean(0) - vec2 = normalize(poses[:, :3, 2].sum(0)) - up = poses[:, :3, 1].sum(0) - c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1) - - return c2w - - -def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N): - render_poses = [] - rads = np.array(list(rads) + [1.0]) - hwf = c2w[:, 4:5] - - for theta in np.linspace(0.0, 2.0 * np.pi * rots, N + 1)[:-1]: - c = np.dot( - c2w[:3, :4], - np.array([np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.0]) - * rads, - ) - z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.0]))) - render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1)) - return render_poses - - -def recenter_poses(poses): - - poses_ = poses + 0 - bottom = np.reshape([0, 0, 0, 1.0], [1, 4]) - c2w = poses_avg(poses) - c2w = np.concatenate([c2w[:3, :4], bottom], -2) - bottom = np.tile(np.reshape(bottom, [1, 1, 4]), [poses.shape[0], 1, 1]) - poses = np.concatenate([poses[:, :3, :4], bottom], -2) - - poses = np.linalg.inv(c2w) @ poses - poses_[:, :3, :4] = poses[:, :3, :4] - poses = poses_ - return poses - - -def spherify_poses(poses, bds): - def add_row_to_homogenize_transform(p): - r"""Add the last row to 
homogenize 3 x 4 transformation matrices.""" - return np.concatenate( - [p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1 - ) - - # p34_to_44 = lambda p: np.concatenate( - # [p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1 - # ) - - p34_to_44 = add_row_to_homogenize_transform - - rays_d = poses[:, :3, 2:3] - rays_o = poses[:, :3, 3:4] - - def min_line_dist(rays_o, rays_d): - A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0, 2, 1]) - b_i = -A_i @ rays_o - pt_mindist = np.squeeze( - -np.linalg.inv((np.transpose(A_i, [0, 2, 1]) @ A_i).mean(0)) @ (b_i).mean(0) - ) - return pt_mindist - - pt_mindist = min_line_dist(rays_o, rays_d) - - center = pt_mindist - up = (poses[:, :3, 3] - center).mean(0) - - vec0 = normalize(up) - vec1 = normalize(np.cross([0.1, 0.2, 0.3], vec0)) - vec2 = normalize(np.cross(vec0, vec1)) - pos = center - c2w = np.stack([vec1, vec2, vec0, pos], 1) - - poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:, :3, :4]) - - rad = np.sqrt(np.mean(np.sum(np.square(poses_reset[:, :3, 3]), -1))) - - sc = 1.0 / rad - poses_reset[:, :3, 3] *= sc - bds *= sc - rad *= sc - - centroid = np.mean(poses_reset[:, :3, 3], 0) - zh = centroid[2] - radcircle = np.sqrt(rad**2 - zh**2) - new_poses = [] - - for th in np.linspace(0.0, 2.0 * np.pi, 120): - - camorigin = np.array([radcircle * np.cos(th), radcircle * np.sin(th), zh]) - up = np.array([0, 0, -1.0]) - - vec2 = normalize(camorigin) - vec0 = normalize(np.cross(vec2, up)) - vec1 = normalize(np.cross(vec2, vec0)) - pos = camorigin - p = np.stack([vec0, vec1, vec2, pos], 1) - - new_poses.append(p) - - new_poses = np.stack(new_poses, 0) - - new_poses = np.concatenate( - [new_poses, np.broadcast_to(poses[0, :3, -1:], new_poses[:, :3, -1:].shape)], -1 - ) - poses_reset = np.concatenate( - [ - poses_reset[:, :3, :4], - np.broadcast_to(poses[0, :3, -1:], poses_reset[:, :3, -1:].shape), - ], - -1, - ) - - return poses_reset, new_poses, bds - - -def _local_path(path_manager, path): - if path_manager is None: - return path - return path_manager.get_local_path(path) - - -def _ls(path_manager, path): - if path_manager is None: - return os.listdir(path) - return path_manager.ls(path) - - -def _exists(path_manager, path): - if path_manager is None: - return os.path.exists(path) - return path_manager.exists(path) - - -def load_llff_data( - basedir, - factor=8, - recenter=True, - bd_factor=0.75, - spherify=False, - path_zflat=False, - path_manager=None, -): - - poses, bds, imgs = _load_data( - basedir, factor=factor, path_manager=path_manager - ) # factor=8 downsamples original imgs by 8x - logger.info(f"Loaded {basedir}, {bds.min()}, {bds.max()}") - - # Correct rotation matrix ordering and move variable dim to axis 0 - poses = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1) - poses = np.moveaxis(poses, -1, 0).astype(np.float32) - imgs = np.moveaxis(imgs, -1, 0).astype(np.float32) - images = imgs - bds = np.moveaxis(bds, -1, 0).astype(np.float32) - - # Rescale if bd_factor is provided - sc = 1.0 if bd_factor is None else 1.0 / (bds.min() * bd_factor) - poses[:, :3, 3] *= sc - bds *= sc - - if recenter: - poses = recenter_poses(poses) - - if spherify: - poses, render_poses, bds = spherify_poses(poses, bds) - - images = images.astype(np.float32) - poses = poses.astype(np.float32) - - return images, poses, bds diff --git a/pytorch3d/pytorch3d/implicitron/dataset/orm_types.py b/pytorch3d/pytorch3d/implicitron/dataset/orm_types.py deleted file mode 100644 index 
2e916021a9a80d48ae8a9741694ca8f5bce38c56..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/orm_types.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# This functionality requires SQLAlchemy 2.0 or later. - -import math -import struct -from typing import Optional, Tuple - -import numpy as np - -from pytorch3d.implicitron.dataset.types import ( - DepthAnnotation, - ImageAnnotation, - MaskAnnotation, - PointCloudAnnotation, - VideoAnnotation, - ViewpointAnnotation, -) - -from sqlalchemy import LargeBinary -from sqlalchemy.orm import ( - composite, - DeclarativeBase, - Mapped, - mapped_column, - MappedAsDataclass, -) -from sqlalchemy.types import TypeDecorator - - -# these produce policies to serialize structured types to blobs -def ArrayTypeFactory(shape=None): - if shape is None: - - class VariableShapeNumpyArrayType(TypeDecorator): - impl = LargeBinary - - def process_bind_param(self, value, dialect): - if value is None: - return None - - ndim_bytes = np.int32(value.ndim).tobytes() - shape_bytes = np.array(value.shape, dtype=np.int64).tobytes() - value_bytes = value.astype(np.float32).tobytes() - return ndim_bytes + shape_bytes + value_bytes - - def process_result_value(self, value, dialect): - if value is None: - return None - - ndim = np.frombuffer(value[:4], dtype=np.int32)[0] - value_start = 4 + 8 * ndim - shape = np.frombuffer(value[4:value_start], dtype=np.int64) - assert shape.shape == (ndim,) - return np.frombuffer(value[value_start:], dtype=np.float32).reshape( - shape - ) - - return VariableShapeNumpyArrayType - - class NumpyArrayType(TypeDecorator): - impl = LargeBinary - - def process_bind_param(self, value, dialect): - if value is not None: - if value.shape != shape: - raise ValueError(f"Passed an array of wrong shape: {value.shape}") - return value.astype(np.float32).tobytes() - return None - - def process_result_value(self, value, dialect): - if value is not None: - return np.frombuffer(value, dtype=np.float32).reshape(shape) - return None - - return NumpyArrayType - - -def TupleTypeFactory(dtype=float, shape: Tuple[int, ...] 
= (2,)): - format_symbol = { - float: "f", # float32 - int: "i", # int32 - }[dtype] - - class TupleType(TypeDecorator): - impl = LargeBinary - _format = format_symbol * math.prod(shape) - - def process_bind_param(self, value, _): - if value is None: - return None - - if len(shape) > 1: - value = np.array(value, dtype=dtype).reshape(-1) - - return struct.pack(TupleType._format, *value) - - def process_result_value(self, value, _): - if value is None: - return None - - loaded = struct.unpack(TupleType._format, value) - if len(shape) > 1: - loaded = _rec_totuple( - np.array(loaded, dtype=dtype).reshape(shape).tolist() - ) - - return loaded - - return TupleType - - -def _rec_totuple(t): - if isinstance(t, list): - return tuple(_rec_totuple(x) for x in t) - - return t - - -class Base(MappedAsDataclass, DeclarativeBase): - """subclasses will be converted to dataclasses""" - - -class SqlFrameAnnotation(Base): - __tablename__ = "frame_annots" - - sequence_name: Mapped[str] = mapped_column(primary_key=True) - frame_number: Mapped[int] = mapped_column(primary_key=True) - frame_timestamp: Mapped[float] = mapped_column(index=True) - - image: Mapped[ImageAnnotation] = composite( - mapped_column("_image_path"), - mapped_column("_image_size", TupleTypeFactory(int)), - ) - - depth: Mapped[DepthAnnotation] = composite( - mapped_column("_depth_path", nullable=True), - mapped_column("_depth_scale_adjustment", nullable=True), - mapped_column("_depth_mask_path", nullable=True), - ) - - mask: Mapped[MaskAnnotation] = composite( - mapped_column("_mask_path", nullable=True), - mapped_column("_mask_mass", index=True, nullable=True), - mapped_column( - "_mask_bounding_box_xywh", - TupleTypeFactory(float, shape=(4,)), - nullable=True, - ), - ) - - viewpoint: Mapped[ViewpointAnnotation] = composite( - mapped_column( - "_viewpoint_R", TupleTypeFactory(float, shape=(3, 3)), nullable=True - ), - mapped_column( - "_viewpoint_T", TupleTypeFactory(float, shape=(3,)), nullable=True - ), - mapped_column( - "_viewpoint_focal_length", TupleTypeFactory(float), nullable=True - ), - mapped_column( - "_viewpoint_principal_point", TupleTypeFactory(float), nullable=True - ), - mapped_column("_viewpoint_intrinsics_format", nullable=True), - ) - - -class SqlSequenceAnnotation(Base): - __tablename__ = "sequence_annots" - - sequence_name: Mapped[str] = mapped_column(primary_key=True) - category: Mapped[str] = mapped_column(index=True) - - video: Mapped[VideoAnnotation] = composite( - mapped_column("_video_path", nullable=True), - mapped_column("_video_length", nullable=True), - ) - point_cloud: Mapped[PointCloudAnnotation] = composite( - mapped_column("_point_cloud_path", nullable=True), - mapped_column("_point_cloud_quality_score", nullable=True), - mapped_column("_point_cloud_n_points", nullable=True), - ) - # the bigger the better - viewpoint_quality_score: Mapped[Optional[float]] = mapped_column() diff --git a/pytorch3d/pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py deleted file mode 100644 index 3ce99fb5d9ef4600f5f065b015626cd6e56668c1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
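As a quick illustration of the blob layout used by the VariableShapeNumpyArrayType in the deleted orm_types.py above (an int32 ndim, then int64 shape entries, then float32 data), here is a minimal standalone sketch of the same round trip; the helper names encode_array/decode_array are illustrative and not part of the file.

import numpy as np

def encode_array(value: np.ndarray) -> bytes:
    # int32 ndim, then int64 shape entries, then the float32 payload
    ndim_bytes = np.int32(value.ndim).tobytes()
    shape_bytes = np.array(value.shape, dtype=np.int64).tobytes()
    return ndim_bytes + shape_bytes + value.astype(np.float32).tobytes()

def decode_array(blob: bytes) -> np.ndarray:
    ndim = int(np.frombuffer(blob[:4], dtype=np.int32)[0])
    value_start = 4 + 8 * ndim
    shape = np.frombuffer(blob[4:value_start], dtype=np.int64)
    return np.frombuffer(blob[value_start:], dtype=np.float32).reshape(shape)

original = np.arange(12, dtype=np.float32).reshape(3, 4)
assert np.array_equal(decode_array(encode_array(original)), original)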
- -from os.path import dirname, join, realpath -from typing import Optional, Tuple - -import torch -from pytorch3d.implicitron.tools.config import registry, run_auto_creation -from pytorch3d.io import IO -from pytorch3d.renderer import ( - AmbientLights, - BlendParams, - CamerasBase, - FoVPerspectiveCameras, - HardPhongShader, - look_at_view_transform, - MeshRasterizer, - MeshRendererWithFragments, - PointLights, - RasterizationSettings, -) -from pytorch3d.structures.meshes import Meshes - -from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory -from .single_sequence_dataset import SingleSceneDataset -from .utils import DATASET_TYPE_KNOWN - - -@registry.register -class RenderedMeshDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13] - """ - A simple single-scene dataset based on PyTorch3D renders of a mesh. - Provides `num_views` renders of the mesh as train, with no val - and test. The renders are generated from viewpoints sampled at uniformly - distributed azimuth intervals. The elevation is kept constant so that the - camera's vertical position coincides with the equator. - - By default, uses Keenan Crane's cow model, and the camera locations are - set to make sense for that. - - Although the rendering used to generate this dataset will use a GPU - if one is available, the data it produces is on the CPU just like - the data returned by implicitron's other dataset map providers. - This is because both datasets and models can be large, so implicitron's - training loop expects data on the CPU and only moves - what it needs to the device. - - For a more detailed explanation of this code, please refer to the - docs/tutorials/fit_textured_mesh.ipynb notebook. - - Members: - num_views: The number of generated renders. - data_file: The folder that contains the mesh file. By default, finds - the cow mesh in the same repo as this code. - azimuth_range: number of degrees on each side of the start position to - take samples - distance: distance from camera centres to the origin. - resolution: the common height and width of the output images. - use_point_light: whether to use a particular point light as opposed - to ambient white. - gpu_idx: which gpu to use for rendering the mesh. - path_manager_factory: (Optional) An object that generates an instance of - PathManager that can translate provided file paths. - path_manager_factory_class_type: The class type of `path_manager_factory`. 
- """ - - num_views: int = 40 - data_file: Optional[str] = None - azimuth_range: float = 180 - distance: float = 2.7 - resolution: int = 128 - use_point_light: bool = True - gpu_idx: Optional[int] = 0 - path_manager_factory: PathManagerFactory - path_manager_factory_class_type: str = "PathManagerFactory" - - def get_dataset_map(self) -> DatasetMap: - # pyre-ignore[16] - return DatasetMap(train=self.train_dataset, val=None, test=None) - - def get_all_train_cameras(self) -> CamerasBase: - # pyre-ignore[16] - return self.poses - - def __post_init__(self) -> None: - super().__init__() - run_auto_creation(self) - if torch.cuda.is_available() and self.gpu_idx is not None: - device = torch.device(f"cuda:{self.gpu_idx}") - else: - device = torch.device("cpu") - if self.data_file is None: - data_file = join( - dirname(dirname(dirname(dirname(realpath(__file__))))), - "docs", - "tutorials", - "data", - "cow_mesh", - "cow.obj", - ) - else: - data_file = self.data_file - io = IO(path_manager=self.path_manager_factory.get()) - mesh = io.load_mesh(data_file, device=device) - poses, images, masks = _generate_cow_renders( - num_views=self.num_views, - mesh=mesh, - azimuth_range=self.azimuth_range, - distance=self.distance, - resolution=self.resolution, - device=device, - use_point_light=self.use_point_light, - ) - # pyre-ignore[16] - self.poses = poses.cpu() - # pyre-ignore[16] - self.train_dataset = SingleSceneDataset( # pyre-ignore[28] - object_name="cow", - images=list(images.permute(0, 3, 1, 2).cpu()), - fg_probabilities=list(masks[:, None].cpu()), - poses=[self.poses[i] for i in range(len(poses))], - frame_types=[DATASET_TYPE_KNOWN] * len(poses), - eval_batches=None, - ) - - -@torch.no_grad() -def _generate_cow_renders( - *, - num_views: int, - mesh: Meshes, - azimuth_range: float, - distance: float, - resolution: int, - device: torch.device, - use_point_light: bool, -) -> Tuple[CamerasBase, torch.Tensor, torch.Tensor]: - """ - Returns: - cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the - images are rendered. - images: A tensor of shape `(num_views, height, width, 3)` containing - the rendered images. - silhouettes: A tensor of shape `(num_views, height, width)` containing - the rendered silhouettes. - """ - - # Load obj file - - # We scale normalize and center the target mesh to fit in a sphere of radius 1 - # centered at (0,0,0). (scale, center) will be used to bring the predicted mesh - # to its original center and scale. Note that normalizing the target mesh, - # speeds up the optimization but is not necessary! - verts = mesh.verts_packed() - N = verts.shape[0] - center = verts.mean(0) - scale = max((verts - center).abs().max(0)[0]) - mesh.offset_verts_(-(center.expand(N, 3))) - mesh.scale_verts_((1.0 / float(scale))) - - # Get a batch of viewing angles. - elev = torch.linspace(0, 0, num_views) # keep constant - azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0 - - # Place a point light in front of the object. As mentioned above, the front of - # the cow is facing the -z direction. - if use_point_light: - lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]]) - else: - lights = AmbientLights(device=device) - - # Initialize a perspective camera that represents a batch of different - # viewing angles. All the cameras helper methods support mixed type inputs and - # broadcasting. So we can view the camera from a fixed distance, and - # then specify elevation and azimuth angles for each viewpoint as tensors. 
- R, T = look_at_view_transform(dist=distance, elev=elev, azim=azim) - cameras = FoVPerspectiveCameras(device=device, R=R, T=T) - - # Define the settings for rasterization and shading. - # As we are rendering images for visualization - # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to - # rasterize_meshes.py for explanations of these parameters. We also leave - # bin_size and max_faces_per_bin to their default values of None, which sets - # their values using heuristics and ensures that the faster coarse-to-fine - # rasterization method is used. Refer to docs/notes/renderer.md for an - # explanation of the difference between naive and coarse-to-fine rasterization. - raster_settings = RasterizationSettings( - image_size=resolution, blur_radius=0.0, faces_per_pixel=1 - ) - - # Create a Phong renderer by composing a rasterizer and a shader. The textured - # Phong shader will interpolate the texture uv coordinates for each vertex, - # sample from a texture image and apply the Phong lighting model - blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0)) - rasterizer_type = MeshRasterizer - renderer = MeshRendererWithFragments( - rasterizer=rasterizer_type(cameras=cameras, raster_settings=raster_settings), - shader=HardPhongShader( - device=device, cameras=cameras, lights=lights, blend_params=blend_params - ), - ) - - # Create a batch of meshes by repeating the cow mesh and associated textures. - # Meshes has a useful `extend` method which allows us do this very easily. - # This also extends the textures. - meshes = mesh.extend(num_views) - - # Render the cow mesh from each viewing angle - target_images, fragments = renderer(meshes, cameras=cameras, lights=lights) - silhouette_binary = (fragments.pix_to_face[..., 0] >= 0).float() - - return cameras, target_images[..., :3], silhouette_binary diff --git a/pytorch3d/pytorch3d/implicitron/dataset/scene_batch_sampler.py b/pytorch3d/pytorch3d/implicitron/dataset/scene_batch_sampler.py deleted file mode 100644 index f724fd07fd5cde4d218cbcb91fdf9e14d648f339..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/scene_batch_sampler.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import warnings -from collections import Counter -from dataclasses import dataclass, field -from typing import Dict, Iterable, Iterator, List, Sequence, Tuple - -import numpy as np -from torch.utils.data.sampler import Sampler - -from .dataset_base import DatasetBase - - -@dataclass(eq=False) # TODO: do we need this if not init from config? -class SceneBatchSampler(Sampler[List[int]]): - """ - A class for sampling training batches with a controlled composition - of sequences. - """ - - dataset: DatasetBase - batch_size: int - num_batches: int - # the sampler first samples a random element k from this list and then - # takes k random frames per sequence - images_per_seq_options: Sequence[int] - - # if True, will sample a contiguous interval of frames in the sequence - # it first finds the connected segments within the sequence of sufficient length, - # then samples a random pivot element among them and ideally uses it as a middle - # of the temporal window, shifting the borders where necessary. - # This strategy mitigates the bias against shorter segments and their boundaries. 
- sample_consecutive_frames: bool = False - # if a number > 0, then used to define the maximum difference in frame_number - # of neighbouring frames when forming connected segments; otherwise the whole - # sequence is considered a segment regardless of frame numbers - consecutive_frames_max_gap: int = 0 - # same but for timestamps if they are available - consecutive_frames_max_gap_seconds: float = 0.1 - - # if True, the sampler first reads from the dataset the mapping between - # sequence names and their categories. - # During batch sampling, the sampler ensures uniform distribution over the categories - # of the sampled sequences. - category_aware: bool = True - - seq_names: List[str] = field(init=False) - - category_to_sequence_names: Dict[str, List[str]] = field(init=False) - categories: List[str] = field(init=False) - - def __post_init__(self) -> None: - if self.batch_size <= 0: - raise ValueError( - "batch_size should be a positive integral value, " - f"but got batch_size={self.batch_size}" - ) - - if len(self.images_per_seq_options) < 1: - raise ValueError("n_per_seq_posibilities list cannot be empty") - - self.seq_names = list(self.dataset.sequence_names()) - - if self.category_aware: - self.category_to_sequence_names = self.dataset.category_to_sequence_names() - self.categories = list(self.category_to_sequence_names.keys()) - - def __len__(self) -> int: - return self.num_batches - - def __iter__(self) -> Iterator[List[int]]: - for batch_idx in range(len(self)): - batch = self._sample_batch(batch_idx) - yield batch - - def _sample_batch(self, batch_idx) -> List[int]: - n_per_seq = np.random.choice(self.images_per_seq_options) - n_seqs = -(-self.batch_size // n_per_seq) # round up - - if self.category_aware: - # first sample categories at random, these can be repeated in the batch - chosen_cat = _capped_random_choice(self.categories, n_seqs, replace=True) - # then randomly sample a set of unique sequences within each category - chosen_seq = [] - for cat, n_per_category in Counter(chosen_cat).items(): - category_chosen_seq = _capped_random_choice( - self.category_to_sequence_names[cat], - n_per_category, - replace=False, - ) - chosen_seq.extend([str(s) for s in category_chosen_seq]) - else: - chosen_seq = _capped_random_choice( - self.seq_names, - n_seqs, - replace=False, - ) - - if self.sample_consecutive_frames: - frame_idx = [] - for seq in chosen_seq: - segment_index = self._build_segment_index(seq, n_per_seq) - - segment, idx = segment_index[np.random.randint(len(segment_index))] - if len(segment) <= n_per_seq: - frame_idx.append(segment) - else: - start = np.clip(idx - n_per_seq // 2, 0, len(segment) - n_per_seq) - frame_idx.append(segment[start : start + n_per_seq]) - - else: - frame_idx = [ - _capped_random_choice( - list(self.dataset.sequence_indices_in_order(seq)), - n_per_seq, - replace=False, - ) - for seq in chosen_seq - ] - frame_idx = np.concatenate(frame_idx)[: self.batch_size].tolist() - if len(frame_idx) < self.batch_size: - warnings.warn( - "Batch size smaller than self.batch_size!" - + " (This is fine for experiments with a single scene and viewpooling)" - ) - return frame_idx - - def _build_segment_index(self, seq: str, size: int) -> List[Tuple[List[int], int]]: - """ - Returns a list of (segment, index) tuples, one per eligible frame, where - segment is a list of frame indices in the contiguous segment the frame - belongs to index is the frame's index within that segment. - Segment references are repeated but the memory is shared. 
- """ - if ( - self.consecutive_frames_max_gap > 0 - or self.consecutive_frames_max_gap_seconds > 0.0 - ): - segments = self._split_to_segments( - self.dataset.sequence_frames_in_order(seq) - ) - segments = _cull_short_segments(segments, size) - if not segments: - raise AssertionError("Empty segments after culling") - else: - segments = [list(self.dataset.sequence_indices_in_order(seq))] - - # build an index of segment for random selection of a pivot frame - segment_index = [ - (segment, i) for segment in segments for i in range(len(segment)) - ] - - return segment_index - - def _split_to_segments( - self, sequence_timestamps: Iterable[Tuple[float, int, int]] - ) -> List[List[int]]: - if ( - self.consecutive_frames_max_gap <= 0 - and self.consecutive_frames_max_gap_seconds <= 0.0 - ): - raise AssertionError("This function is only needed for non-trivial max_gap") - - segments = [] - last_no = -self.consecutive_frames_max_gap - 1 # will trigger a new segment - last_ts = -self.consecutive_frames_max_gap_seconds - 1.0 - for ts, no, idx in sequence_timestamps: - if ts <= 0.0 and no <= last_no: - raise AssertionError( - "Sequence frames are not ordered while timestamps are not given" - ) - - if ( - no - last_no > self.consecutive_frames_max_gap > 0 - or ts - last_ts > self.consecutive_frames_max_gap_seconds > 0.0 - ): # new group - segments.append([idx]) - else: - segments[-1].append(idx) - - last_no = no - last_ts = ts - - return segments - - -def _cull_short_segments(segments: List[List[int]], min_size: int) -> List[List[int]]: - lengths = [(len(segment), segment) for segment in segments] - max_len, longest_segment = max(lengths) - - if max_len < min_size: - return [longest_segment] - - return [segment for segment in segments if len(segment) >= min_size] - - -def _capped_random_choice(x, size, replace: bool = True): - """ - if replace==True - randomly chooses from x `size` elements without replacement if len(x)>size - else allows replacement and selects `size` elements again. - if replace==False - randomly chooses from x `min(len(x), size)` elements without replacement - """ - len_x = x if isinstance(x, int) else len(x) - if replace: - return np.random.choice(x, size=size, replace=len_x < size) - else: - return np.random.choice(x, size=min(size, len_x), replace=False) diff --git a/pytorch3d/pytorch3d/implicitron/dataset/single_sequence_dataset.py b/pytorch3d/pytorch3d/implicitron/dataset/single_sequence_dataset.py deleted file mode 100644 index 1090faa1e5734caad18e3af5b9dcc53c1edeef12..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/single_sequence_dataset.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# This file defines a base class for dataset map providers which -# provide data for a single scene. 
- -from dataclasses import field -from typing import Iterable, Iterator, List, Optional, Sequence, Tuple - -import numpy as np -import torch -from pytorch3d.implicitron.tools.config import ( - Configurable, - expand_args_fields, - run_auto_creation, -) -from pytorch3d.renderer import CamerasBase, join_cameras_as_batch, PerspectiveCameras - -from .dataset_base import DatasetBase -from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory -from .frame_data import FrameData -from .utils import DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN - -_SINGLE_SEQUENCE_NAME: str = "one_sequence" - - -@expand_args_fields -class SingleSceneDataset(DatasetBase, Configurable): - """ - A dataset from images from a single scene. - """ - - images: List[torch.Tensor] = field() - fg_probabilities: Optional[List[torch.Tensor]] = field() - poses: List[PerspectiveCameras] = field() - object_name: str = field() - frame_types: List[str] = field() - eval_batches: Optional[List[List[int]]] = field() - - def sequence_names(self) -> Iterable[str]: - return [_SINGLE_SEQUENCE_NAME] - - def __len__(self) -> int: - return len(self.poses) - - def sequence_frames_in_order( - self, seq_name: str, subset_filter: Optional[Sequence[str]] = None - ) -> Iterator[Tuple[float, int, int]]: - for i in range(len(self)): - if subset_filter is None or self.frame_types[i] in subset_filter: - yield 0.0, i, i - - def __getitem__(self, index) -> FrameData: - if index >= len(self): - raise IndexError(f"index {index} out of range {len(self)}") - image = self.images[index] - pose = self.poses[index] - frame_type = self.frame_types[index] - fg_probability = ( - None if self.fg_probabilities is None else self.fg_probabilities[index] - ) - - frame_data = FrameData( - frame_number=index, - sequence_name=_SINGLE_SEQUENCE_NAME, - sequence_category=self.object_name, - camera=pose, - # pyre-ignore - image_size_hw=torch.tensor(image.shape[1:], dtype=torch.long), - image_rgb=image, - fg_probability=fg_probability, - frame_type=frame_type, - ) - return frame_data - - def get_eval_batches(self) -> Optional[List[List[int]]]: - return self.eval_batches - - -# pyre-fixme[13]: Uninitialized attribute -class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase): - """ - Base for provider of data for one scene from LLFF or blender datasets. - - Members: - base_dir: directory holding the data for the scene. - object_name: The name of the scene (e.g. "lego"). This is just used as a label. - It will typically be equal to the name of the directory self.base_dir. - path_manager_factory: Creates path manager which may be used for - interpreting paths. - n_known_frames_for_test: If set, training frames are included in the val - and test datasets, and this many random training frames are added to - each test batch. If not set, test batches each contain just a single - testing frame. - """ - - base_dir: str - object_name: str - path_manager_factory: PathManagerFactory - path_manager_factory_class_type: str = "PathManagerFactory" - n_known_frames_for_test: Optional[int] = None - - def __post_init__(self) -> None: - run_auto_creation(self) - self._load_data() - - def _load_data(self) -> None: - # This must be defined by each subclass, - # and should set the following on self. - # - poses: a list of length-1 camera objects - # - images: [N, 3, H, W] tensor of rgb images - floats in [0,1] - # - fg_probabilities: None or [N, 1, H, W] of floats in [0,1] - # - splits: List[List[int]] of indices for train/val/test subsets. 
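# (Note: subclasses store these split index lists as `self.i_split`; `_get_dataset` below reads `self.i_split[split_idx]`.)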
- raise NotImplementedError() - - def _get_dataset( - self, split_idx: int, frame_type: str, set_eval_batches: bool = False - ) -> SingleSceneDataset: - # pyre-ignore[16] - split = self.i_split[split_idx] - frame_types = [frame_type] * len(split) - fg_probabilities = ( - None - # pyre-ignore[16] - if self.fg_probabilities is None - else self.fg_probabilities[split] - ) - eval_batches = [[i] for i in range(len(split))] - if split_idx != 0 and self.n_known_frames_for_test is not None: - train_split = self.i_split[0] - if set_eval_batches: - generator = np.random.default_rng(seed=0) - for batch in eval_batches: - # using permutation so that changes to n_known_frames_for_test - # result in consistent batches. - to_add = generator.permutation(len(train_split))[ - : self.n_known_frames_for_test - ] - batch.extend((to_add + len(split)).tolist()) - split = np.concatenate([split, train_split]) - frame_types.extend([DATASET_TYPE_KNOWN] * len(train_split)) - - # pyre-ignore[28] - return SingleSceneDataset( - object_name=self.object_name, - # pyre-ignore[16] - images=self.images[split], - fg_probabilities=fg_probabilities, - # pyre-ignore[16] - poses=[self.poses[i] for i in split], - frame_types=frame_types, - eval_batches=eval_batches if set_eval_batches else None, - ) - - def get_dataset_map(self) -> DatasetMap: - return DatasetMap( - train=self._get_dataset(0, DATASET_TYPE_KNOWN), - val=self._get_dataset(1, DATASET_TYPE_UNKNOWN), - test=self._get_dataset(2, DATASET_TYPE_UNKNOWN, True), - ) - - def get_all_train_cameras(self) -> Optional[CamerasBase]: - # pyre-ignore[16] - cameras = [self.poses[i] for i in self.i_split[0]] - return join_cameras_as_batch(cameras) - - -def _interpret_blender_cameras( - poses: torch.Tensor, focal: float -) -> List[PerspectiveCameras]: - """ - Convert 4x4 matrices representing cameras in blender format - to PyTorch3D format. - - Args: - poses: N x 3 x 4 camera matrices - focal: ndc space focal length - """ - pose_target_cameras = [] - for pose_target in poses: - pose_target = pose_target[:3, :4] - mtx = torch.eye(4, dtype=pose_target.dtype) - mtx[:3, :3] = pose_target[:3, :3].t() - mtx[3, :3] = pose_target[:, 3] - mtx = mtx.inverse() - - # flip the XZ coordinates. - mtx[:, [0, 2]] *= -1.0 - - Rpt3, Tpt3 = mtx[:, :3].split([3, 1], dim=0) - - focal_length_pt3 = torch.FloatTensor([[focal, focal]]) - principal_point_pt3 = torch.FloatTensor([[0.0, 0.0]]) - - cameras = PerspectiveCameras( - focal_length=focal_length_pt3, - principal_point=principal_point_pt3, - R=Rpt3[None], - T=Tpt3, - ) - pose_target_cameras.append(cameras) - return pose_target_cameras diff --git a/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset.py b/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset.py deleted file mode 100644 index 470f5a95bf100595659918ee979d8e350aa71480..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset.py +++ /dev/null @@ -1,768 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
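The pose handling in _interpret_blender_cameras above can be hard to follow, so the following standalone sketch isolates the core matrix steps (transpose the rotation, invert the assembled 4x4, flip the X and Z columns) without constructing PerspectiveCameras; the function name and the identity-pose test input are illustrative, not from the file.

import torch

def blender_pose_to_RT(pose_target: torch.Tensor):
    # pose_target: 3x4 camera-to-world matrix in Blender/NeRF convention
    mtx = torch.eye(4, dtype=pose_target.dtype)
    mtx[:3, :3] = pose_target[:3, :3].t()
    mtx[3, :3] = pose_target[:3, 3]
    mtx = mtx.inverse()      # invert: camera-to-world -> world-to-camera
    mtx[:, [0, 2]] *= -1.0   # flip the XZ coordinates
    R, T = mtx[:, :3].split([3, 1], dim=0)
    return R, T[0]

R, T = blender_pose_to_RT(torch.eye(4)[:3, :4])
print(R.shape, T.shape)  # torch.Size([3, 3]) torch.Size([3])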
- -import hashlib -import json -import logging -import os -from dataclasses import dataclass -from typing import ( - Any, - ClassVar, - Dict, - Iterable, - Iterator, - List, - Optional, - Sequence, - Tuple, - Type, - Union, -) - -import numpy as np -import pandas as pd -import sqlalchemy as sa -import torch -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase - -from pytorch3d.implicitron.dataset.frame_data import ( # noqa - FrameData, - FrameDataBuilder, - FrameDataBuilderBase, -) -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) -from sqlalchemy.orm import Session - -from .orm_types import SqlFrameAnnotation, SqlSequenceAnnotation - - -logger = logging.getLogger(__name__) - - -_SET_LISTS_TABLE: str = "set_lists" - - -@registry.register -class SqlIndexDataset(DatasetBase, ReplaceableBase): # pyre-ignore - """ - A dataset with annotations stored as SQLite tables. This is an index-based dataset. - The length is returned after all sequence and frame filters are applied (see param - definitions below). Indices can either be ordinal in [0, len), or pairs of - (sequence_name, frame_number); with the performance of `dataset[i]` and - `dataset[sequence_name, frame_number]` being same. A faster way to get metadata only - (without blobs) is `dataset.meta[idx]` indexing; it requires box_crop==False. - With ordinal indexing, the sequences are NOT guaranteed to span contiguous index - ranges, and frame numbers are NOT guaranteed to be increasing within a sequence. - Sequence-aware batch samplers have to use `sequence_[frames|indices]_in_order` - iterators, which are efficient. - - This functionality requires SQLAlchemy 2.0 or later. - - Metadata-related args: - sqlite_metadata_file: A SQLite file containing frame and sequence annotation - tables (mapping to SqlFrameAnnotation and SqlSequenceAnnotation, - respectively). - dataset_root: A root directory to look for images, masks, etc. It can be - alternatively set in `frame_data_builder` args, but this takes precedence. - subset_lists_file: A JSON/sqlite file containing the lists of frames - corresponding to different subsets (e.g. train/val/test) of the dataset; - format: {subset: [(sequence_name, frame_id, file_path)]}. All entries - must be present in frame_annotation metadata table. - path_manager: a facade for non-POSIX filesystems. - subsets: Restrict frames/sequences only to the given list of subsets - as defined in subset_lists_file (see above). Applied before all other - filters. - remove_empty_masks: Removes the frames with no active foreground pixels - in the segmentation mask (needs frame_annotation.mask.mass to be set; - null values are retained). - pick_frames_sql_clause: SQL WHERE clause to constrain frame annotations - NOTE: This is a potential security risk! The string is passed to the SQL - engine verbatim. Don’t expose it to end users of your application! - pick_categories: Restrict the dataset to the given list of categories. - pick_sequences: A Sequence of sequence names to restrict the dataset to. - exclude_sequences: A Sequence of the names of the sequences to exclude. - limit_sequences_per_category_to: Limit the dataset to the first up to N - sequences within each category (applies after all other sequence filters - but before `limit_sequences_to`). - limit_sequences_to: Limit the dataset to the first `limit_sequences_to` - sequences (after other sequence filters have been applied but before - frame-based filters). 
- limit_to: Limit the dataset to the first #limit_to frames (after other - filters have been applied, except n_frames_per_sequence). - n_frames_per_sequence: If > 0, randomly samples `n_frames_per_sequence` - frames in each sequences uniformly without replacement if it has - more frames than that; applied after other frame-level filters. - seed: The seed of the random generator sampling `n_frames_per_sequence` - random frames per sequence. - """ - - frame_annotations_type: ClassVar[Type[SqlFrameAnnotation]] = SqlFrameAnnotation - - sqlite_metadata_file: str = "" - dataset_root: Optional[str] = None - subset_lists_file: str = "" - eval_batches_file: Optional[str] = None - path_manager: Any = None - subsets: Optional[List[str]] = None - remove_empty_masks: bool = True - pick_frames_sql_clause: Optional[str] = None - pick_categories: Tuple[str, ...] = () - - pick_sequences: Tuple[str, ...] = () - exclude_sequences: Tuple[str, ...] = () - limit_sequences_per_category_to: int = 0 - limit_sequences_to: int = 0 - limit_to: int = 0 - n_frames_per_sequence: int = -1 - seed: int = 0 - remove_empty_masks_poll_whole_table_threshold: int = 300_000 - # we set it manually in the constructor - # _index: pd.DataFrame = field(init=False) - - frame_data_builder: FrameDataBuilderBase - frame_data_builder_class_type: str = "FrameDataBuilder" - - def __post_init__(self) -> None: - if sa.__version__ < "2.0": - raise ImportError("This class requires SQL Alchemy 2.0 or later") - - if not self.sqlite_metadata_file: - raise ValueError("sqlite_metadata_file must be set") - - if self.dataset_root: - frame_builder_type = self.frame_data_builder_class_type - getattr(self, f"frame_data_builder_{frame_builder_type}_args")[ - "dataset_root" - ] = self.dataset_root - - run_auto_creation(self) - self.frame_data_builder.path_manager = self.path_manager - - # pyre-ignore # NOTE: sqlite-specific args (read-only mode). - self._sql_engine = sa.create_engine( - f"sqlite:///file:{self.sqlite_metadata_file}?mode=ro&uri=true" - ) - - sequences = self._get_filtered_sequences_if_any() - - if self.subsets: - index = self._build_index_from_subset_lists(sequences) - else: - # TODO: if self.subset_lists_file and not self.subsets, it might be faster to - # still use the concatenated lists, assuming they cover the whole dataset - index = self._build_index_from_db(sequences) - - if self.n_frames_per_sequence >= 0: - index = self._stratified_sample_index(index) - - if len(index) == 0: - raise ValueError(f"There are no frames in the subsets: {self.subsets}!") - - self._index = index.set_index(["sequence_name", "frame_number"]) # pyre-ignore - - self.eval_batches = None # pyre-ignore - if self.eval_batches_file: - self.eval_batches = self._load_filter_eval_batches() - - logger.info(str(self)) - - def __len__(self) -> int: - # pyre-ignore[16] - return len(self._index) - - def __getitem__(self, frame_idx: Union[int, Tuple[str, int]]) -> FrameData: - """ - Fetches FrameData by either iloc in the index or by (sequence, frame_no) pair - """ - return self._get_item(frame_idx, True) - - @property - def meta(self): - """ - Allows accessing metadata only without loading blobs using `dataset.meta[idx]`. - Requires box_crop==False, since in that case, cameras cannot be adjusted - without loading masks. - - Returns: - FrameData objects with blob fields like `image_rgb` set to None. - - Raises: - ValueError if dataset.box_crop is set. 
- """ - return SqlIndexDataset._MetadataAccessor(self) - - @dataclass - class _MetadataAccessor: - dataset: "SqlIndexDataset" - - def __getitem__(self, frame_idx: Union[int, Tuple[str, int]]) -> FrameData: - return self.dataset._get_item(frame_idx, False) - - def _get_item( - self, frame_idx: Union[int, Tuple[str, int]], load_blobs: bool = True - ) -> FrameData: - if isinstance(frame_idx, int): - if frame_idx >= len(self._index): - raise IndexError(f"index {frame_idx} out of range {len(self._index)}") - - seq, frame = self._index.index[frame_idx] - else: - seq, frame, *rest = frame_idx - if isinstance(frame, torch.LongTensor): - frame = frame.item() - - if (seq, frame) not in self._index.index: - raise IndexError( - f"Sequence-frame index {frame_idx} not found; was it filtered out?" - ) - - if rest and rest[0] != self._index.loc[(seq, frame), "_image_path"]: - raise IndexError(f"Non-matching image path in {frame_idx}.") - - stmt = sa.select(self.frame_annotations_type).where( - self.frame_annotations_type.sequence_name == seq, - self.frame_annotations_type.frame_number - == int(frame), # cast from np.int64 - ) - seq_stmt = sa.select(SqlSequenceAnnotation).where( - SqlSequenceAnnotation.sequence_name == seq - ) - with Session(self._sql_engine) as session: - entry = session.scalars(stmt).one() - seq_metadata = session.scalars(seq_stmt).one() - - assert entry.image.path == self._index.loc[(seq, frame), "_image_path"] - - frame_data = self.frame_data_builder.build( - entry, seq_metadata, load_blobs=load_blobs - ) - - # The rest of the fields are optional - frame_data.frame_type = self._get_frame_type(entry) - return frame_data - - def __str__(self) -> str: - # pyre-ignore[16] - return f"SqlIndexDataset #frames={len(self._index)}" - - def sequence_names(self) -> Iterable[str]: - """Returns an iterator over sequence names in the dataset.""" - return self._index.index.unique("sequence_name") - - # override - def category_to_sequence_names(self) -> Dict[str, List[str]]: - stmt = sa.select( - SqlSequenceAnnotation.category, SqlSequenceAnnotation.sequence_name - ).where( # we limit results to sequences that have frames after all filters - SqlSequenceAnnotation.sequence_name.in_(self.sequence_names()) - ) - with self._sql_engine.connect() as connection: - cat_to_seqs = pd.read_sql(stmt, connection) - - return cat_to_seqs.groupby("category")["sequence_name"].apply(list).to_dict() - - # override - def get_frame_numbers_and_timestamps( - self, idxs: Sequence[int], subset_filter: Optional[Sequence[str]] = None - ) -> List[Tuple[int, float]]: - """ - Implements the DatasetBase method. - - NOTE: Avoid this function as there are more efficient alternatives such as - querying `dataset[idx]` directly or getting all sequence frames with - `sequence_[frames|indices]_in_order`. - - Return the index and timestamp in their videos of the frames whose - indices are given in `idxs`. They need to belong to the same sequence! - If timestamps are absent, they are replaced with zeros. - This is used for letting SceneBatchSampler identify consecutive - frames. - - Args: - idxs: a sequence int frame index in the dataset (it can be a slice) - subset_filter: must remain None - - Returns: - list of tuples of - - frame index in video - - timestamp of frame in video, coalesced with 0s - - Raises: - ValueError if idxs belong to more than one sequence. - """ - - if subset_filter is not None: - raise NotImplementedError( - "Subset filters are not supported in SQL Dataset. " - "We encourage creating a dataset per subset." 
- ) - - index_slice, _ = self._get_frame_no_coalesced_ts_by_row_indices(idxs) - # alternatively, we can use `.values.tolist()`, which may be faster - # but returns a list of lists - return list(index_slice.itertuples()) - - # override - def sequence_frames_in_order( - self, seq_name: str, subset_filter: Optional[Sequence[str]] = None - ) -> Iterator[Tuple[float, int, int]]: - """ - Overrides the default DatasetBase implementation (we don’t use `_seq_to_idx`). - Returns an iterator over the frame indices in a given sequence. - We attempt to first sort by timestamp (if they are available), - then by frame number. - - Args: - seq_name: the name of the sequence. - subset_filter: subset names to filter to - - Returns: - an iterator over triplets `(timestamp, frame_no, dataset_idx)`, - where `frame_no` is the index within the sequence, and - `dataset_idx` is the index within the dataset. - `None` timestamps are replaced with 0s. - """ - # TODO: implement sort_timestamp_first? (which would matter if the orders - # of frame numbers and timestamps are different) - rows = self._index.index.get_loc(seq_name) - if isinstance(rows, slice): - assert rows.stop is not None, "Unexpected result from pandas" - rows = range(rows.start or 0, rows.stop, rows.step or 1) - else: - rows = np.where(rows)[0] - - index_slice, idx = self._get_frame_no_coalesced_ts_by_row_indices( - rows, seq_name, subset_filter - ) - index_slice["idx"] = idx - - yield from index_slice.itertuples(index=False) - - # override - def get_eval_batches(self) -> Optional[List[Any]]: - """ - This class does not support eval batches with ordinal indices. You can pass - eval_batches as a batch_sampler to a data_loader since the dataset supports - `dataset[seq_name, frame_no]` indexing. - """ - return self.eval_batches - - # override - def join(self, other_datasets: Iterable[DatasetBase]) -> None: - raise ValueError("Not supported! Preprocess the data by merging them instead.") - - # override - @property - def frame_data_type(self) -> Type[FrameData]: - return self.frame_data_builder.frame_data_type - - def is_filtered(self) -> bool: - """ - Returns `True` in case the dataset has been filtered and thus some frame - annotations stored on the disk might be missing in the dataset object. - Does not account for subsets. - - Returns: - is_filtered: `True` if the dataset has been filtered, else `False`. 
- """ - return ( - self.remove_empty_masks - or self.limit_to > 0 - or self.limit_sequences_to > 0 - or self.limit_sequences_per_category_to > 0 - or len(self.pick_sequences) > 0 - or len(self.exclude_sequences) > 0 - or len(self.pick_categories) > 0 - or self.n_frames_per_sequence > 0 - ) - - def _get_filtered_sequences_if_any(self) -> Optional[pd.Series]: - # maximum possible filter (if limit_sequences_per_category_to == 0): - # WHERE category IN 'self.pick_categories' - # AND sequence_name IN 'self.pick_sequences' - # AND sequence_name NOT IN 'self.exclude_sequences' - # LIMIT 'self.limit_sequence_to' - - where_conditions = [ - *self._get_category_filters(), - *self._get_pick_filters(), - *self._get_exclude_filters(), - ] - - def add_where(stmt): - return stmt.where(*where_conditions) if where_conditions else stmt - - if self.limit_sequences_per_category_to <= 0: - stmt = add_where(sa.select(SqlSequenceAnnotation.sequence_name)) - else: - subquery = sa.select( - SqlSequenceAnnotation.sequence_name, - sa.func.row_number() - .over( - order_by=sa.text("ROWID"), # NOTE: ROWID is SQLite-specific - partition_by=SqlSequenceAnnotation.category, - ) - .label("row_number"), - ) - - subquery = add_where(subquery).subquery() - stmt = sa.select(subquery.c.sequence_name).where( - subquery.c.row_number <= self.limit_sequences_per_category_to - ) - - if self.limit_sequences_to > 0: - logger.info( - f"Limiting dataset to first {self.limit_sequences_to} sequences" - ) - # NOTE: ROWID is SQLite-specific - stmt = stmt.order_by(sa.text("ROWID")).limit(self.limit_sequences_to) - - if ( - not where_conditions - and self.limit_sequences_to <= 0 - and self.limit_sequences_per_category_to <= 0 - ): - # we will not need to filter by sequences - return None - - with self._sql_engine.connect() as connection: - sequences = pd.read_sql_query(stmt, connection)["sequence_name"] - logger.info("... 
retained %d sequences" % len(sequences)) - - return sequences - - def _get_category_filters(self) -> List[sa.ColumnElement]: - if not self.pick_categories: - return [] - - logger.info(f"Limiting dataset to categories: {self.pick_categories}") - return [SqlSequenceAnnotation.category.in_(self.pick_categories)] - - def _get_pick_filters(self) -> List[sa.ColumnElement]: - if not self.pick_sequences: - return [] - - logger.info(f"Limiting dataset to sequences: {self.pick_sequences}") - return [SqlSequenceAnnotation.sequence_name.in_(self.pick_sequences)] - - def _get_exclude_filters(self) -> List[sa.ColumnOperators]: - if not self.exclude_sequences: - return [] - - logger.info(f"Removing sequences from the dataset: {self.exclude_sequences}") - return [SqlSequenceAnnotation.sequence_name.notin_(self.exclude_sequences)] - - def _load_subsets_from_json(self, subset_lists_path: str) -> pd.DataFrame: - assert self.subsets is not None - with open(subset_lists_path, "r") as f: - subset_to_seq_frame = json.load(f) - - seq_frame_list = sum( - ( - [(*row, subset) for row in subset_to_seq_frame[subset]] - for subset in self.subsets - ), - [], - ) - index = pd.DataFrame( - seq_frame_list, - columns=["sequence_name", "frame_number", "_image_path", "subset"], - ) - return index - - def _load_subsets_from_sql(self, subset_lists_path: str) -> pd.DataFrame: - subsets = self.subsets - assert subsets is not None - # we need a new engine since we store the subsets in a separate DB - engine = sa.create_engine(f"sqlite:///{subset_lists_path}") - table = sa.Table(_SET_LISTS_TABLE, sa.MetaData(), autoload_with=engine) - stmt = sa.select(table).where(table.c.subset.in_(subsets)) - with engine.connect() as connection: - index = pd.read_sql(stmt, connection) - - return index - - def _build_index_from_subset_lists( - self, sequences: Optional[pd.Series] - ) -> pd.DataFrame: - if not self.subset_lists_file: - raise ValueError("Requested subsets but subset_lists_file not given") - - logger.info(f"Loading subset lists from {self.subset_lists_file}.") - - subset_lists_path = self._local_path(self.subset_lists_file) - if subset_lists_path.lower().endswith(".json"): - index = self._load_subsets_from_json(subset_lists_path) - else: - index = self._load_subsets_from_sql(subset_lists_path) - index = index.set_index(["sequence_name", "frame_number"]) - logger.info(f" -> loaded {len(index)} samples of {self.subsets}.") - - if sequences is not None: - logger.info("Applying filtered sequences.") - sequence_values = index.index.get_level_values("sequence_name") - index = index.loc[sequence_values.isin(sequences)] - logger.info(f" -> retained {len(index)} samples.") - - pick_frames_criteria = [] - if self.remove_empty_masks: - logger.info("Culling samples with empty masks.") - - if len(index) > self.remove_empty_masks_poll_whole_table_threshold: - # APPROACH 1: find empty masks and drop indices. 
- # dev load: 17s / 15 s (3.1M / 500K) - stmt = sa.select( - self.frame_annotations_type.sequence_name, - self.frame_annotations_type.frame_number, - ).where(self.frame_annotations_type._mask_mass == 0) - with Session(self._sql_engine) as session: - to_remove = session.execute(stmt).all() - - # Pandas uses np.int64 for integer types, so we have to case - # we might want to read it to pandas DataFrame directly to avoid the loop - to_remove = [(seq, np.int64(fr)) for seq, fr in to_remove] - index.drop(to_remove, errors="ignore", inplace=True) - else: - # APPROACH 3: load index into a temp table and join with annotations - # dev load: 94 s / 23 s (3.1M / 500K) - pick_frames_criteria.append( - sa.or_( - self.frame_annotations_type._mask_mass.is_(None), - self.frame_annotations_type._mask_mass != 0, - ) - ) - - if self.pick_frames_sql_clause: - logger.info("Applying the custom SQL clause.") - pick_frames_criteria.append(sa.text(self.pick_frames_sql_clause)) - - if pick_frames_criteria: - index = self._pick_frames_by_criteria(index, pick_frames_criteria) - - logger.info(f" -> retained {len(index)} samples.") - - if self.limit_to > 0: - logger.info(f"Limiting dataset to first {self.limit_to} frames") - index = index.sort_index().iloc[: self.limit_to] - - return index.reset_index() - - def _pick_frames_by_criteria(self, index: pd.DataFrame, criteria) -> pd.DataFrame: - IndexTable = self._get_temp_index_table_instance() - with self._sql_engine.connect() as connection: - IndexTable.create(connection) - # we don’t let pandas’s `to_sql` create the table automatically as - # the table would be permanent, so we create it and append with pandas - n_rows = index.to_sql(IndexTable.name, connection, if_exists="append") - assert n_rows == len(index) - sa_type = self.frame_annotations_type - stmt = ( - sa.select(IndexTable) - .select_from( - IndexTable.join( - self.frame_annotations_type, - sa.and_( - sa_type.sequence_name == IndexTable.c.sequence_name, - sa_type.frame_number == IndexTable.c.frame_number, - ), - ) - ) - .where(*criteria) - ) - return pd.read_sql_query(stmt, connection).set_index( - ["sequence_name", "frame_number"] - ) - - def _build_index_from_db(self, sequences: Optional[pd.Series]): - logger.info("Loading sequcence-frame index from the database") - stmt = sa.select( - self.frame_annotations_type.sequence_name, - self.frame_annotations_type.frame_number, - self.frame_annotations_type._image_path, - sa.null().label("subset"), - ) - where_conditions = [] - if sequences is not None: - logger.info(" applying filtered sequences") - where_conditions.append( - self.frame_annotations_type.sequence_name.in_(sequences.tolist()) - ) - - if self.remove_empty_masks: - logger.info(" excluding samples with empty masks") - where_conditions.append( - sa.or_( - self.frame_annotations_type._mask_mass.is_(None), - self.frame_annotations_type._mask_mass != 0, - ) - ) - - if self.pick_frames_sql_clause: - logger.info(" applying custom SQL clause") - where_conditions.append(sa.text(self.pick_frames_sql_clause)) - - if where_conditions: - stmt = stmt.where(*where_conditions) - - if self.limit_to > 0: - logger.info(f"Limiting dataset to first {self.limit_to} frames") - stmt = stmt.order_by( - self.frame_annotations_type.sequence_name, - self.frame_annotations_type.frame_number, - ).limit(self.limit_to) - - with self._sql_engine.connect() as connection: - index = pd.read_sql_query(stmt, connection) - - logger.info(f" -> loaded {len(index)} samples.") - return index - - def _sort_index_(self, index): - 
logger.info("Sorting the index by sequence and frame number.") - index.sort_values(["sequence_name", "frame_number"], inplace=True) - logger.info(" -> Done.") - - def _load_filter_eval_batches(self): - assert self.eval_batches_file - logger.info(f"Loading eval batches from {self.eval_batches_file}") - - if not os.path.isfile(self.eval_batches_file): - # The batch indices file does not exist. - # Most probably the user has not specified the root folder. - raise ValueError( - f"Looking for dataset json file in {self.eval_batches_file}. " - + "Please specify a correct dataset_root folder." - ) - - with open(self.eval_batches_file, "r") as f: - eval_batches = json.load(f) - - # limit the dataset to sequences to allow multiple evaluations in one file - pick_sequences = set(self.pick_sequences) - if self.pick_categories: - cat_to_seq = self.category_to_sequence_names() - pick_sequences.update( - seq for cat in self.pick_categories for seq in cat_to_seq[cat] - ) - - if pick_sequences: - old_len = len(eval_batches) - eval_batches = [b for b in eval_batches if b[0][0] in pick_sequences] - logger.warn( - f"Picked eval batches by sequence/cat: {old_len} -> {len(eval_batches)}" - ) - - if self.exclude_sequences: - old_len = len(eval_batches) - exclude_sequences = set(self.exclude_sequences) - eval_batches = [b for b in eval_batches if b[0][0] not in exclude_sequences] - logger.warn( - f"Excluded eval batches by sequence: {old_len} -> {len(eval_batches)}" - ) - - return eval_batches - - def _stratified_sample_index(self, index): - # NOTE this stratified sampling can be done more efficiently in - # the no-subset case above if it is added to the SQL query. - # We keep this generic implementation since no-subset case is uncommon - index = index.groupby("sequence_name", group_keys=False).apply( - lambda seq_frames: seq_frames.sample( - min(len(seq_frames), self.n_frames_per_sequence), - random_state=( - _seq_name_to_seed(seq_frames.iloc[0]["sequence_name"]) + self.seed - ), - ) - ) - logger.info(f" -> retained {len(index)} samples aster stratified sampling.") - return index - - def _get_frame_type(self, entry: SqlFrameAnnotation) -> Optional[str]: - return self._index.loc[(entry.sequence_name, entry.frame_number), "subset"] - - def _get_frame_no_coalesced_ts_by_row_indices( - self, - idxs: Sequence[int], - seq_name: Optional[str] = None, - subset_filter: Union[Sequence[str], str, None] = None, - ) -> Tuple[pd.DataFrame, Sequence[int]]: - """ - Loads timestamps for given index rows belonging to the same sequence. - If seq_name is known, it speeds up the computation. - Raises ValueError if `idxs` do not all belong to a single sequences . 
- """ - index_slice = self._index.iloc[idxs] - if subset_filter is not None: - if isinstance(subset_filter, str): - subset_filter = [subset_filter] - indicator = index_slice["subset"].isin(subset_filter) - index_slice = index_slice.loc[indicator] - idxs = [i for i, isin in zip(idxs, indicator) if isin] - - frames = index_slice.index.get_level_values("frame_number").tolist() - if seq_name is None: - seq_name_list = index_slice.index.get_level_values("sequence_name").tolist() - seq_name_set = set(seq_name_list) - if len(seq_name_set) > 1: - raise ValueError("Given indices belong to more than one sequence.") - elif len(seq_name_set) == 1: - seq_name = seq_name_list[0] - - coalesced_ts = sa.sql.functions.coalesce( - self.frame_annotations_type.frame_timestamp, 0 - ) - stmt = sa.select( - coalesced_ts.label("frame_timestamp"), - self.frame_annotations_type.frame_number, - ).where( - self.frame_annotations_type.sequence_name == seq_name, - self.frame_annotations_type.frame_number.in_(frames), - ) - - with self._sql_engine.connect() as connection: - frame_no_ts = pd.read_sql_query(stmt, connection) - - if len(frame_no_ts) != len(index_slice): - raise ValueError( - "Not all indices are found in the database; " - "do they belong to more than one sequence?" - ) - - return frame_no_ts, idxs - - def _local_path(self, path: str) -> str: - if self.path_manager is None: - return path - return self.path_manager.get_local_path(path) - - def _get_temp_index_table_instance(self, table_name: str = "__index"): - CachedTable = self.frame_annotations_type.metadata.tables.get(table_name) - if CachedTable is not None: # table definition is not idempotent - return CachedTable - - return sa.Table( - table_name, - self.frame_annotations_type.metadata, - sa.Column("sequence_name", sa.String, primary_key=True), - sa.Column("frame_number", sa.Integer, primary_key=True), - sa.Column("_image_path", sa.String), - sa.Column("subset", sa.String), - prefixes=["TEMP"], # NOTE SQLite specific! - ) - - -def _seq_name_to_seed(seq_name) -> int: - """Generates numbers in [0, 2 ** 28)""" - return int(hashlib.sha1(seq_name.encode("utf-8")).hexdigest()[:7], 16) - - -def _safe_as_tensor(data, dtype): - return torch.tensor(data, dtype=dtype) if data is not None else None diff --git a/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset_provider.py deleted file mode 100644 index ab161e8d73b3c1e7f0f00195fd6fdf94bf9b7af1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/sql_dataset_provider.py +++ /dev/null @@ -1,424 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import logging -import os -from typing import List, Optional, Tuple, Type - -import numpy as np - -from omegaconf import DictConfig, OmegaConf - -from pytorch3d.implicitron.dataset.dataset_map_provider import ( - DatasetMap, - DatasetMapProviderBase, - PathManagerFactory, -) -from pytorch3d.implicitron.tools.config import ( - expand_args_fields, - registry, - run_auto_creation, -) - -from .sql_dataset import SqlIndexDataset - - -_CO3D_SQL_DATASET_ROOT: str = os.getenv("CO3D_SQL_DATASET_ROOT", "") - -# _NEED_CONTROL is a list of those elements of SqlIndexDataset which -# are not directly specified for it in the config but come from the -# DatasetMapProvider. -_NEED_CONTROL: Tuple[str, ...] 
= ( - "path_manager", - "subsets", - "sqlite_metadata_file", - "subset_lists_file", -) - -logger = logging.getLogger(__name__) - - -@registry.register -class SqlIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13] - """ - Generates the training, validation, and testing dataset objects for - a dataset laid out on disk like SQL-CO3D, with annotations in an SQLite data base. - - The dataset is organized in the filesystem as follows:: - - self.dataset_root - β”œβ”€β”€ - β”‚ β”œβ”€β”€ - β”‚ β”‚ β”œβ”€β”€ depth_masks - β”‚ β”‚ β”œβ”€β”€ depths - β”‚ β”‚ β”œβ”€β”€ images - β”‚ β”‚ β”œβ”€β”€ masks - β”‚ β”‚ └── pointcloud.ply - β”‚ β”œβ”€β”€ - β”‚ β”‚ β”œβ”€β”€ depth_masks - β”‚ β”‚ β”œβ”€β”€ depths - β”‚ β”‚ β”œβ”€β”€ images - β”‚ β”‚ β”œβ”€β”€ masks - β”‚ β”‚ └── pointcloud.ply - β”‚ β”œβ”€β”€ ... - β”‚ β”œβ”€β”€ - β”‚ β”œβ”€β”€ set_lists - β”‚ β”œβ”€β”€ .json - β”‚ β”œβ”€β”€ .json - β”‚ β”œβ”€β”€ ... - β”‚ β”œβ”€β”€ .json - β”‚ β”œβ”€β”€ eval_batches - β”‚ β”‚ β”œβ”€β”€ .json - β”‚ β”‚ β”œβ”€β”€ .json - β”‚ β”‚ β”œβ”€β”€ ... - β”‚ β”‚ β”œβ”€β”€ .json - β”‚ β”œβ”€β”€ frame_annotations.jgz - β”‚ β”œβ”€β”€ sequence_annotations.jgz - β”œβ”€β”€ - β”œβ”€β”€ ... - β”œβ”€β”€ - β”œβ”€β”€ set_lists - β”œβ”€β”€ .sqlite - β”œβ”€β”€ .sqlite - β”œβ”€β”€ ... - β”œβ”€β”€ .sqlite - β”œβ”€β”€ eval_batches - β”‚ β”œβ”€β”€ .json - β”‚ β”œβ”€β”€ .json - β”‚ β”œβ”€β”€ ... - β”‚ β”œβ”€β”€ .json - - The dataset contains sequences named `` that may be partitioned by - directories such as `` e.g. representing categories but they - can also be stored in a flat structure. Each sequence folder contains the list of - sequence images, depth maps, foreground masks, and valid-depth masks - `images`, `depths`, `masks`, and `depth_masks` respectively. Furthermore, - `set_lists/` dirtectories (with partitions or global) store json or sqlite files - `.`, each describing a certain sequence subset. - These subset path conventions are not hard-coded and arbitrary relative path can be - specified by setting `self.subset_lists_path` to the relative path w.r.t. - dataset root. - - Each `.json` file contains the following dictionary:: - - { - "train": [ - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - "val": [ - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - "test": [ - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - ] - - defining the list of frames (identified with their `sequence_name` and - `frame_number`) in the "train", "val", and "test" subsets of the dataset. In case of - SQLite format, `.sqlite` contains a table with the header:: - - | sequence_name | frame_number | image_path | subset | - - Note that `frame_number` can be obtained only from the metadata and - does not necesarrily correspond to the numeric suffix of the corresponding image - file name (e.g. a file `//images/frame00005.jpg` can - have its frame number set to `20`, not 5). - - Each `.json` file contains a list of evaluation examples - in the following form:: - - [ - [ # batch 1 - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - [ # batch 2 - (sequence_name: str, frame_number: int, image_path: str), - ... - ], - ] - - Note that the evaluation examples always come from the `"test"` subset of the dataset. - (test frames can repeat across batches). The batches can contain single element, - which is typical in case of regular radiance field fitting. - - Args: - subset_lists_path: The relative path to the dataset subset definition. - For CO3D, these include e.g. 
"skateboard/set_lists/set_lists_manyview_dev_0.json". - By default (None), dataset is not partitioned to subsets (in that case, setting - `ignore_subsets` will speed up construction) - dataset_root: The root folder of the dataset. - metadata_basename: name of the SQL metadata file in dataset_root; - not expected to be changed by users - test_on_train: Construct validation and test datasets from - the training subset; note that in practice, in this - case all subset dataset objects will be same - only_test_set: Load only the test set. Incompatible with `test_on_train`. - ignore_subsets: Don’t filter by subsets in the dataset; note that in this - case all subset datasets will be same - eval_batch_num_training_frames: Add a certain number of training frames to each - eval batch. Useful for evaluating models that require - source views as input (e.g. NeRF-WCE / PixelNeRF). - dataset_args: Specifies additional arguments to the - JsonIndexDataset constructor call. - path_manager_factory: (Optional) An object that generates an instance of - PathManager that can translate provided file paths. - path_manager_factory_class_type: The class type of `path_manager_factory`. - """ - - category: Optional[str] = None - subset_list_name: Optional[str] = None # TODO: docs - # OR - subset_lists_path: Optional[str] = None - eval_batches_path: Optional[str] = None - - dataset_root: str = _CO3D_SQL_DATASET_ROOT - metadata_basename: str = "metadata.sqlite" - - test_on_train: bool = False - only_test_set: bool = False - ignore_subsets: bool = False - train_subsets: Tuple[str, ...] = ("train",) - val_subsets: Tuple[str, ...] = ("val",) - test_subsets: Tuple[str, ...] = ("test",) - - eval_batch_num_training_frames: int = 0 - - # this is a mould that is never constructed, used to build self._dataset_map values - dataset_class_type: str = "SqlIndexDataset" - dataset: SqlIndexDataset - - path_manager_factory: PathManagerFactory - path_manager_factory_class_type: str = "PathManagerFactory" - - def __post_init__(self): - super().__init__() - run_auto_creation(self) - - if self.only_test_set and self.test_on_train: - raise ValueError("Cannot have only_test_set and test_on_train") - - if self.ignore_subsets and not self.only_test_set: - self.test_on_train = True # no point in loading same data 3 times - - path_manager = self.path_manager_factory.get() - - sqlite_metadata_file = os.path.join(self.dataset_root, self.metadata_basename) - sqlite_metadata_file = _local_path(path_manager, sqlite_metadata_file) - - if not os.path.isfile(sqlite_metadata_file): - # The sqlite_metadata_file does not exist. - # Most probably the user has not specified the root folder. - raise ValueError( - f"Looking for frame annotations in {sqlite_metadata_file}." - + " Please specify a correct dataset_root folder." - + " Note: By default the root folder is taken from the" - + " CO3D_SQL_DATASET_ROOT environment variable." 
- ) - - if self.subset_lists_path and self.subset_list_name: - raise ValueError( - "subset_lists_path and subset_list_name cannot be both set" - ) - - subset_lists_file = self._get_lists_file("set_lists") - - # setup the common dataset arguments - common_dataset_kwargs = { - **getattr(self, f"dataset_{self.dataset_class_type}_args"), - "sqlite_metadata_file": sqlite_metadata_file, - "dataset_root": self.dataset_root, - "subset_lists_file": subset_lists_file, - "path_manager": path_manager, - } - - if self.category: - logger.info(f"Forcing category filter in the datasets to {self.category}") - common_dataset_kwargs["pick_categories"] = self.category.split(",") - - # get the used dataset type - dataset_type: Type[SqlIndexDataset] = registry.get( - SqlIndexDataset, self.dataset_class_type - ) - expand_args_fields(dataset_type) - - if subset_lists_file is not None and not os.path.isfile(subset_lists_file): - available_subsets = self._get_available_subsets( - OmegaConf.to_object(common_dataset_kwargs["pick_categories"]) - ) - msg = f"Cannot find subset list file {self.subset_lists_path}." - if available_subsets: - msg += f" Some of the available subsets: {str(available_subsets)}." - raise ValueError(msg) - - train_dataset = None - val_dataset = None - if not self.only_test_set: - # load the training set - logger.debug("Constructing train dataset.") - train_dataset = dataset_type( - **common_dataset_kwargs, subsets=self._get_subsets(self.train_subsets) - ) - logger.info(f"Train dataset: {str(train_dataset)}") - - if self.test_on_train: - assert train_dataset is not None - val_dataset = test_dataset = train_dataset - else: - # load the val and test sets - if not self.only_test_set: - # NOTE: this is always loaded in JsonProviderV2 - logger.debug("Extracting val dataset.") - val_dataset = dataset_type( - **common_dataset_kwargs, subsets=self._get_subsets(self.val_subsets) - ) - logger.info(f"Val dataset: {str(val_dataset)}") - - logger.debug("Extracting test dataset.") - eval_batches_file = self._get_lists_file("eval_batches") - del common_dataset_kwargs["eval_batches_file"] - test_dataset = dataset_type( - **common_dataset_kwargs, - subsets=self._get_subsets(self.test_subsets, True), - eval_batches_file=eval_batches_file, - ) - logger.info(f"Test dataset: {str(test_dataset)}") - - if ( - eval_batches_file is not None - and self.eval_batch_num_training_frames > 0 - ): - self._extend_eval_batches(test_dataset) - - self._dataset_map = DatasetMap( - train=train_dataset, val=val_dataset, test=test_dataset - ) - - def _get_subsets(self, subsets, is_eval: bool = False): - if self.ignore_subsets: - return None - - if is_eval and self.eval_batch_num_training_frames > 0: - # we will need to have training frames for extended batches - return list(subsets) + list(self.train_subsets) - - return subsets - - def _extend_eval_batches(self, test_dataset: SqlIndexDataset) -> None: - rng = np.random.default_rng(seed=0) - eval_batches = test_dataset.get_eval_batches() - if eval_batches is None: - raise ValueError("Eval batches were not loaded!") - - for batch in eval_batches: - sequence = batch[0][0] - seq_frames = list( - test_dataset.sequence_frames_in_order(sequence, self.train_subsets) - ) - idx_to_add = rng.permutation(len(seq_frames))[ - : self.eval_batch_num_training_frames - ] - batch.extend((sequence, seq_frames[a][1]) for a in idx_to_add) - - @classmethod - def dataset_tweak_args(cls, type, args: DictConfig) -> None: - """ - Called by get_default_args. 
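For reference, the subset-list and eval-batch files described in the provider docstring above have the following shape; the sequence names, frame numbers and image paths here are invented purely for illustration.

    # Schematic contents only; the real files are produced by the dataset tooling.
    set_lists_example = {
        "train": [["seq_001", 5, "seq_001/images/frame000005.jpg"]],
        "val": [["seq_001", 17, "seq_001/images/frame000017.jpg"]],
        "test": [["seq_002", 3, "seq_002/images/frame000003.jpg"]],
    }

    eval_batches_example = [  # each inner list is one evaluation batch of test frames
        [["seq_002", 3, "seq_002/images/frame000003.jpg"]],
        [["seq_002", 9, "seq_002/images/frame000009.jpg"]],
    ]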
- Certain fields are not exposed on each dataset class - but rather are controlled by this provider class. - """ - for key in _NEED_CONTROL: - del args[key] - - def create_dataset(self): - # No `dataset` member of this class is created. - # The dataset(s) live in `self.get_dataset_map`. - pass - - def get_dataset_map(self) -> DatasetMap: - return self._dataset_map # pyre-ignore [16] - - def _get_available_subsets(self, categories: List[str]): - """ - Get the available subset names for a given category folder (if given) inside - a root dataset folder `dataset_root`. - """ - path_manager = self.path_manager_factory.get() - - subsets: List[str] = [] - for prefix in [""] + categories: - set_list_dir = os.path.join(self.dataset_root, prefix, "set_lists") - if not ( - (path_manager is not None) and path_manager.isdir(set_list_dir) - ) and not os.path.isdir(set_list_dir): - continue - - set_list_files = (os.listdir if path_manager is None else path_manager.ls)( - set_list_dir - ) - subsets.extend(os.path.join(prefix, "set_lists", f) for f in set_list_files) - - return subsets - - def _get_lists_file(self, flavor: str) -> Optional[str]: - if flavor == "eval_batches": - subset_lists_path = self.eval_batches_path - else: - subset_lists_path = self.subset_lists_path - - if not subset_lists_path and not self.subset_list_name: - return None - - category_elem = "" - if self.category and "," not in self.category: - # if multiple categories are given, looking for global set lists - category_elem = self.category - - subset_lists_path = subset_lists_path or ( - os.path.join( - category_elem, f"{flavor}", f"{flavor}_{self.subset_list_name}" - ) - ) - - assert subset_lists_path - path_manager = self.path_manager_factory.get() - # try absolute path first - subset_lists_file = _get_local_path_check_extensions( - subset_lists_path, path_manager - ) - if subset_lists_file: - return subset_lists_file - - full_path = os.path.join(self.dataset_root, subset_lists_path) - subset_lists_file = _get_local_path_check_extensions(full_path, path_manager) - - if not subset_lists_file: - raise FileNotFoundError( - f"Subset lists path given but not found: {full_path}" - ) - - return subset_lists_file - - -def _get_local_path_check_extensions( - path, path_manager, extensions=("", ".sqlite", ".json") -) -> Optional[str]: - for ext in extensions: - local = _local_path(path_manager, path + ext) - if os.path.isfile(local): - return local - - return None - - -def _local_path(path_manager, path: str) -> str: - if path_manager is None: - return path - return path_manager.get_local_path(path) diff --git a/pytorch3d/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py b/pytorch3d/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py deleted file mode 100644 index 4640feb232878eb1578419eaebd8e0bc7163c4e4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
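A hedged construction sketch for the provider removed above; it assumes the module path prior to this deletion, and the dataset root, category and subset list name are placeholders whose real values depend on how the data is laid out on disk.

    # Assumption-laden sketch; not a verified recipe.
    from pytorch3d.implicitron.dataset.sql_dataset_provider import (
        SqlIndexDatasetMapProvider,
    )
    from pytorch3d.implicitron.tools.config import expand_args_fields

    expand_args_fields(SqlIndexDatasetMapProvider)
    provider = SqlIndexDatasetMapProvider(
        dataset_root="/data/co3d_sql",      # hypothetical; defaults to $CO3D_SQL_DATASET_ROOT
        category="skateboard",
        subset_list_name="manyview_dev_0",  # resolves to set_lists/set_lists_manyview_dev_0.*
    )
    dataset_map = provider.get_dataset_map()
    train_dataset, test_dataset = dataset_map.train, dataset_map.test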
- -import logging -from typing import Any, Dict, Optional, Tuple - -from pytorch3d.implicitron.dataset.data_loader_map_provider import ( - DataLoaderMap, - SceneBatchSampler, - SequenceDataLoaderMapProvider, -) -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase -from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap -from pytorch3d.implicitron.dataset.frame_data import FrameData -from pytorch3d.implicitron.tools.config import registry, run_auto_creation - -from torch.utils.data import DataLoader - -logger = logging.getLogger(__name__) - - -# TODO: we can merge it with SequenceDataLoaderMapProvider in PyTorch3D -# and support both eval_batches protocols -@registry.register -class TrainEvalDataLoaderMapProvider(SequenceDataLoaderMapProvider): - """ - Implementation of DataLoaderMapProviderBase that may use internal eval batches for - the test dataset. In particular, if `eval_batches_relpath` is set, it loads - eval batches from that json file, otherwise test set is treated in the same way as - train and val, i.e. the parameters `dataset_length_test` and `test_conditioning_type` - are respected. - - If conditioning is not required, then the batch size should - be set as 1, and most of the fields do not matter. - - If conditioning is required, each batch will contain one main - frame first to predict and the, rest of the elements are for - conditioning. - - If images_per_seq_options is left empty, the conditioning - frames are picked according to the conditioning type given. - This does not have regard to the order of frames in a - scene, or which frames belong to what scene. - - If images_per_seq_options is given, then the conditioning types - must be SAME and the remaining fields are used. - - Members: - batch_size: The size of the batch of the data loader. - num_workers: Number of data-loading threads in each data loader. - dataset_length_train: The number of batches in a training epoch. Or 0 to mean - an epoch is the length of the training set. - dataset_length_val: The number of batches in a validation epoch. Or 0 to mean - an epoch is the length of the validation set. - dataset_length_test: used if test_dataset.eval_batches is NOT set. The number of - batches in a testing epoch. Or 0 to mean an epoch is the length of the test - set. - images_per_seq_options: Possible numbers of frames sampled per sequence in a batch. - If a conditioning_type is KNOWN or TRAIN, then this must be left at its initial - value. Empty (the default) means that we are not careful about which frames - come from which scene. - sample_consecutive_frames: if True, will sample a contiguous interval of frames - in the sequence. It first sorts the frames by timestimps when available, - otherwise by frame numbers, finds the connected segments within the sequence - of sufficient length, then samples a random pivot element among them and - ideally uses it as a middle of the temporal window, shifting the borders - where necessary. This strategy mitigates the bias against shorter segments - and their boundaries. - consecutive_frames_max_gap: if a number > 0, then used to define the maximum - difference in frame_number of neighbouring frames when forming connected - segments; if both this and consecutive_frames_max_gap_seconds are 0s, - the whole sequence is considered a segment regardless of frame numbers. 
- consecutive_frames_max_gap_seconds: if a number > 0.0, then used to define the - maximum difference in frame_timestamp of neighbouring frames when forming - connected segments; if both this and consecutive_frames_max_gap are 0s, - the whole sequence is considered a segment regardless of frame timestamps. - """ - - batch_size: int = 1 - num_workers: int = 0 - - dataset_length_train: int = 0 - dataset_length_val: int = 0 - dataset_length_test: int = 0 - - images_per_seq_options: Tuple[int, ...] = () - sample_consecutive_frames: bool = False - consecutive_frames_max_gap: int = 0 - consecutive_frames_max_gap_seconds: float = 0.1 - - def __post_init__(self): - run_auto_creation(self) - - def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap: - """ - Returns a collection of data loaders for a given collection of datasets. - """ - train = self._make_generic_data_loader( - datasets.train, - self.dataset_length_train, - datasets.train, - ) - - val = self._make_generic_data_loader( - datasets.val, - self.dataset_length_val, - datasets.train, - ) - - if datasets.test is not None and datasets.test.get_eval_batches() is not None: - test = self._make_eval_data_loader(datasets.test) - else: - test = self._make_generic_data_loader( - datasets.test, - self.dataset_length_test, - datasets.train, - ) - - return DataLoaderMap(train=train, val=val, test=test) - - def _make_eval_data_loader( - self, - dataset: Optional[DatasetBase], - ) -> Optional[DataLoader[FrameData]]: - if dataset is None: - return None - - return DataLoader( - dataset, - batch_sampler=dataset.get_eval_batches(), - **self._get_data_loader_common_kwargs(dataset), - ) - - def _make_generic_data_loader( - self, - dataset: Optional[DatasetBase], - num_batches: int, - train_dataset: Optional[DatasetBase], - ) -> Optional[DataLoader[FrameData]]: - """ - Returns the dataloader for a dataset. 
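The eval-batch branch above relies on the dataset exposing its batches directly as a `batch_sampler`; a compact sketch of that wiring follows, where `test_dataset` is assumed to be an already-built dataset that supports `(sequence_name, frame_number)` indexing and carries eval batches.

    # Sketch; `test_dataset` is assumed to exist.
    from torch.utils.data import DataLoader

    eval_batches = test_dataset.get_eval_batches()        # list of lists of frame indices
    test_loader = DataLoader(
        test_dataset,
        batch_sampler=eval_batches,                       # each entry defines one batch
        num_workers=0,
        collate_fn=test_dataset.frame_data_type.collate,  # FrameData-aware collation
    )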
- - Args: - dataset: the dataset - num_batches: possible ceiling on number of batches per epoch - train_dataset: the training dataset, used if conditioning_type==TRAIN - conditioning_type: source for padding of batches - """ - if dataset is None: - return None - - data_loader_kwargs = self._get_data_loader_common_kwargs(dataset) - - if len(self.images_per_seq_options) > 0: - # this is a typical few-view setup - # conditioning comes from the same subset since subsets are split by seqs - batch_sampler = SceneBatchSampler( - dataset, - self.batch_size, - num_batches=len(dataset) if num_batches <= 0 else num_batches, - images_per_seq_options=self.images_per_seq_options, - sample_consecutive_frames=self.sample_consecutive_frames, - consecutive_frames_max_gap=self.consecutive_frames_max_gap, - consecutive_frames_max_gap_seconds=self.consecutive_frames_max_gap_seconds, - ) - return DataLoader( - dataset, - batch_sampler=batch_sampler, - **data_loader_kwargs, - ) - - if self.batch_size == 1: - # this is a typical many-view setup (without conditioning) - return self._simple_loader(dataset, num_batches, data_loader_kwargs) - - # edge case: conditioning on train subset, typical for Nerformer-like many-view - # there is only one sequence in all datasets, so we condition on another subset - return self._train_loader( - dataset, train_dataset, num_batches, data_loader_kwargs - ) - - def _get_data_loader_common_kwargs(self, dataset: DatasetBase) -> Dict[str, Any]: - return { - "num_workers": self.num_workers, - "collate_fn": dataset.frame_data_type.collate, - } diff --git a/pytorch3d/pytorch3d/implicitron/dataset/types.py b/pytorch3d/pytorch3d/implicitron/dataset/types.py deleted file mode 100644 index 58eac677affe7f879bfc78271b90d74df9507329..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/types.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import dataclasses -import gzip -import json -from dataclasses import dataclass, Field, MISSING -from typing import ( - Any, - cast, - Dict, - get_args, - get_origin, - IO, - Optional, - Tuple, - Type, - TypeVar, - Union, -) - -import numpy as np - - -_X = TypeVar("_X") - -TF3 = Tuple[float, float, float] - - -@dataclass -class ImageAnnotation: - # path to jpg file, relative w.r.t. dataset_root - path: str - # H x W - size: Tuple[int, int] # TODO: rename size_hw? - - -@dataclass -class DepthAnnotation: - # path to png file, relative w.r.t. dataset_root, storing `depth / scale_adjustment` - path: str - # a factor to convert png values to actual depth: `depth = png * scale_adjustment` - scale_adjustment: float - # path to png file, relative w.r.t. dataset_root, storing binary `depth` mask - mask_path: Optional[str] - - -@dataclass -class MaskAnnotation: - # path to png file storing (Prob(fg | pixel) * 255) - path: str - # (soft) number of pixels in the mask; sum(Prob(fg | pixel)) - mass: Optional[float] = None - # tight bounding box around the foreground mask - bounding_box_xywh: Optional[Tuple[float, float, float, float]] = None - - -@dataclass -class ViewpointAnnotation: - # In right-multiply (PyTorch3D) format. 
X_cam = X_world @ R + T - R: Tuple[TF3, TF3, TF3] - T: TF3 - - focal_length: Tuple[float, float] - principal_point: Tuple[float, float] - - intrinsics_format: str = "ndc_norm_image_bounds" - # Defines the co-ordinate system where focal_length and principal_point live. - # Possible values: ndc_isotropic | ndc_norm_image_bounds (default) - # ndc_norm_image_bounds: legacy PyTorch3D NDC format, where image boundaries - # correspond to [-1, 1] x [-1, 1], and the scale along x and y may differ - # ndc_isotropic: PyTorch3D 0.5+ NDC convention where the shorter side has - # the range [-1, 1], and the longer one has the range [-s, s]; s >= 1, - # where s is the aspect ratio. The scale is same along x and y. - - -@dataclass -class FrameAnnotation: - """A dataclass used to load annotations from json.""" - - # can be used to join with `SequenceAnnotation` - sequence_name: str - # 0-based, continuous frame number within sequence - frame_number: int - # timestamp in seconds from the video start - frame_timestamp: float - - image: ImageAnnotation - depth: Optional[DepthAnnotation] = None - mask: Optional[MaskAnnotation] = None - viewpoint: Optional[ViewpointAnnotation] = None - meta: Optional[Dict[str, Any]] = None - - -@dataclass -class PointCloudAnnotation: - # path to ply file with points only, relative w.r.t. dataset_root - path: str - # the bigger the better - quality_score: float - n_points: Optional[int] - - -@dataclass -class VideoAnnotation: - # path to the original video file, relative w.r.t. dataset_root - path: str - # length of the video in seconds - length: float - - -@dataclass -class SequenceAnnotation: - sequence_name: str - category: str - video: Optional[VideoAnnotation] = None - point_cloud: Optional[PointCloudAnnotation] = None - # the bigger the better - viewpoint_quality_score: Optional[float] = None - - -def dump_dataclass(obj: Any, f: IO, binary: bool = False) -> None: - """ - Args: - f: Either a path to a file, or a file opened for writing. - obj: A @dataclass or collection hierarchy including dataclasses. - binary: Set to True if `f` is a file handle, else False. - """ - if binary: - f.write(json.dumps(_asdict_rec(obj)).encode("utf8")) - else: - json.dump(_asdict_rec(obj), f) - - -def load_dataclass(f: IO, cls: Type[_X], binary: bool = False) -> _X: - """ - Loads to a @dataclass or collection hierarchy including dataclasses - from a json recursively. - Call it like load_dataclass(f, typing.List[FrameAnnotationAnnotation]). - raises KeyError if json has keys not mapping to the dataclass fields. - - Args: - f: Either a path to a file, or a file opened for writing. - cls: The class of the loaded dataclass. - binary: Set to True if `f` is a file handle, else False. - """ - if binary: - asdict = json.loads(f.read().decode("utf8")) - else: - asdict = json.load(f) - - if isinstance(asdict, list): - # in the list case, run a faster "vectorized" version - cls = get_args(cls)[0] - res = list(_dataclass_list_from_dict_list(asdict, cls)) - else: - res = _dataclass_from_dict(asdict, cls) - - return res - - -def _dataclass_list_from_dict_list(dlist, typeannot): - """ - Vectorised version of `_dataclass_from_dict`. - The output should be equivalent to - `[_dataclass_from_dict(d, typeannot) for d in dlist]`. - - Args: - dlist: list of objects to convert. - typeannot: type of each of those objects. - Returns: - iterator or list over converted objects of the same length as `dlist`. 
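Since `ViewpointAnnotation` stores the pose in the right-multiply convention noted above, it maps onto a PyTorch3D camera essentially unchanged; here is a small sketch with placeholder values, assuming the default `ndc_norm_image_bounds` intrinsics format.

    # Placeholder values; a sketch, not the project's actual frame-data builder.
    import torch
    from pytorch3d.renderer.cameras import PerspectiveCameras

    R = torch.eye(3)[None]                     # (1, 3, 3), rows as stored in the annotation
    T = torch.tensor([[0.0, 0.0, 2.0]])        # (1, 3)
    focal_length = torch.tensor([[2.0, 2.0]])  # NDC units
    principal_point = torch.tensor([[0.0, 0.0]])

    # PyTorch3D also uses the row-vector convention X_cam = X_world @ R + T,
    # so R and T from the annotation can be passed through directly.
    camera = PerspectiveCameras(
        R=R, T=T, focal_length=focal_length, principal_point=principal_point
    )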
- - Raises: - ValueError: it assumes the objects have None's in consistent places across - objects, otherwise it would ignore some values. This generally holds for - auto-generated annotations, but otherwise use `_dataclass_from_dict`. - """ - - cls = get_origin(typeannot) or typeannot - - if typeannot is Any: - return dlist - if all(obj is None for obj in dlist): # 1st recursion base: all None nodes - return dlist - if any(obj is None for obj in dlist): - # filter out Nones and recurse on the resulting list - idx_notnone = [(i, obj) for i, obj in enumerate(dlist) if obj is not None] - idx, notnone = zip(*idx_notnone) - converted = _dataclass_list_from_dict_list(notnone, typeannot) - res = [None] * len(dlist) - for i, obj in zip(idx, converted): - res[i] = obj - return res - - is_optional, contained_type = _resolve_optional(typeannot) - if is_optional: - return _dataclass_list_from_dict_list(dlist, contained_type) - - # otherwise, we dispatch by the type of the provided annotation to convert to - if issubclass(cls, tuple) and hasattr(cls, "_fields"): # namedtuple - # For namedtuple, call the function recursively on the lists of corresponding keys - types = cls.__annotations__.values() - dlist_T = zip(*dlist) - res_T = [ - _dataclass_list_from_dict_list(key_list, tp) - for key_list, tp in zip(dlist_T, types) - ] - return [cls(*converted_as_tuple) for converted_as_tuple in zip(*res_T)] - elif issubclass(cls, (list, tuple)): - # For list/tuple, call the function recursively on the lists of corresponding positions - types = get_args(typeannot) - if len(types) == 1: # probably List; replicate for all items - types = types * len(dlist[0]) - dlist_T = zip(*dlist) - res_T = ( - _dataclass_list_from_dict_list(pos_list, tp) - for pos_list, tp in zip(dlist_T, types) - ) - if issubclass(cls, tuple): - return list(zip(*res_T)) - else: - return [cls(converted_as_tuple) for converted_as_tuple in zip(*res_T)] - elif issubclass(cls, dict): - # For the dictionary, call the function recursively on concatenated keys and vertices - key_t, val_t = get_args(typeannot) - all_keys_res = _dataclass_list_from_dict_list( - [k for obj in dlist for k in obj.keys()], key_t - ) - all_vals_res = _dataclass_list_from_dict_list( - [k for obj in dlist for k in obj.values()], val_t - ) - indices = np.cumsum([len(obj) for obj in dlist]) - assert indices[-1] == len(all_keys_res) - - keys = np.split(list(all_keys_res), indices[:-1]) - all_vals_res_iter = iter(all_vals_res) - return [cls(zip(k, all_vals_res_iter)) for k in keys] - elif not dataclasses.is_dataclass(typeannot): - return dlist - - # dataclass node: 2nd recursion base; call the function recursively on the lists - # of the corresponding fields - assert dataclasses.is_dataclass(cls) - fieldtypes = { - f.name: (_unwrap_type(f.type), _get_dataclass_field_default(f)) - for f in dataclasses.fields(typeannot) - } - - # NOTE the default object is shared here - key_lists = ( - _dataclass_list_from_dict_list([obj.get(k, default) for obj in dlist], type_) - for k, (type_, default) in fieldtypes.items() - ) - transposed = zip(*key_lists) - return [cls(*vals_as_tuple) for vals_as_tuple in transposed] - - -def _dataclass_from_dict(d, typeannot): - if d is None or typeannot is Any: - return d - is_optional, contained_type = _resolve_optional(typeannot) - if is_optional: - # an Optional not set to None, just use the contents of the Optional. 
- return _dataclass_from_dict(d, contained_type) - - cls = get_origin(typeannot) or typeannot - if issubclass(cls, tuple) and hasattr(cls, "_fields"): # namedtuple - types = cls.__annotations__.values() - return cls(*[_dataclass_from_dict(v, tp) for v, tp in zip(d, types)]) - elif issubclass(cls, (list, tuple)): - types = get_args(typeannot) - if len(types) == 1: # probably List; replicate for all items - types = types * len(d) - return cls(_dataclass_from_dict(v, tp) for v, tp in zip(d, types)) - elif issubclass(cls, dict): - key_t, val_t = get_args(typeannot) - return cls( - (_dataclass_from_dict(k, key_t), _dataclass_from_dict(v, val_t)) - for k, v in d.items() - ) - elif not dataclasses.is_dataclass(typeannot): - return d - - assert dataclasses.is_dataclass(cls) - fieldtypes = {f.name: _unwrap_type(f.type) for f in dataclasses.fields(typeannot)} - return cls(**{k: _dataclass_from_dict(v, fieldtypes[k]) for k, v in d.items()}) - - -def _unwrap_type(tp): - # strips Optional wrapper, if any - if get_origin(tp) is Union: - args = get_args(tp) - if len(args) == 2 and any(a is type(None) for a in args): # noqa: E721 - # this is typing.Optional - return args[0] if args[1] is type(None) else args[1] # noqa: E721 - return tp - - -def _get_dataclass_field_default(field: Field) -> Any: - if field.default_factory is not MISSING: - # pyre-fixme[29]: `Union[dataclasses._MISSING_TYPE, - # dataclasses._DefaultFactory[typing.Any]]` is not a function. - return field.default_factory() - elif field.default is not MISSING: - return field.default - else: - return None - - -def _asdict_rec(obj): - return dataclasses._asdict_inner(obj, dict) - - -def dump_dataclass_jgzip(outfile: str, obj: Any) -> None: - """ - Dumps obj to a gzipped json outfile. - - Args: - obj: A @dataclass or collection hiererchy including dataclasses. - outfile: The path to the output file. - """ - with gzip.GzipFile(outfile, "wb") as f: - dump_dataclass(obj, cast(IO, f), binary=True) - - -def load_dataclass_jgzip(outfile, cls): - """ - Loads a dataclass from a gzipped json outfile. - - Args: - outfile: The path to the loaded file. - cls: The type annotation of the loaded dataclass. - - Returns: - loaded_dataclass: The loaded dataclass. - """ - with gzip.GzipFile(outfile, "rb") as f: - return load_dataclass(cast(IO, f), cls, binary=True) - - -def _resolve_optional(type_: Any) -> Tuple[bool, Any]: - """Check whether `type_` is equivalent to `typing.Optional[T]` for some T.""" - if get_origin(type_) is Union: - args = get_args(type_) - if len(args) == 2 and args[1] == type(None): # noqa E721 - return True, args[0] - if type_ is Any: - return True, Any - - return False, type_ diff --git a/pytorch3d/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/pytorch3d/implicitron/dataset/utils.py deleted file mode 100644 index 01573a1a772eac6284e986b2aa4cb045d274b9ea..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/utils.py +++ /dev/null @@ -1,380 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
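Put together, the loaders from the deleted `types.py` above read the gzipped-json annotation files referenced in the dataset layout; a short usage sketch (module path as it existed before this removal, file path hypothetical):

    # Hypothetical path; the type and helper are the ones defined in types.py above.
    from typing import List

    from pytorch3d.implicitron.dataset.types import FrameAnnotation, load_dataclass_jgzip

    frame_annots: List[FrameAnnotation] = load_dataclass_jgzip(
        "/data/co3d/skateboard/frame_annotations.jgz",
        List[FrameAnnotation],
    )
    print(frame_annots[0].sequence_name, frame_annots[0].frame_number)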
- - -import functools -import warnings -from pathlib import Path -from typing import List, Optional, Tuple, TypeVar, Union - -import numpy as np -import torch -from PIL import Image - -from pytorch3d.io import IO -from pytorch3d.renderer.cameras import PerspectiveCameras -from pytorch3d.structures.pointclouds import Pointclouds - -DATASET_TYPE_TRAIN = "train" -DATASET_TYPE_TEST = "test" -DATASET_TYPE_KNOWN = "known" -DATASET_TYPE_UNKNOWN = "unseen" - - -class GenericWorkaround: - """ - OmegaConf.structured has a weirdness when you try to apply - it to a dataclass whose first base class is a Generic which is not - Dict. The issue is with a function called get_dict_key_value_types - in omegaconf/_utils.py. - For example this fails: - - @dataclass(eq=False) - class D(torch.utils.data.Dataset[int]): - a: int = 3 - - OmegaConf.structured(D) - - We avoid the problem by adding this class as an extra base class. - """ - - pass - - -def is_known_frame_scalar(frame_type: str) -> bool: - """ - Given a single frame type corresponding to a single frame, return whether - the frame is a known frame. - """ - return frame_type.endswith(DATASET_TYPE_KNOWN) - - -def is_known_frame( - frame_type: List[str], device: Optional[str] = None -) -> torch.BoolTensor: - """ - Given a list `frame_type` of frame types in a batch, return a tensor - of boolean flags expressing whether the corresponding frame is a known frame. - """ - # pyre-fixme[7]: Expected `BoolTensor` but got `Tensor`. - return torch.tensor( - [is_known_frame_scalar(ft) for ft in frame_type], - dtype=torch.bool, - device=device, - ) - - -def is_train_frame( - frame_type: List[str], device: Optional[str] = None -) -> torch.BoolTensor: - """ - Given a list `frame_type` of frame types in a batch, return a tensor - of boolean flags expressing whether the corresponding frame is a training frame. - """ - # pyre-fixme[7]: Expected `BoolTensor` but got `Tensor`. 
- return torch.tensor( - [ft.startswith(DATASET_TYPE_TRAIN) for ft in frame_type], - dtype=torch.bool, - device=device, - ) - - -def get_bbox_from_mask( - mask: np.ndarray, thr: float, decrease_quant: float = 0.05 -) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn( - f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 - ) - - x0, x1 = get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 - - -def crop_around_box( - tensor: torch.Tensor, bbox: torch.Tensor, impath: str = "" -) -> torch.Tensor: - # bbox is xyxy, where the upper bound is corrected with +1 - bbox = clamp_box_to_image_bounds_and_round( - bbox, - image_size_hw=tuple(tensor.shape[-2:]), - ) - tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] - assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" - return tensor - - -def clamp_box_to_image_bounds_and_round( - bbox_xyxy: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.LongTensor: - bbox_xyxy = bbox_xyxy.clone() - bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) - bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) - if not isinstance(bbox_xyxy, torch.LongTensor): - bbox_xyxy = bbox_xyxy.round().long() - return bbox_xyxy # pyre-ignore [7] - - -T = TypeVar("T", bound=torch.Tensor) - - -def bbox_xyxy_to_xywh(xyxy: T) -> T: - wh = xyxy[2:] - xyxy[:2] - xywh = torch.cat([xyxy[:2], wh]) - return xywh # pyre-ignore - - -def get_clamp_bbox( - bbox: torch.Tensor, - box_crop_context: float = 0.0, - image_path: str = "", -) -> torch.Tensor: - # box_crop_context: rate of expansion for bbox - # returns possibly expanded bbox xyxy as float - - bbox = bbox.clone() # do not edit bbox in place - - # increase box size - if box_crop_context > 0.0: - c = box_crop_context - bbox = bbox.float() - bbox[0] -= bbox[2] * c / 2 - bbox[1] -= bbox[3] * c / 2 - bbox[2] += bbox[2] * c - bbox[3] += bbox[3] * c - - if (bbox[2:] <= 1.0).any(): - raise ValueError( - f"squashed image {image_path}!! The bounding box contains no pixels." 
- ) - - bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes - bbox_xyxy = bbox_xywh_to_xyxy(bbox, clamp_size=2) - - return bbox_xyxy - - -def rescale_bbox( - bbox: torch.Tensor, - orig_res: Union[Tuple[int, int], torch.LongTensor], - new_res: Union[Tuple[int, int], torch.LongTensor], -) -> torch.Tensor: - assert bbox is not None - assert np.prod(orig_res) > 1e-8 - # average ratio of dimensions - # pyre-ignore - rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 - return bbox * rel_size - - -def bbox_xywh_to_xyxy( - xywh: torch.Tensor, clamp_size: Optional[int] = None -) -> torch.Tensor: - xyxy = xywh.clone() - if clamp_size is not None: - xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) - xyxy[2:] += xyxy[:2] - return xyxy - - -def get_1d_bounds(arr: np.ndarray) -> Tuple[int, int]: - nz = np.flatnonzero(arr) - return nz[0], nz[-1] + 1 - - -def resize_image( - image: Union[np.ndarray, torch.Tensor], - image_height: Optional[int], - image_width: Optional[int], - mode: str = "bilinear", -) -> Tuple[torch.Tensor, float, torch.Tensor]: - - if isinstance(image, np.ndarray): - image = torch.from_numpy(image) - - if image_height is None or image_width is None: - # skip the resizing - return image, 1.0, torch.ones_like(image[:1]) - # takes numpy array or tensor, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - image[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - imre_ = torch.zeros(image.shape[0], image_height, image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, image_height, image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask - - -def transpose_normalize_image(image: np.ndarray) -> np.ndarray: - im = np.atleast_3d(image).transpose((2, 0, 1)) - return im.astype(np.float32) / 255.0 - - -def load_image(path: str) -> np.ndarray: - with Image.open(path) as pil_im: - im = np.array(pil_im.convert("RGB")) - - return transpose_normalize_image(im) - - -def load_mask(path: str) -> np.ndarray: - with Image.open(path) as pil_im: - mask = np.array(pil_im) - - return transpose_normalize_image(mask) - - -def load_depth(path: str, scale_adjustment: float) -> np.ndarray: - if path.lower().endswith(".exr"): - # NOTE: environment variable OPENCV_IO_ENABLE_OPENEXR must be set to 1 - # You will have to accept these vulnerabilities by using OpenEXR: - # https://github.com/opencv/opencv/issues/21326 - import cv2 - - d = cv2.imread(path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)[..., 0] - d[d > 1e9] = 0.0 - elif path.lower().endswith(".png"): - d = load_16big_png_depth(path) - else: - raise ValueError('unsupported depth file name "%s"' % path) - - d = d * scale_adjustment - - d[~np.isfinite(d)] = 0.0 - return d[None] # fake feature channel - - -def load_16big_png_depth(depth_png: str) -> np.ndarray: - with Image.open(depth_png) as depth_pil: - # the image is stored with 16-bit depth but PIL reads it as I (32 bit). 
- # we cast it to uint16, then reinterpret as float16, then cast to float32 - depth = ( - np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) - .astype(np.float32) - .reshape((depth_pil.size[1], depth_pil.size[0])) - ) - return depth - - -def load_1bit_png_mask(file: str) -> np.ndarray: - with Image.open(file) as pil_im: - mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) - return mask - - -def load_depth_mask(path: str) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth mask file name "%s"' % path) - m = load_1bit_png_mask(path) - return m[None] # fake feature channel - - -def safe_as_tensor(data, dtype): - return torch.tensor(data, dtype=dtype) if data is not None else None - - -def _convert_ndc_to_pixels( - focal_length: torch.Tensor, - principal_point: torch.Tensor, - image_size_wh: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: - half_image_size = image_size_wh / 2 - rescale = half_image_size.min() - principal_point_px = half_image_size - principal_point * rescale - focal_length_px = focal_length * rescale - return focal_length_px, principal_point_px - - -def _convert_pixels_to_ndc( - focal_length_px: torch.Tensor, - principal_point_px: torch.Tensor, - image_size_wh: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: - half_image_size = image_size_wh / 2 - rescale = half_image_size.min() - principal_point = (half_image_size - principal_point_px) / rescale - focal_length = focal_length_px / rescale - return focal_length, principal_point - - -def adjust_camera_to_bbox_crop_( - camera: PerspectiveCameras, - image_size_wh: torch.Tensor, - clamp_bbox_xywh: torch.Tensor, -) -> None: - if len(camera) != 1: - raise ValueError("Adjusting currently works with singleton cameras camera only") - - focal_length_px, principal_point_px = _convert_ndc_to_pixels( - camera.focal_length[0], - camera.principal_point[0], - image_size_wh, - ) - principal_point_px_cropped = principal_point_px - clamp_bbox_xywh[:2] - - focal_length, principal_point_cropped = _convert_pixels_to_ndc( - focal_length_px, - principal_point_px_cropped, - clamp_bbox_xywh[2:], - ) - - camera.focal_length = focal_length[None] - camera.principal_point = principal_point_cropped[None] - - -def adjust_camera_to_image_scale_( - camera: PerspectiveCameras, - original_size_wh: torch.Tensor, - new_size_wh: torch.LongTensor, -) -> PerspectiveCameras: - focal_length_px, principal_point_px = _convert_ndc_to_pixels( - camera.focal_length[0], - camera.principal_point[0], - original_size_wh, - ) - - # now scale and convert from pixels to NDC - image_size_wh_output = new_size_wh.float() - scale = (image_size_wh_output / original_size_wh).min(dim=-1, keepdim=True).values - focal_length_px_scaled = focal_length_px * scale - principal_point_px_scaled = principal_point_px * scale - - focal_length_scaled, principal_point_scaled = _convert_pixels_to_ndc( - focal_length_px_scaled, - principal_point_px_scaled, - image_size_wh_output, - ) - camera.focal_length = focal_length_scaled[None] - camera.principal_point = principal_point_scaled[None] # pyre-ignore - - -# NOTE this cache is per-worker; they are implemented as processes. -# each batch is loaded and collated by a single worker; -# since sequences tend to co-occur within batches, this is useful. 
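The NDC/pixel helpers above amount to scaling by the shorter half-side of the image; the worked numeric example below (values chosen arbitrarily) makes the round trip explicit.

    # Arbitrary illustrative numbers; mirrors _convert_ndc_to_pixels / _convert_pixels_to_ndc.
    import torch

    image_size_wh = torch.tensor([200.0, 100.0])
    focal_length_ndc = torch.tensor([2.0, 2.0])
    principal_point_ndc = torch.tensor([0.1, 0.0])

    half = image_size_wh / 2                                     # (100, 50)
    rescale = half.min()                                         # 50, the shorter half-side
    principal_point_px = half - principal_point_ndc * rescale    # (95, 50)
    focal_length_px = focal_length_ndc * rescale                 # (100, 100)

    # and the inverse recovers the NDC values
    assert torch.allclose((half - principal_point_px) / rescale, principal_point_ndc)
    assert torch.allclose(focal_length_px / rescale, focal_length_ndc)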
-@functools.lru_cache(maxsize=256) -def load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: - pcl = IO().load_pointcloud(pcl_path) - if max_points > 0: - pcl = pcl.subsample(max_points) - - return pcl diff --git a/pytorch3d/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/pytorch3d/implicitron/dataset/visualize.py deleted file mode 100644 index 4ac633f6e763bd02665d5c0ebbb4aa655861a93b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/dataset/visualize.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import cast, Optional, Tuple - -import torch -from pytorch3d.implicitron.tools.point_cloud_utils import get_rgbd_point_cloud -from pytorch3d.structures import Pointclouds - -from .frame_data import FrameData -from .json_index_dataset import JsonIndexDataset - - -def get_implicitron_sequence_pointcloud( - dataset: JsonIndexDataset, - sequence_name: Optional[str] = None, - mask_points: bool = True, - max_frames: int = -1, - num_workers: int = 0, - load_dataset_point_cloud: bool = False, -) -> Tuple[Pointclouds, FrameData]: - """ - Make a point cloud by sampling random points from each frame the dataset. - """ - - if len(dataset) == 0: - raise ValueError("The dataset is empty.") - - if not dataset.load_depths: - raise ValueError("The dataset has to load depths (dataset.load_depths=True).") - - if mask_points and not dataset.load_masks: - raise ValueError( - "For mask_points=True, the dataset has to load masks" - + " (dataset.load_masks=True)." - ) - - # setup the indices of frames loaded from the dataset db - sequence_entries = list(range(len(dataset))) - if sequence_name is not None: - sequence_entries = [ - ei - for ei in sequence_entries - # pyre-ignore[16] - if dataset.frame_annots[ei]["frame_annotation"].sequence_name - == sequence_name - ] - if len(sequence_entries) == 0: - raise ValueError( - f'There are no dataset entries for sequence name "{sequence_name}".' - ) - - # subsample loaded frames if needed - if (max_frames > 0) and (len(sequence_entries) > max_frames): - sequence_entries = [ - sequence_entries[i] - for i in torch.randperm(len(sequence_entries))[:max_frames].sort().values - ] - - # take only the part of the dataset corresponding to the sequence entries - sequence_dataset = torch.utils.data.Subset(dataset, sequence_entries) - - # load the required part of the dataset - loader = torch.utils.data.DataLoader( - sequence_dataset, - batch_size=len(sequence_dataset), - shuffle=False, - num_workers=num_workers, - collate_fn=dataset.frame_data_type.collate, - ) - - frame_data = next(iter(loader)) # there's only one batch - - # scene point cloud - if load_dataset_point_cloud: - if not dataset.load_point_clouds: - raise ValueError( - "For load_dataset_point_cloud=True, the dataset has to" - + " load point clouds (dataset.load_point_clouds=True)." 
- ) - point_cloud = frame_data.sequence_point_cloud - - else: - point_cloud = get_rgbd_point_cloud( - frame_data.camera, - frame_data.image_rgb, - frame_data.depth_map, - (cast(torch.Tensor, frame_data.fg_probability) > 0.5).float() - if mask_points and frame_data.fg_probability is not None - else None, - ) - - return point_cloud, frame_data diff --git a/pytorch3d/pytorch3d/implicitron/eval_demo.py b/pytorch3d/pytorch3d/implicitron/eval_demo.py deleted file mode 100644 index 91e696945b1625a7889870ea5ceb3ed2efd24311..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/eval_demo.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import dataclasses -import os -from enum import Enum -from typing import Any, cast, Dict, List, Optional, Tuple - -import lpips -import torch -from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource -from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset -from pytorch3d.implicitron.dataset.json_index_dataset_map_provider import ( - CO3D_CATEGORIES, -) -from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import ( - aggregate_nvs_results, - eval_batch, - pretty_print_nvs_metrics, - summarize_nvs_eval_results, -) -from pytorch3d.implicitron.models.model_dbir import ModelDBIR -from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_ -from tqdm import tqdm - - -class Task(Enum): - SINGLE_SEQUENCE = "singlesequence" - MULTI_SEQUENCE = "multisequence" - - -def main() -> None: - """ - Evaluates new view synthesis metrics of a simple depth-based image rendering - (DBIR) model for multisequence/singlesequence tasks for several categories. - - The evaluation is conducted on the same data as in [1] and, hence, the results - are directly comparable to the numbers reported in [1]. - - References: - [1] J. Reizenstein, R. Shapovalov, P. Henzler, L. Sbordone, - P. Labatut, D. Novotny: - Common Objects in 3D: Large-Scale Learning - and Evaluation of Real-life 3D Category Reconstruction - """ - - task_results = {} - for task in (Task.SINGLE_SEQUENCE, Task.MULTI_SEQUENCE): - task_results[task] = [] - for category in CO3D_CATEGORIES[: (20 if task == Task.SINGLE_SEQUENCE else 10)]: - for single_sequence_id in ( - (0, 1) if task == Task.SINGLE_SEQUENCE else (None,) - ): - category_result = evaluate_dbir_for_category( - category, task=task, single_sequence_id=single_sequence_id - ) - print("") - print( - f"Results for task={task}; category={category};" - + ( - f" sequence={single_sequence_id}:" - if single_sequence_id is not None - else ":" - ) - ) - pretty_print_nvs_metrics(category_result) - print("") - - task_results[task].append(category_result) - _print_aggregate_results(task, task_results) - - for task in task_results: - _print_aggregate_results(task, task_results) - - -def evaluate_dbir_for_category( - category: str, - task: Task, - bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0), - single_sequence_id: Optional[int] = None, - num_workers: int = 16, -): - """ - Evaluates new view synthesis metrics of a simple depth-based image rendering - (DBIR) model for a given task, category, and sequence (in case task=='singlesequence'). - - Args: - category: Object category. - bg_color: Background color of the renders. - task: Evaluation task. Either singlesequence or multisequence. 
- single_sequence_id: The ID of the evaluiation sequence for the singlesequence task. - num_workers: The number of workers for the employed dataloaders. - path_manager: (optional) Used for interpreting paths. - - Returns: - category_result: A dictionary of quantitative metrics. - """ - - single_sequence_id = single_sequence_id if single_sequence_id is not None else -1 - - torch.manual_seed(42) - - dataset_map_provider_args = { - "category": category, - "dataset_root": os.environ["CO3D_DATASET_ROOT"], - "assert_single_seq": task == Task.SINGLE_SEQUENCE, - "task_str": task.value, - "test_on_train": False, - "test_restrict_sequence_id": single_sequence_id, - "dataset_JsonIndexDataset_args": {"load_point_clouds": True}, - } - data_source = ImplicitronDataSource( - dataset_map_provider_JsonIndexDatasetMapProvider_args=dataset_map_provider_args - ) - - datasets, dataloaders = data_source.get_datasets_and_dataloaders() - - test_dataset = datasets.test - test_dataloader = dataloaders.test - if test_dataset is None or test_dataloader is None: - raise ValueError("must have a test dataset.") - - image_size = cast(JsonIndexDataset, test_dataset).image_width - - if image_size is None: - raise ValueError("Image size should be set in the dataset") - - # init the simple DBIR model - model = ModelDBIR( - render_image_width=image_size, - render_image_height=image_size, - bg_color=bg_color, - max_points=int(1e5), - ) - model.cuda() - - # init the lpips model for eval - lpips_model = lpips.LPIPS(net="vgg") - lpips_model = lpips_model.cuda() - - per_batch_eval_results = [] - print("Evaluating DBIR model ...") - for frame_data in tqdm(test_dataloader): - frame_data = dataclass_to_cuda_(frame_data) - preds = model(**dataclasses.asdict(frame_data)) - per_batch_eval_results.append( - eval_batch( - frame_data, - preds["implicitron_render"], - bg_color=bg_color, - lpips_model=lpips_model, - ) - ) - - category_result_flat, category_result = summarize_nvs_eval_results( - per_batch_eval_results, - is_multisequence=task != Task.SINGLE_SEQUENCE, - ) - - return category_result["results"] - - -def _print_aggregate_results( - task: Task, task_results: Dict[Task, List[List[Dict[str, Any]]]] -) -> None: - """ - Prints the aggregate metrics for a given task. - """ - aggregate_task_result = aggregate_nvs_results(task_results[task]) - print("") - print(f"Aggregate results for task={task}:") - pretty_print_nvs_metrics(aggregate_task_result) - print("") - - -if __name__ == "__main__": - main() diff --git a/pytorch3d/pytorch3d/implicitron/evaluation/__init__.py b/pytorch3d/pytorch3d/implicitron/evaluation/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/evaluation/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py b/pytorch3d/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py deleted file mode 100644 index decf938b2cdc6195f29b280066cb541692978b5f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py +++ /dev/null @@ -1,596 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import copy -import warnings -from collections import OrderedDict -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union - -import numpy as np -import torch -import torch.nn.functional as F -from pytorch3d.implicitron.dataset.frame_data import FrameData -from pytorch3d.implicitron.dataset.utils import is_train_frame -from pytorch3d.implicitron.models.base_model import ImplicitronRender -from pytorch3d.implicitron.tools import vis_utils -from pytorch3d.implicitron.tools.image_utils import mask_background -from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth, iou, rgb_l1 -from pytorch3d.implicitron.tools.point_cloud_utils import get_rgbd_point_cloud -from pytorch3d.implicitron.tools.vis_utils import make_depth_image -from pytorch3d.renderer.cameras import PerspectiveCameras -from pytorch3d.vis.plotly_vis import plot_scene -from tabulate import tabulate - -if TYPE_CHECKING: - from visdom import Visdom - - -EVAL_N_SRC_VIEWS = [1, 3, 5, 7, 9] - - -@dataclass -class _Visualizer: - image_render: torch.Tensor - image_rgb_masked: torch.Tensor - depth_render: torch.Tensor - depth_map: Optional[torch.Tensor] - depth_mask: Optional[torch.Tensor] - - visdom_env: str = "eval_debug" - - _viz: Optional["Visdom"] = field(init=False) - - def __post_init__(self): - self._viz = vis_utils.get_visdom_connection() - - def show_rgb( - self, loss_value: float, metric_name: str, loss_mask_now: torch.Tensor - ): - if self._viz is None: - return - self._viz.images( - torch.cat( - ( - self.image_render, - self.image_rgb_masked, - loss_mask_now.repeat(1, 3, 1, 1), - ), - dim=3, - ), - env=self.visdom_env, - win=metric_name, - opts={"title": f"{metric_name}_{loss_value:1.2f}"}, - ) - - def show_depth( - self, depth_loss: float, name_postfix: str, loss_mask_now: torch.Tensor - ): - if self._viz is None: - return - viz = self._viz - viz.images( - torch.cat( - (make_depth_image(self.depth_render, loss_mask_now),) - + ( - (make_depth_image(self.depth_map, loss_mask_now),) - if self.depth_map is not None - else () - ), - dim=3, - ), - env=self.visdom_env, - win="depth_abs" + name_postfix, - opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}"}, - ) - viz.images( - loss_mask_now, - env=self.visdom_env, - win="depth_abs" + name_postfix + "_mask", - opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_mask"}, - ) - if self.depth_mask is not None: - viz.images( - self.depth_mask, - env=self.visdom_env, - win="depth_abs" + name_postfix + "_maskd", - opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_maskd"}, - ) - - # show the 3D plot - # pyre-fixme[9]: viewpoint_trivial has type `PerspectiveCameras`; used as - # `TensorProperties`. 
- viewpoint_trivial: PerspectiveCameras = PerspectiveCameras().to( - loss_mask_now.device - ) - _pcls = { - "pred_depth": get_rgbd_point_cloud( - viewpoint_trivial, - self.image_render, - self.depth_render, - # mask_crop, - torch.ones_like(self.depth_render), - # loss_mask_now, - ) - } - if self.depth_map is not None: - _pcls["gt_depth"] = get_rgbd_point_cloud( - viewpoint_trivial, - self.image_rgb_masked, - self.depth_map, - # mask_crop, - torch.ones_like(self.depth_map), - # loss_mask_now, - ) - - _pcls = {pn: p for pn, p in _pcls.items() if int(p.num_points_per_cloud()) > 0} - - plotlyplot = plot_scene( - {f"pcl{name_postfix}": _pcls}, # pyre-ignore - camera_scale=1.0, - pointcloud_max_points=10000, - pointcloud_marker_size=1, - ) - viz.plotlyplot( - plotlyplot, - env=self.visdom_env, - win=f"pcl{name_postfix}", - ) - - -def eval_batch( - frame_data: FrameData, - implicitron_render: ImplicitronRender, - bg_color: Union[torch.Tensor, Sequence, str, float] = "black", - mask_thr: float = 0.5, - lpips_model=None, - visualize: bool = False, - visualize_visdom_env: str = "eval_debug", - break_after_visualising: bool = True, -) -> Dict[str, Any]: - """ - Produce performance metrics for a single batch of new-view synthesis - predictions. - - Given a set of known views (for which frame_data.frame_type.endswith('known') - is True), a new-view synthesis method (NVS) is tasked to generate new views - of the scene from the viewpoint of the target views (for which - frame_data.frame_type.endswith('known') is False). The resulting - synthesized new views, stored in `implicitron_render`, are compared to the - target ground truth in `frame_data` in terms of geometry and appearance - resulting in a dictionary of metrics returned by the `eval_batch` function. - - Args: - frame_data: A FrameData object containing the input to the new view - synthesis method. - implicitron_render: The data describing the synthesized new views. - bg_color: The background color of the generated new views and the - ground truth. - lpips_model: A pre-trained model for evaluating the LPIPS metric. - visualize: If True, visualizes the results to Visdom. - - Returns: - results: A dictionary holding evaluation metrics. - - Throws: - ValueError if frame_data does not have frame_type, camera, or image_rgb - ValueError if the batch has a mix of training and test samples - ValueError if the batch frames are not [unseen, known, known, ...] - ValueError if one of the required fields in implicitron_render is missing - """ - frame_type = frame_data.frame_type - if frame_type is None: - raise ValueError("Frame type has not been set.") - - # we check that all those fields are not None but Pyre can't infer that properly - # TODO: assign to local variables and simplify the code. 
- if frame_data.image_rgb is None: - raise ValueError("Image is not in the evaluation batch.") - - if frame_data.camera is None: - raise ValueError("Camera is not in the evaluation batch.") - - # eval all results in the resolution of the frame_data image - image_resol = tuple(frame_data.image_rgb.shape[2:]) - - # Post-process the render: - # 1) check implicitron_render for Nones, - # 2) obtain copies to make sure we dont edit the original data, - # 3) take only the 1st (target) image - # 4) resize to match ground-truth resolution - cloned_render: Dict[str, torch.Tensor] = {} - for k in ["mask_render", "image_render", "depth_render"]: - field = getattr(implicitron_render, k) - if field is None: - raise ValueError(f"A required predicted field {k} is missing") - - imode = "bilinear" if k == "image_render" else "nearest" - cloned_render[k] = ( - F.interpolate(field[:1], size=image_resol, mode=imode).detach().clone() - ) - - frame_data = copy.deepcopy(frame_data) - - # mask the ground truth depth in case frame_data contains the depth mask - if frame_data.depth_map is not None and frame_data.depth_mask is not None: - frame_data.depth_map *= frame_data.depth_mask - - if not isinstance(frame_type, list): # not batch FrameData - frame_type = [frame_type] - - is_train = is_train_frame(frame_type) - if len(is_train) > 1 and (is_train[1] != is_train[1:]).any(): - raise ValueError( - "All (conditioning) frames in the eval batch have to be either train/test." - ) - - for k in [ - "depth_map", - "image_rgb", - "fg_probability", - "mask_crop", - ]: - if not hasattr(frame_data, k) or getattr(frame_data, k) is None: - continue - setattr(frame_data, k, getattr(frame_data, k)[:1]) - - if frame_data.depth_map is None or frame_data.depth_map.sum() <= 0: - warnings.warn("Empty or missing depth map in evaluation!") - - if frame_data.mask_crop is None: - warnings.warn("mask_crop is None, assuming the whole image is valid.") - - if frame_data.fg_probability is None: - warnings.warn("fg_probability is None, assuming the whole image is fg.") - - # threshold the masks to make ground truth binary masks - mask_fg = ( - frame_data.fg_probability >= mask_thr - if frame_data.fg_probability is not None - # pyre-ignore [16] - else torch.ones_like(frame_data.image_rgb[:, :1, ...]).bool() - ) - - mask_crop = ( - frame_data.mask_crop - if frame_data.mask_crop is not None - else torch.ones_like(mask_fg) - ) - - # unmasked g.t. image - image_rgb = frame_data.image_rgb - - # fg-masked g.t. image - image_rgb_masked = mask_background( - # pyre-fixme[6]: Expected `Tensor` for 1st param but got - # `Optional[torch.Tensor]`. 
- frame_data.image_rgb, - mask_fg, - bg_color=bg_color, - ) - - # clamp predicted images - image_render = cloned_render["image_render"].clamp(0.0, 1.0) - - if visualize: - visualizer = _Visualizer( - image_render=image_render, - image_rgb_masked=image_rgb_masked, - depth_render=cloned_render["depth_render"], - depth_map=frame_data.depth_map, - depth_mask=frame_data.depth_mask[:1] - if frame_data.depth_mask is not None - else None, - visdom_env=visualize_visdom_env, - ) - - results: Dict[str, Any] = {} - - results["iou"] = iou( - cloned_render["mask_render"], - mask_fg, - mask=mask_crop, - ) - - for loss_fg_mask, name_postfix in zip((mask_crop, mask_fg), ("_masked", "_fg")): - - loss_mask_now = mask_crop * loss_fg_mask - - for rgb_metric_name, rgb_metric_fun in zip( - ("psnr", "rgb_l1"), (calc_psnr, rgb_l1) - ): - metric_name = rgb_metric_name + name_postfix - results[metric_name] = rgb_metric_fun( - image_render, - image_rgb_masked, - mask=loss_mask_now, - ) - - if visualize: - visualizer.show_rgb( - results[metric_name].item(), metric_name, loss_mask_now - ) - - if name_postfix == "_fg" and frame_data.depth_map is not None: - # only record depth metrics for the foreground - _, abs_ = eval_depth( - cloned_render["depth_render"], - # pyre-fixme[6]: For 2nd param expected `Tensor` but got - # `Optional[Tensor]`. - frame_data.depth_map, - get_best_scale=True, - mask=loss_mask_now, - crop=5, - ) - results["depth_abs" + name_postfix] = abs_.mean() - - if visualize: - visualizer.show_depth(abs_.mean().item(), name_postfix, loss_mask_now) - if break_after_visualising: - breakpoint() # noqa: B601 - - # add the rgb metrics between the render and the unmasked image - for rgb_metric_name, rgb_metric_fun in zip( - ("psnr_full_image", "rgb_l1_full_image"), (calc_psnr, rgb_l1) - ): - results[rgb_metric_name] = rgb_metric_fun( - image_render, - # pyre-fixme[6]: For 2nd argument expected `Tensor` but got - # `Optional[Tensor]`. - image_rgb, - mask=mask_crop, - ) - - if lpips_model is not None: - for gt_image_type in ("_full_image", "_masked"): - im1, im2 = [ - 2.0 * im.clamp(0.0, 1.0) - 1.0 # pyre-ignore[16] - for im in ( - image_rgb_masked if gt_image_type == "_masked" else image_rgb, - cloned_render["image_render"], - ) - ] - results["lpips" + gt_image_type] = lpips_model.forward(im1, im2).item() - - # convert all metrics to floats - results = {k: float(v) for k, v in results.items()} - - results["meta"] = { - # store the size of the batch (corresponds to n_src_views+1) - "batch_size": len(frame_type), - # store the type of the target frame - # pyre-fixme[16]: `None` has no attribute `__getitem__`. - "frame_type": str(frame_data.frame_type[0]), - } - - return results - - -def average_per_batch_results( - results_per_batch: List[Dict[str, Any]], - idx: Optional[torch.Tensor] = None, -) -> dict: - """ - Average a list of per-batch metrics `results_per_batch`. - Optionally, if `idx` is given, only a subset of the per-batch - metrics, indexed by `idx`, is averaged. 
- """ - result_keys = list(results_per_batch[0].keys()) - result_keys.remove("meta") - if idx is not None: - results_per_batch = [results_per_batch[i] for i in idx] - if len(results_per_batch) == 0: - return {k: float("NaN") for k in result_keys} - return { - k: float(np.array([r[k] for r in results_per_batch]).mean()) - for k in result_keys - } - - -def _reduce_camera_iou_overlap(ious: torch.Tensor, topk: int = 2) -> torch.Tensor: - """ - Calculate the final camera difficulty by computing the average of the - ious of the two most similar cameras. - - Returns: - single-element Tensor - """ - return ious.topk(k=min(topk, len(ious) - 1)).values.mean() - - -def _get_camera_difficulty_bin_edges(camera_difficulty_bin_breaks: Tuple[float, float]): - """ - Get the edges of camera difficulty bins. - """ - _eps = 1e-5 - lower, upper = camera_difficulty_bin_breaks - diff_bin_edges = torch.tensor([0.0 - _eps, lower, upper, 1.0 + _eps]).float() - diff_bin_names = ["hard", "medium", "easy"] - return diff_bin_edges, diff_bin_names - - -def summarize_nvs_eval_results( - per_batch_eval_results: List[Dict[str, Any]], - is_multisequence: bool, -) -> Tuple[Dict[str, Any], Dict[str, Any]]: - """ - Compile the per-batch evaluation results `per_batch_eval_results` into - a set of aggregate metrics. The produced metrics depend on is_multisequence. - - Args: - per_batch_eval_results: Metrics of each per-batch evaluation. - is_multisequence: Whether to evaluate as a multisequence task - camera_difficulty_bin_breaks: edge hard-medium and medium-easy - - - Returns: - nvs_results_flat: A flattened dict of all aggregate metrics. - aux_out: A dictionary holding a set of auxiliary results. - """ - n_batches = len(per_batch_eval_results) - eval_sets: List[Optional[str]] = [] - eval_sets = [None] - if is_multisequence: - eval_sets = ["train", "test"] - batch_sizes = torch.tensor( - [r["meta"]["batch_size"] for r in per_batch_eval_results] - ).long() - - is_train = is_train_frame([r["meta"]["frame_type"] for r in per_batch_eval_results]) - - # init the result database dict - results = [] - - # add per set averages - for SET in eval_sets: - if SET is None: - ok_set = torch.ones(n_batches, dtype=torch.bool) - set_name = "test" - else: - ok_set = is_train == int(SET == "train") - set_name = SET - - # average over all results - bin_results = average_per_batch_results( - per_batch_eval_results, idx=torch.where(ok_set)[0] - ) - results.append( - { - "subset": set_name, - "subsubset": "diff=all", - "metrics": bin_results, - } - ) - - if is_multisequence: - # split based on n_src_views - n_src_views = batch_sizes - 1 - for n_src in EVAL_N_SRC_VIEWS: - ok_src = ok_set & (n_src_views == n_src) - n_src_results = average_per_batch_results( - per_batch_eval_results, - idx=torch.where(ok_src)[0], - ) - results.append( - { - "subset": set_name, - "subsubset": f"n_src={int(n_src)}", - "metrics": n_src_results, - } - ) - - aux_out = {"results": results} - return flatten_nvs_results(results), aux_out - - -def _get_flat_nvs_metric_key(result, metric_name) -> str: - metric_key_postfix = f"|subset={result['subset']}|{result['subsubset']}" - metric_key = f"{metric_name}{metric_key_postfix}" - return metric_key - - -def flatten_nvs_results(results) -> Dict[str, Any]: - """ - Takes input `results` list of dicts of the form:: - - [ - { - 'subset':'train/test/...', - 'subsubset': 'src=1/src=2/...', - 'metrics': nvs_eval_metrics} - }, - ... 
- ] - - And converts to a flat dict as follows:: - - { - 'subset=train/test/...|subsubset=src=1/src=2/...': nvs_eval_metrics, - ... - } - """ - results_flat = {} - for result in results: - for metric_name, metric_val in result["metrics"].items(): - metric_key = _get_flat_nvs_metric_key(result, metric_name) - assert metric_key not in results_flat - results_flat[metric_key] = metric_val - return results_flat - - -def pretty_print_nvs_metrics(results) -> None: - subsets, subsubsets = [ - _ordered_set([r[k] for r in results]) for k in ("subset", "subsubset") - ] - metrics = _ordered_set([metric for r in results for metric in r["metrics"]]) - - for subset in subsets: - tab = {} - for metric in metrics: - tab[metric] = [] - header = ["metric"] - for subsubset in subsubsets: - metric_vals = [ - r["metrics"][metric] - for r in results - if r["subsubset"] == subsubset and r["subset"] == subset - ] - if len(metric_vals) > 0: - tab[metric].extend(metric_vals) - header.extend(subsubsets) - - if any(len(v) > 0 for v in tab.values()): - print(f"===== NVS results; subset={subset} =====") - print( - tabulate( - [[metric, *v] for metric, v in tab.items()], - # pyre-fixme[61]: `header` is undefined, or not always defined. - headers=header, - ) - ) - - -def _ordered_set(list_): - return list(OrderedDict((i, 0) for i in list_).keys()) - - -def aggregate_nvs_results(task_results): - """ - Aggregate nvs results. - For singlescene, this averages over all categories and scenes, - for multiscene, the average is over all per-category results. - """ - task_results_cat = [r_ for r in task_results for r_ in r] - subsets, subsubsets = [ - _ordered_set([r[k] for r in task_results_cat]) for k in ("subset", "subsubset") - ] - metrics = _ordered_set( - [metric for r in task_results_cat for metric in r["metrics"]] - ) - average_results = [] - for subset in subsets: - for subsubset in subsubsets: - metrics_lists = [ - r["metrics"] - for r in task_results_cat - if r["subsubset"] == subsubset and r["subset"] == subset - ] - avg_metrics = {} - for metric in metrics: - avg_metrics[metric] = float( - np.nanmean( - np.array([metric_list[metric] for metric_list in metrics_lists]) - ) - ) - average_results.append( - { - "subset": subset, - "subsubset": subsubset, - "metrics": avg_metrics, - } - ) - return average_results diff --git a/pytorch3d/pytorch3d/implicitron/evaluation/evaluator.py b/pytorch3d/pytorch3d/implicitron/evaluation/evaluator.py deleted file mode 100644 index e869a6566bf3742655ef70b6343d15ae7407b8e9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/evaluation/evaluator.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import copy -import json -import logging -import os -import warnings -from typing import Any, Dict, List, Optional, Tuple - -import torch - -import tqdm -from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate -from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) -from torch.utils.data import DataLoader - -logger = logging.getLogger(__name__) - - -class EvaluatorBase(ReplaceableBase): - """ - Evaluate a trained model on given data. Returns a dict of loss/objective - names and their values. 
- """ - - is_multisequence: bool = False - - def run( - self, model: ImplicitronModelBase, dataloader: DataLoader, **kwargs - ) -> Dict[str, Any]: - """ - Evaluate the results of Implicitron training. - """ - raise NotImplementedError() - - -@registry.register -class ImplicitronEvaluator(EvaluatorBase): - """ - Evaluate the results of Implicitron training. - """ - - # UNUSED; preserved for compatibility purposes - camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98 - - def __post_init__(self): - run_auto_creation(self) - - # pyre-fixme[14]: `run` overrides method defined in `EvaluatorBase` inconsistently. - def run( - self, - model: ImplicitronModelBase, - dataloader: DataLoader, - device: torch.device, - dump_to_json: bool = False, - exp_dir: Optional[str] = None, - epoch: Optional[int] = None, - **kwargs, - ) -> Dict[str, Any]: - """ - Evaluate the results of Implicitron training. Optionally, dump results to - exp_dir/results_test.json. - - Args: - model: A (trained) model to evaluate. - dataloader: A test dataloader. - device: A torch device. - dump_to_json: If True, will dump the results to a json file. - exp_dir: Root expeirment directory. - epoch: Evaluation epoch (to be stored in the results dict). - - Returns: - A dictionary of results. - """ - try: - import lpips - - lpips_model = lpips.LPIPS(net="vgg") - lpips_model = lpips_model.to(device) - except ImportError: - warnings.warn( - "lpips library NOT FOUND. lpips losses will not be calculated" - ) - lpips_model = None - - model.eval() - - per_batch_eval_results = [] - logger.info("Evaluating model ...") - for frame_data in tqdm.tqdm(dataloader): - frame_data = frame_data.to(device) - - # mask out the unknown images so that the model does not see them - frame_data_for_eval = _get_eval_frame_data(frame_data) - - with torch.no_grad(): - preds = model( - **{ - **frame_data_for_eval, - "evaluation_mode": EvaluationMode.EVALUATION, - } - ) - implicitron_render = copy.deepcopy(preds["implicitron_render"]) - per_batch_eval_results.append( - evaluate.eval_batch( - frame_data, - implicitron_render, - bg_color="black", - lpips_model=lpips_model, - ) - ) - - _, category_result = evaluate.summarize_nvs_eval_results( - per_batch_eval_results, - self.is_multisequence, - ) - - results = category_result["results"] - evaluate.pretty_print_nvs_metrics(results) - if dump_to_json: - _dump_to_json(epoch, exp_dir, results) - - return category_result["results"] - - -def _dump_to_json( - epoch: Optional[int], exp_dir: Optional[str], results: List[Dict[str, Any]] -) -> None: - if epoch is not None: - for r in results: - r["eval_epoch"] = int(epoch) - logger.info("Evaluation results") - - if exp_dir is None: - raise ValueError("Cannot save results to json without a specified save path.") - with open(os.path.join(exp_dir, "results_test.json"), "w") as f: - json.dump(results, f) - - -def _get_eval_frame_data(frame_data: Any) -> Any: - """ - Masks the target image data to make sure we cannot use it at model evaluation - time. Assumes the first batch element is target, the rest are source. 
- """ - frame_data_for_eval = copy.deepcopy(frame_data) - for k in ("image_rgb", "depth_map", "fg_probability", "mask_crop"): - value = getattr(frame_data_for_eval, k) - value[0].zero_() - return frame_data_for_eval diff --git a/pytorch3d/pytorch3d/implicitron/models/__init__.py b/pytorch3d/pytorch3d/implicitron/models/__init__.py deleted file mode 100644 index 5a3ab83f3ae77e605eeb043fa4a29e246bdc1a91..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Allows to register the models -# see: pytorch3d.implicitron.tools.config.registry:register -from pytorch3d.implicitron.models.generic_model import GenericModel -from pytorch3d.implicitron.models.overfit_model import OverfitModel diff --git a/pytorch3d/pytorch3d/implicitron/models/base_model.py b/pytorch3d/pytorch3d/implicitron/models/base_model.py deleted file mode 100644 index bd48bf7f8b6a59258375450e905a0e2db0a31601..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/base_model.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -import torch - -from pytorch3d.implicitron.models.renderer.base import EvaluationMode -from pytorch3d.implicitron.tools.config import ReplaceableBase -from pytorch3d.renderer.cameras import CamerasBase - - -@dataclass -class ImplicitronRender: - """ - Holds the tensors that describe a result of rendering. - """ - - depth_render: Optional[torch.Tensor] = None - image_render: Optional[torch.Tensor] = None - mask_render: Optional[torch.Tensor] = None - camera_distance: Optional[torch.Tensor] = None - - def clone(self) -> "ImplicitronRender": - def safe_clone(t: Optional[torch.Tensor]) -> Optional[torch.Tensor]: - return t.detach().clone() if t is not None else None - - return ImplicitronRender( - depth_render=safe_clone(self.depth_render), - image_render=safe_clone(self.image_render), - mask_render=safe_clone(self.mask_render), - camera_distance=safe_clone(self.camera_distance), - ) - - -class ImplicitronModelBase(ReplaceableBase, torch.nn.Module): - """ - Replaceable abstract base for all image generation / rendering models. - `forward()` method produces a render with a depth map. Derives from Module - so we can rely on basic functionality provided to torch for model - optimization. - """ - - # The keys from `preds` (output of ImplicitronModelBase.forward) to be logged in - # the training loop. 
- log_vars: List[str] = field(default_factory=lambda: ["objective"]) - - def forward( - self, - *, # force keyword-only arguments - image_rgb: Optional[torch.Tensor], - camera: CamerasBase, - fg_probability: Optional[torch.Tensor], - mask_crop: Optional[torch.Tensor], - depth_map: Optional[torch.Tensor], - sequence_name: Optional[List[str]], - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - **kwargs, - ) -> Dict[str, Any]: - """ - Args: - image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images; - the first `min(B, n_train_target_views)` images are considered targets and - are used to supervise the renders; the rest corresponding to the source - viewpoints from which features will be extracted. - camera: An instance of CamerasBase containing a batch of `B` cameras corresponding - to the viewpoints of target images, from which the rays will be sampled, - and source images, which will be used for intersecting with target rays. - fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch of - foreground masks. - mask_crop: A binary tensor of shape `(B, 1, H, W)` deonting valid - regions in the input images (i.e. regions that do not correspond - to, e.g., zero-padding). When the `RaySampler`'s sampling mode is set to - "mask_sample", rays will be sampled in the non zero regions. - depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps. - sequence_name: A list of `B` strings corresponding to the sequence names - from which images `image_rgb` were extracted. They are used to match - target frames with relevant source frames. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering. - - Returns: - preds: A dictionary containing all outputs of the forward pass. All models should - output an instance of `ImplicitronRender` in `preds["implicitron_render"]`. - """ - raise NotImplementedError() diff --git a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/__init__.py b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/__init__.py deleted file mode 100644 index 9141562c848ec7d21b0e4aeb953fe962b8d8a325..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .feature_extractor import FeatureExtractorBase diff --git a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/feature_extractor.py b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/feature_extractor.py deleted file mode 100644 index 9ce7f5e56b033293321b9cc73b31962bd49a249c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/feature_extractor.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Any, Dict, Optional - -import torch -from pytorch3d.implicitron.tools.config import ReplaceableBase - - -class FeatureExtractorBase(ReplaceableBase, torch.nn.Module): - """ - Base class for an extractor of a set of features from images. 
- """ - - def get_feat_dims(self) -> int: - """ - Returns: - total number of feature dimensions of the output. - (i.e. sum_i(dim_i)) - """ - raise NotImplementedError - - def forward( - self, - imgs: Optional[torch.Tensor], - masks: Optional[torch.Tensor] = None, - **kwargs, - ) -> Dict[Any, torch.Tensor]: - """ - Args: - imgs: A batch of input images of shape `(B, 3, H, W)`. - masks: A batch of input masks of shape `(B, 3, H, W)`. - - Returns: - out_feats: A dict `{f_i: t_i}` keyed by predicted feature names `f_i` - and their corresponding tensors `t_i` of shape `(B, dim_i, H_i, W_i)`. - """ - raise NotImplementedError diff --git a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py b/pytorch3d/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py deleted file mode 100644 index ca7cefd0e84ffc6cb930ca8dd965f0454bcaf5e8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -import math -from typing import Any, Dict, Optional, Tuple - -import torch -import torch.nn.functional as Fu -import torchvision -from pytorch3d.implicitron.tools.config import registry - -from . import FeatureExtractorBase - - -logger = logging.getLogger(__name__) - -MASK_FEATURE_NAME = "mask" -IMAGE_FEATURE_NAME = "image" - -_FEAT_DIMS = { - "resnet18": (64, 128, 256, 512), - "resnet34": (64, 128, 256, 512), - "resnet50": (256, 512, 1024, 2048), - "resnet101": (256, 512, 1024, 2048), - "resnet152": (256, 512, 1024, 2048), -} - -_RESNET_MEAN = [0.485, 0.456, 0.406] -_RESNET_STD = [0.229, 0.224, 0.225] - - -@registry.register -class ResNetFeatureExtractor(FeatureExtractorBase): - """ - Implements an image feature extractor. Depending on the settings allows - to extract: - - deep features: A CNN ResNet backbone from torchvision (with/without - pretrained weights) which extracts deep features. - - masks: Segmentation masks. - - images: Raw input RGB images. - - Settings: - name: name of the resnet backbone (from torchvision) - pretrained: If true, will load the pretrained weights - stages: List of stages from which to extract features. - Features from each stage are returned as key value - pairs in the forward function - normalize_image: If set will normalize the RGB values of - the image based on the Resnet mean/std - image_rescale: If not 1.0, this rescale factor will be - used to resize the image - first_max_pool: If set, a max pool layer is added after the first - convolutional layer - proj_dim: The number of output channels for the convolutional layers - l2_norm: If set, l2 normalization is applied to the extracted features - add_masks: If set, the masks will be saved in the output dictionary - add_images: If set, the images will be saved in the output dictionary - global_average_pool: If set, global average pooling step is performed - feature_rescale: If not 1.0, this rescale factor will be used to - rescale the output features - """ - - name: str = "resnet34" - pretrained: bool = True - stages: Tuple[int, ...] 
= (1, 2, 3, 4) - normalize_image: bool = True - image_rescale: float = 128 / 800.0 - first_max_pool: bool = True - proj_dim: int = 32 - l2_norm: bool = True - add_masks: bool = True - add_images: bool = True - global_average_pool: bool = False # this can simulate global/non-spacial features - feature_rescale: float = 1.0 - - def __post_init__(self): - if self.normalize_image: - # register buffers needed to normalize the image - for k, v in (("_resnet_mean", _RESNET_MEAN), ("_resnet_std", _RESNET_STD)): - self.register_buffer( - k, - torch.FloatTensor(v).view(1, 3, 1, 1), - persistent=False, - ) - - self._feat_dim = {} - - if len(self.stages) == 0: - # do not extract any resnet features - pass - else: - net = getattr(torchvision.models, self.name)(pretrained=self.pretrained) - if self.first_max_pool: - self.stem = torch.nn.Sequential( - net.conv1, net.bn1, net.relu, net.maxpool - ) - else: - self.stem = torch.nn.Sequential(net.conv1, net.bn1, net.relu) - self.max_stage = max(self.stages) - self.layers = torch.nn.ModuleList() - self.proj_layers = torch.nn.ModuleList() - for stage in range(self.max_stage): - stage_name = f"layer{stage+1}" - feature_name = self._get_resnet_stage_feature_name(stage) - if (stage + 1) in self.stages: - if ( - self.proj_dim > 0 - and _FEAT_DIMS[self.name][stage] > self.proj_dim - ): - proj = torch.nn.Conv2d( - _FEAT_DIMS[self.name][stage], - self.proj_dim, - 1, - 1, - bias=True, - ) - self._feat_dim[feature_name] = self.proj_dim - else: - proj = torch.nn.Identity() - self._feat_dim[feature_name] = _FEAT_DIMS[self.name][stage] - else: - proj = torch.nn.Identity() - self.proj_layers.append(proj) - self.layers.append(getattr(net, stage_name)) - - if self.add_masks: - self._feat_dim[MASK_FEATURE_NAME] = 1 - - if self.add_images: - self._feat_dim[IMAGE_FEATURE_NAME] = 3 - - logger.info(f"Feat extractor total dim = {self.get_feat_dims()}") - self.stages = set(self.stages) # convert to set for faster "in" - - def _get_resnet_stage_feature_name(self, stage) -> str: - return f"res_layer_{stage+1}" - - def _resnet_normalize_image(self, img: torch.Tensor) -> torch.Tensor: - return (img - self._resnet_mean) / self._resnet_std - - def get_feat_dims(self) -> int: - return sum(self._feat_dim.values()) - - def forward( - self, - imgs: Optional[torch.Tensor], - masks: Optional[torch.Tensor] = None, - **kwargs, - ) -> Dict[Any, torch.Tensor]: - """ - Args: - imgs: A batch of input images of shape `(B, 3, H, W)`. - masks: A batch of input masks of shape `(B, 3, H, W)`. - - Returns: - out_feats: A dict `{f_i: t_i}` keyed by predicted feature names `f_i` - and their corresponding tensors `t_i` of shape `(B, dim_i, H_i, W_i)`. - """ - - out_feats = {} - - imgs_input = imgs - if self.image_rescale != 1.0 and imgs_input is not None: - imgs_resized = Fu.interpolate( - imgs_input, - scale_factor=self.image_rescale, - mode="bilinear", - ) - else: - imgs_resized = imgs_input - - if len(self.stages) > 0: - assert imgs_resized is not None - - if self.normalize_image: - imgs_normed = self._resnet_normalize_image(imgs_resized) - else: - imgs_normed = imgs_resized - # is not a function. 
- feats = self.stem(imgs_normed) - for stage, (layer, proj) in enumerate(zip(self.layers, self.proj_layers)): - feats = layer(feats) - # just a sanity check below - assert feats.shape[1] == _FEAT_DIMS[self.name][stage] - if (stage + 1) in self.stages: - f = proj(feats) - if self.global_average_pool: - f = f.mean(dims=(2, 3)) - if self.l2_norm: - normfac = 1.0 / math.sqrt(len(self.stages)) - f = Fu.normalize(f, dim=1) * normfac - feature_name = self._get_resnet_stage_feature_name(stage) - out_feats[feature_name] = f - - if self.add_masks: - assert masks is not None - out_feats[MASK_FEATURE_NAME] = masks - - if self.add_images: - assert imgs_resized is not None - out_feats[IMAGE_FEATURE_NAME] = imgs_resized - - if self.feature_rescale != 1.0: - out_feats = {k: self.feature_rescale * f for k, f in out_feats.items()} - - # pyre-fixme[7]: Incompatible return type, expected `Dict[typing.Any, Tensor]` - # but got `Dict[typing.Any, float]` - return out_feats diff --git a/pytorch3d/pytorch3d/implicitron/models/generic_model.py b/pytorch3d/pytorch3d/implicitron/models/generic_model.py deleted file mode 100644 index 7d319594a8db13f4a0c396998599ec374892b660..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/generic_model.py +++ /dev/null @@ -1,765 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# Note: The #noqa comments below are for unused imports of pluggable implementations -# which are part of implicitron. They ensure that the registry is prepopulated. - -import logging -from dataclasses import field -from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union - -import torch -from omegaconf import DictConfig - -from pytorch3d.implicitron.models.base_model import ( - ImplicitronModelBase, - ImplicitronRender, -) -from pytorch3d.implicitron.models.feature_extractor import FeatureExtractorBase -from pytorch3d.implicitron.models.global_encoder.global_encoder import GlobalEncoderBase -from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase -from pytorch3d.implicitron.models.metrics import ( - RegularizationMetricsBase, - ViewMetricsBase, -) - -from pytorch3d.implicitron.models.renderer.base import ( - BaseRenderer, - EvaluationMode, - ImplicitFunctionWrapper, - ImplicitronRayBundle, - RendererOutput, - RenderSamplingMode, -) -from pytorch3d.implicitron.models.renderer.ray_sampler import RaySamplerBase - -from pytorch3d.implicitron.models.utils import ( - apply_chunked, - chunk_generator, - log_loss_weights, - preprocess_input, - weighted_sum_losses, -) -from pytorch3d.implicitron.models.view_pooler.view_pooler import ViewPooler -from pytorch3d.implicitron.tools import vis_utils -from pytorch3d.implicitron.tools.config import ( - expand_args_fields, - registry, - run_auto_creation, -) - -from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle -from pytorch3d.renderer import utils as rend_utils -from pytorch3d.renderer.cameras import CamerasBase - - -if TYPE_CHECKING: - from visdom import Visdom -logger = logging.getLogger(__name__) - - -@registry.register -class GenericModel(ImplicitronModelBase): # pyre-ignore: 13 - """ - GenericModel is a wrapper for the neural implicit - rendering and reconstruction pipeline which consists - of the following sequence of 7 steps (steps 2–4 are normally - skipped in overfitting scenario, 
since conditioning on source views - does not add much information; otherwise they should be present altogether): - - - (1) Ray Sampling - ------------------ - Rays are sampled from an image grid based on the target view(s). - β”‚_____________ - β”‚ β”‚ - β”‚ β–Ό - β”‚ (2) Feature Extraction (optional) - β”‚ ----------------------- - β”‚ A feature extractor (e.g. a convolutional - β”‚ neural net) is used to extract image features - β”‚ from the source view(s). - β”‚ β”‚ - β”‚ β–Ό - β”‚ (3) View Sampling (optional) - β”‚ ------------------ - β”‚ Image features are sampled at the 2D projections - β”‚ of a set of 3D points along each of the sampled - β”‚ target rays from (1). - β”‚ β”‚ - β”‚ β–Ό - β”‚ (4) Feature Aggregation (optional) - β”‚ ------------------ - β”‚ Aggregate features and masks sampled from - β”‚ image view(s) in (3). - β”‚ β”‚ - β”‚____________β–Ό - β”‚ - β–Ό - (5) Implicit Function Evaluation - ------------------ - Evaluate the implicit function(s) at the sampled ray points - (optionally pass in the aggregated image features from (4)). - (also optionally pass in a global encoding from global_encoder). - β”‚ - β–Ό - (6) Rendering - ------------------ - Render the image into the target cameras by raymarching along - the sampled rays and aggregating the colors and densities - output by the implicit function in (5). - β”‚ - β–Ό - (7) Loss Computation - ------------------ - Compute losses based on the predicted target image(s). - - - The `forward` function of GenericModel executes - this sequence of steps. Currently, steps 1, 3, 4, 5, 6 - can be customized by intializing a subclass of the appropriate - baseclass and adding the newly created module to the registry. - Please see https://github.com/facebookresearch/pytorch3d/blob/main/projects/implicitron_trainer/README.md#custom-plugins - for more details on how to create and register a custom component. - - In the config .yaml files for experiments, the parameters below are - contained in the - `model_factory_ImplicitronModelFactory_args.model_GenericModel_args` - node. As GenericModel derives from ReplaceableBase, the input arguments are - parsed by the run_auto_creation function to initialize the - necessary member modules. Please see implicitron_trainer/README.md - for more details on this process. - - Args: - mask_images: Whether or not to mask the RGB image background given the - foreground mask (the `fg_probability` argument of `GenericModel.forward`) - mask_depths: Whether or not to mask the depth image background given the - foreground mask (the `fg_probability` argument of `GenericModel.forward`) - render_image_width: Width of the output image to render - render_image_height: Height of the output image to render - mask_threshold: If greater than 0.0, the foreground mask is - thresholded by this value before being applied to the RGB/Depth images - output_rasterized_mc: If True, visualize the Monte-Carlo pixel renders by - splatting onto an image grid. Default: False. - bg_color: RGB values for setting the background color of input image - if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own - way to determine the background color of its output, unrelated to this. - num_passes: The specified implicit_function is initialized num_passes - times and run sequentially. - chunk_size_grid: The total number of points which can be rendered - per chunk. 
This is used to compute the number of rays used - per chunk when the chunked version of the renderer is used (in order - to fit rendering on all rays in memory) - render_features_dimensions: The number of output features to render. - Defaults to 3, corresponding to RGB images. - n_train_target_views: The number of cameras to render into at training - time; first `n_train_target_views` in the batch are considered targets, - the rest are sources. - sampling_mode_training: The sampling method to use during training. Must be - a value from the RenderSamplingMode Enum. - sampling_mode_evaluation: Same as above but for evaluation. - global_encoder_class_type: The name of the class to use for global_encoder, - which must be available in the registry. Or `None` to disable global encoder. - global_encoder: An instance of `GlobalEncoder`. This is used to generate an encoding - of the image (referred to as the global_code) that can be used to model aspects of - the scene such as multiple objects or morphing objects. It is up to the implicit - function definition how to use it, but the most typical way is to broadcast and - concatenate to the other inputs for the implicit function. - raysampler_class_type: The name of the raysampler class which is available - in the global registry. - raysampler: An instance of RaySampler which is used to emit - rays from the target view(s). - renderer_class_type: The name of the renderer class which is available in the global - registry. - renderer: A renderer class which inherits from BaseRenderer. This is used to - generate the images from the target view(s). - image_feature_extractor_class_type: If a str, constructs and enables - the `image_feature_extractor` object of this type. Or None if not needed. - image_feature_extractor: A module for extrating features from an input image. - view_pooler_enabled: If `True`, constructs and enables the `view_pooler` object. - This means features are sampled from the source image(s) - at the projected 2d locations of the sampled 3d ray points from the target - view(s), i.e. this activates step (3) above. - view_pooler: An instance of ViewPooler which is used for sampling of - image-based features at the 2D projections of a set - of 3D points and aggregating the sampled features. - implicit_function_class_type: The type of implicit function to use which - is available in the global registry. - implicit_function: An instance of ImplicitFunctionBase. The actual implicit functions - are initialised to be in self._implicit_functions. - view_metrics: An instance of ViewMetricsBase used to compute loss terms which - are independent of the model's parameters. - view_metrics_class_type: The type of view metrics to use, must be available in - the global registry. - regularization_metrics: An instance of RegularizationMetricsBase used to compute - regularization terms which can depend on the model's parameters. - regularization_metrics_class_type: The type of regularization metrics to use, - must be available in the global registry. - loss_weights: A dictionary with a {loss_name: weight} mapping; see documentation - for `ViewMetrics` class for available loss functions. - log_vars: A list of variable names which should be logged. - The names should correspond to a subset of the keys of the - dict `preds` output by the `forward` function. 
- """ # noqa: B950 - - mask_images: bool = True - mask_depths: bool = True - render_image_width: int = 400 - render_image_height: int = 400 - mask_threshold: float = 0.5 - output_rasterized_mc: bool = False - bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0) - num_passes: int = 1 - chunk_size_grid: int = 4096 - render_features_dimensions: int = 3 - tqdm_trigger_threshold: int = 16 - - n_train_target_views: int = 1 - sampling_mode_training: str = "mask_sample" - sampling_mode_evaluation: str = "full_grid" - - # ---- global encoder settings - global_encoder_class_type: Optional[str] = None - global_encoder: Optional[GlobalEncoderBase] - - # ---- raysampler - raysampler_class_type: str = "AdaptiveRaySampler" - raysampler: RaySamplerBase - - # ---- renderer configs - renderer_class_type: str = "MultiPassEmissionAbsorptionRenderer" - renderer: BaseRenderer - - # ---- image feature extractor settings - # (This is only created if view_pooler is enabled) - image_feature_extractor: Optional[FeatureExtractorBase] - image_feature_extractor_class_type: Optional[str] = None - # ---- view pooler settings - view_pooler_enabled: bool = False - view_pooler: Optional[ViewPooler] - - # ---- implicit function settings - implicit_function_class_type: str = "NeuralRadianceFieldImplicitFunction" - # This is just a model, never constructed. - # The actual implicit functions live in self._implicit_functions - implicit_function: ImplicitFunctionBase - - # ----- metrics - view_metrics: ViewMetricsBase - view_metrics_class_type: str = "ViewMetrics" - - regularization_metrics: RegularizationMetricsBase - regularization_metrics_class_type: str = "RegularizationMetrics" - - # ---- loss weights - loss_weights: Dict[str, float] = field( - default_factory=lambda: { - "loss_rgb_mse": 1.0, - "loss_prev_stage_rgb_mse": 1.0, - "loss_mask_bce": 0.0, - "loss_prev_stage_mask_bce": 0.0, - } - ) - - # ---- variables to be logged (logger automatically ignores if not computed) - log_vars: List[str] = field( - default_factory=lambda: [ - "loss_rgb_psnr_fg", - "loss_rgb_psnr", - "loss_rgb_mse", - "loss_rgb_huber", - "loss_depth_abs", - "loss_depth_abs_fg", - "loss_mask_neg_iou", - "loss_mask_bce", - "loss_mask_beta_prior", - "loss_eikonal", - "loss_density_tv", - "loss_depth_neg_penalty", - "loss_autodecoder_norm", - # metrics that are only logged in 2+stage renderes - "loss_prev_stage_rgb_mse", - "loss_prev_stage_rgb_psnr_fg", - "loss_prev_stage_rgb_psnr", - "loss_prev_stage_mask_bce", - # basic metrics - "objective", - "epoch", - "sec/it", - ] - ) - - @classmethod - def pre_expand(cls) -> None: - # use try/finally to bypass cinder's lazy imports - try: - from pytorch3d.implicitron.models.feature_extractor.resnet_feature_extractor import ( # noqa: F401, B950 - ResNetFeatureExtractor, - ) - from pytorch3d.implicitron.models.implicit_function.idr_feature_field import ( # noqa: F401, B950 - IdrFeatureField, - ) - from pytorch3d.implicitron.models.implicit_function.neural_radiance_field import ( # noqa: F401, B950 - NeRFormerImplicitFunction, - ) - from pytorch3d.implicitron.models.implicit_function.scene_representation_networks import ( # noqa: F401, B950 - SRNHyperNetImplicitFunction, - ) - from pytorch3d.implicitron.models.implicit_function.voxel_grid_implicit_function import ( # noqa: F401, B950 - VoxelGridImplicitFunction, - ) - from pytorch3d.implicitron.models.renderer.lstm_renderer import ( # noqa: F401 - LSTMRenderer, - ) - from pytorch3d.implicitron.models.renderer.multipass_ea import ( # noqa - 
MultiPassEmissionAbsorptionRenderer, - ) - from pytorch3d.implicitron.models.renderer.sdf_renderer import ( # noqa: F401 - SignedDistanceFunctionRenderer, - ) - finally: - pass - - def __post_init__(self): - if self.view_pooler_enabled: - if self.image_feature_extractor_class_type is None: - raise ValueError( - "image_feature_extractor must be present for view pooling." - ) - run_auto_creation(self) - - self._implicit_functions = self._construct_implicit_functions() - - log_loss_weights(self.loss_weights, logger) - - def forward( - self, - *, # force keyword-only arguments - image_rgb: Optional[torch.Tensor], - camera: CamerasBase, - fg_probability: Optional[torch.Tensor] = None, - mask_crop: Optional[torch.Tensor] = None, - depth_map: Optional[torch.Tensor] = None, - sequence_name: Optional[List[str]] = None, - frame_timestamp: Optional[torch.Tensor] = None, - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - **kwargs, - ) -> Dict[str, Any]: - """ - Args: - image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images; - the first `min(B, n_train_target_views)` images are considered targets and - are used to supervise the renders; the rest corresponding to the source - viewpoints from which features will be extracted. - camera: An instance of CamerasBase containing a batch of `B` cameras corresponding - to the viewpoints of target images, from which the rays will be sampled, - and source images, which will be used for intersecting with target rays. - fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch of - foreground masks. - mask_crop: A binary tensor of shape `(B, 1, H, W)` denoting valid - regions in the input images (i.e. regions that do not correspond - to, e.g., zero-padding). When the `RaySampler`'s sampling mode is set to - "mask_sample", rays will be sampled in the non zero regions. - depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps. - sequence_name: A list of `B` strings corresponding to the sequence names - from which images `image_rgb` were extracted. They are used to match - target frames with relevant source frames. - frame_timestamp: Optionally a tensor of shape `(B,)` containing a batch - of frame timestamps. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering. - - Returns: - preds: A dictionary containing all outputs of the forward pass including the - rendered images, depths, masks, losses and other metrics. - """ - image_rgb, fg_probability, depth_map = preprocess_input( - image_rgb, - fg_probability, - depth_map, - self.mask_images, - self.mask_depths, - self.mask_threshold, - self.bg_color, - ) - - # Obtain the batch size from the camera as this is the only required input. - batch_size = camera.R.shape[0] - - # Determine the number of target views, i.e. cameras we render into. - n_targets = ( - 1 - if evaluation_mode == EvaluationMode.EVALUATION - else batch_size - if self.n_train_target_views <= 0 - else min(self.n_train_target_views, batch_size) - ) - - # A helper function for selecting n_target first elements from the input - # where the latter can be None. - def safe_slice_targets( - tensor: Optional[Union[torch.Tensor, List[str]]], - ) -> Optional[Union[torch.Tensor, List[str]]]: - return None if tensor is None else tensor[:n_targets] - - # Select the target cameras. - target_cameras = camera[list(range(n_targets))] - - # Determine the used ray sampling mode. 
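The target-view selection above collapses to a small rule. A minimal, self-contained sketch of that rule (the `EvaluationMode` enum below is a stand-in for the one imported by this module, and the numbers are hypothetical):

from enum import Enum

class EvaluationMode(Enum):
    TRAINING = "training"
    EVALUATION = "evaluation"

def num_targets(evaluation_mode, batch_size, n_train_target_views):
    # Evaluation renders a single target view; training caps the number of
    # targets at n_train_target_views, with <= 0 meaning "use the whole batch".
    if evaluation_mode == EvaluationMode.EVALUATION:
        return 1
    if n_train_target_views <= 0:
        return batch_size
    return min(n_train_target_views, batch_size)

assert num_targets(EvaluationMode.EVALUATION, batch_size=8, n_train_target_views=1) == 1
assert num_targets(EvaluationMode.TRAINING, batch_size=8, n_train_target_views=0) == 8
assert num_targets(EvaluationMode.TRAINING, batch_size=8, n_train_target_views=2) == 2

The remaining views in the batch act as sources: rays are emitted only from the first `n_targets` cameras, while all views can contribute image features.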
- sampling_mode = RenderSamplingMode( - self.sampling_mode_training - if evaluation_mode == EvaluationMode.TRAINING - else self.sampling_mode_evaluation - ) - - # (1) Sample rendering rays with the ray sampler. - # pyre-ignore[29] - ray_bundle: ImplicitronRayBundle = self.raysampler( - target_cameras, - evaluation_mode, - mask=mask_crop[:n_targets] - if mask_crop is not None and sampling_mode == RenderSamplingMode.MASK_SAMPLE - else None, - ) - - # custom_args hold additional arguments to the implicit function. - custom_args = {} - - if self.image_feature_extractor is not None: - # (2) Extract features for the image - img_feats = self.image_feature_extractor(image_rgb, fg_probability) - else: - img_feats = None - - if self.view_pooler_enabled: - if sequence_name is None: - raise ValueError("sequence_name must be provided for view pooling") - assert img_feats is not None - - # (3-4) Sample features and masks at the ray points. - # Aggregate features from multiple views. - def curried_viewpooler(pts): - return self.view_pooler( - pts=pts, - seq_id_pts=sequence_name[:n_targets], - camera=camera, - seq_id_camera=sequence_name, - feats=img_feats, - masks=mask_crop, - ) - - custom_args["fun_viewpool"] = curried_viewpooler - - global_code = None - if self.global_encoder is not None: - global_code = self.global_encoder( # pyre-fixme[29] - sequence_name=safe_slice_targets(sequence_name), - frame_timestamp=safe_slice_targets(frame_timestamp), - ) - custom_args["global_code"] = global_code - - for func in self._implicit_functions: - func.bind_args(**custom_args) - - inputs_to_be_chunked = {} - if fg_probability is not None and self.renderer.requires_object_mask(): - sampled_fb_prob = rend_utils.ndc_grid_sample( - fg_probability[:n_targets], ray_bundle.xys, mode="nearest" - ) - inputs_to_be_chunked["object_mask"] = sampled_fb_prob > 0.5 - - # (5)-(6) Implicit function evaluation and Rendering - rendered = self._render( - ray_bundle=ray_bundle, - sampling_mode=sampling_mode, - evaluation_mode=evaluation_mode, - implicit_functions=self._implicit_functions, - inputs_to_be_chunked=inputs_to_be_chunked, - ) - - # Unbind the custom arguments to prevent pytorch from storing - # large buffers of intermediate results due to points in the - # bound arguments. - for func in self._implicit_functions: - func.unbind_args() - - # A dict to store losses as well as rendering results. - preds: Dict[str, Any] = {} - - preds.update( - self.view_metrics( - results=preds, - raymarched=rendered, - ray_bundle=ray_bundle, - image_rgb=safe_slice_targets(image_rgb), - depth_map=safe_slice_targets(depth_map), - fg_probability=safe_slice_targets(fg_probability), - mask_crop=safe_slice_targets(mask_crop), - ) - ) - - preds.update( - self.regularization_metrics( - results=preds, - model=self, - ) - ) - - if sampling_mode == RenderSamplingMode.MASK_SAMPLE: - if self.output_rasterized_mc: - # Visualize the monte-carlo pixel renders by splatting onto - # an image grid. 
- ( - preds["images_render"], - preds["depths_render"], - preds["masks_render"], - ) = rasterize_sparse_ray_bundle( - ray_bundle, - rendered.features, - (self.render_image_height, self.render_image_width), - rendered.depths, - masks=rendered.masks, - ) - elif sampling_mode == RenderSamplingMode.FULL_GRID: - preds["images_render"] = rendered.features.permute(0, 3, 1, 2) - preds["depths_render"] = rendered.depths.permute(0, 3, 1, 2) - preds["masks_render"] = rendered.masks.permute(0, 3, 1, 2) - - preds["implicitron_render"] = ImplicitronRender( - image_render=preds["images_render"], - depth_render=preds["depths_render"], - mask_render=preds["masks_render"], - ) - else: - raise AssertionError("Unreachable state") - - # (7) Compute losses - objective = self._get_objective(preds) - if objective is not None: - preds["objective"] = objective - - return preds - - def _get_objective(self, preds: Dict[str, torch.Tensor]) -> Optional[torch.Tensor]: - """ - A helper function to compute the overall loss as the dot product - of individual loss functions with the corresponding weights. - """ - return weighted_sum_losses(preds, self.loss_weights) - - def visualize( - self, - viz: Optional["Visdom"], - visdom_env_imgs: str, - preds: Dict[str, Any], - prefix: str, - ) -> None: - """ - Helper function to visualize the predictions generated - in the forward pass. - - Args: - viz: Visdom connection object - visdom_env_imgs: name of visdom environment for the images. - preds: predictions dict like returned by forward() - prefix: prepended to the names of images - """ - if viz is None or not viz.check_connection(): - logger.info("no visdom server! -> skipping batch vis") - return - - idx_image = 0 - title = f"{prefix}_im{idx_image}" - - vis_utils.visualize_basics(viz, preds, visdom_env_imgs, title=title) - - def _render( - self, - *, - ray_bundle: ImplicitronRayBundle, - inputs_to_be_chunked: Dict[str, torch.Tensor], - sampling_mode: RenderSamplingMode, - **kwargs, - ) -> RendererOutput: - """ - Args: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. - inputs_to_be_chunked: A collection of tensor of shape `(B, _, H, W)`. E.g. - SignedDistanceFunctionRenderer requires "object_mask", shape - (B, 1, H, W), the silhouette of the object in the image. When - chunking, they are passed to the renderer as shape - `(B, _, chunksize)`. - sampling_mode: The sampling method to use. Must be a value from the - RenderSamplingMode Enum. - - Returns: - An instance of RendererOutput - """ - if sampling_mode == RenderSamplingMode.FULL_GRID and self.chunk_size_grid > 0: - return apply_chunked( - self.renderer, - chunk_generator( - self.chunk_size_grid, - ray_bundle, - inputs_to_be_chunked, - self.tqdm_trigger_threshold, - **kwargs, - ), - lambda batch: torch.cat(batch, dim=1).reshape( - *ray_bundle.lengths.shape[:-1], -1 - ), - ) - else: - # pyre-fixme[29]: `BaseRenderer` is not a function. - return self.renderer( - ray_bundle=ray_bundle, - **inputs_to_be_chunked, - **kwargs, - ) - - def _get_viewpooled_feature_dim(self) -> int: - if self.view_pooler is None: - return 0 - assert self.image_feature_extractor is not None - return self.view_pooler.get_aggregated_feature_dim( - self.image_feature_extractor.get_feat_dims() - ) - - @classmethod - def raysampler_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain fields of the raysampler because we want to set - them from our own members. 
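`_get_objective` above forms the training objective as a weighted sum of the scalar losses stored in `preds`. The helper it calls, `weighted_sum_losses`, is imported from elsewhere in Implicitron; the function below is only an illustrative re-implementation of the weighting scheme (the real helper may differ in edge-case handling):

from typing import Dict, Optional

import torch

def weighted_sum_losses_sketch(
    preds: Dict[str, torch.Tensor], loss_weights: Dict[str, float]
) -> Optional[torch.Tensor]:
    # Multiply each configured loss by its weight and sum; losses with a zero
    # weight or absent from preds contribute nothing.
    terms = [
        weight * preds[name]
        for name, weight in loss_weights.items()
        if weight != 0.0 and name in preds
    ]
    if not terms:
        return None  # nothing to optimize for this batch
    return sum(terms)

preds = {"loss_rgb_mse": torch.tensor(0.2), "loss_mask_bce": torch.tensor(0.7)}
weights = {"loss_rgb_mse": 1.0, "loss_mask_bce": 0.0}
print(weighted_sum_losses_sketch(preds, weights))  # tensor(0.2000)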
- """ - del args["sampling_mode_training"] - del args["sampling_mode_evaluation"] - del args["image_width"] - del args["image_height"] - - def create_raysampler(self): - extra_args = { - "sampling_mode_training": self.sampling_mode_training, - "sampling_mode_evaluation": self.sampling_mode_evaluation, - "image_width": self.render_image_width, - "image_height": self.render_image_height, - } - raysampler_args = getattr( - self, "raysampler_" + self.raysampler_class_type + "_args" - ) - self.raysampler = registry.get(RaySamplerBase, self.raysampler_class_type)( - **raysampler_args, **extra_args - ) - - @classmethod - def renderer_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain fields of the renderer because we want to set - them based on other inputs. - """ - args.pop("render_features_dimensions", None) - args.pop("object_bounding_sphere", None) - - def create_renderer(self): - extra_args = {} - - if self.renderer_class_type == "SignedDistanceFunctionRenderer": - extra_args["render_features_dimensions"] = self.render_features_dimensions - if not hasattr(self.raysampler, "scene_extent"): - raise ValueError( - "SignedDistanceFunctionRenderer requires" - + " a raysampler that defines the 'scene_extent' field" - + " (this field is supported by, e.g., the adaptive raysampler - " - + " self.raysampler_class_type='AdaptiveRaySampler')." - ) - extra_args["object_bounding_sphere"] = self.raysampler.scene_extent - - renderer_args = getattr(self, "renderer_" + self.renderer_class_type + "_args") - self.renderer = registry.get(BaseRenderer, self.renderer_class_type)( - **renderer_args, **extra_args - ) - - def create_implicit_function(self) -> None: - """ - No-op called by run_auto_creation so that self.implicit_function - does not get created. __post_init__ creates the implicit function(s) - in wrappers explicitly in self._implicit_functions. - """ - pass - - @classmethod - def implicit_function_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain implicit_function fields because we want to set - them based on other inputs. - """ - args.pop("feature_vector_size", None) - args.pop("encoding_dim", None) - args.pop("latent_dim", None) - args.pop("latent_dim_hypernet", None) - args.pop("color_dim", None) - - def _construct_implicit_functions(self): - """ - After run_auto_creation has been called, the arguments - for each of the possible implicit function methods are - available. `GenericModel` arguments are first validated - based on the custom requirements for each specific - implicit function method. Then the required implicit - function(s) are initialized. 
- """ - extra_args = {} - global_encoder_dim = ( - 0 if self.global_encoder is None else self.global_encoder.get_encoding_dim() - ) - viewpooled_feature_dim = self._get_viewpooled_feature_dim() - - if self.implicit_function_class_type in ( - "NeuralRadianceFieldImplicitFunction", - "NeRFormerImplicitFunction", - ): - extra_args["latent_dim"] = viewpooled_feature_dim + global_encoder_dim - extra_args["color_dim"] = self.render_features_dimensions - - if self.implicit_function_class_type == "IdrFeatureField": - extra_args["feature_vector_size"] = self.render_features_dimensions - extra_args["encoding_dim"] = global_encoder_dim - - if self.implicit_function_class_type == "SRNImplicitFunction": - extra_args["latent_dim"] = viewpooled_feature_dim + global_encoder_dim - - # srn_hypernet preprocessing - if self.implicit_function_class_type == "SRNHyperNetImplicitFunction": - extra_args["latent_dim"] = viewpooled_feature_dim - extra_args["latent_dim_hypernet"] = global_encoder_dim - - # check that for srn, srn_hypernet, idr we have self.num_passes=1 - implicit_function_type = registry.get( - ImplicitFunctionBase, self.implicit_function_class_type - ) - expand_args_fields(implicit_function_type) - if self.num_passes != 1 and not implicit_function_type.allows_multiple_passes(): - raise ValueError( - self.implicit_function_class_type - + f"requires num_passes=1 not {self.num_passes}" - ) - - if implicit_function_type.requires_pooling_without_aggregation(): - if self.view_pooler_enabled and self.view_pooler.has_aggregation(): - raise ValueError( - "The chosen implicit function requires view pooling without aggregation." - ) - config_name = f"implicit_function_{self.implicit_function_class_type}_args" - config = getattr(self, config_name, None) - if config is None: - raise ValueError(f"{config_name} not present") - implicit_functions_list = [ - ImplicitFunctionWrapper(implicit_function_type(**config, **extra_args)) - for _ in range(self.num_passes) - ] - return torch.nn.ModuleList(implicit_functions_list) diff --git a/pytorch3d/pytorch3d/implicitron/models/global_encoder/__init__.py b/pytorch3d/pytorch3d/implicitron/models/global_encoder/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/global_encoder/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/models/global_encoder/autodecoder.py b/pytorch3d/pytorch3d/implicitron/models/global_encoder/autodecoder.py deleted file mode 100644 index 764785bc1f3f17735b119deb65c162cdab6fea61..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/global_encoder/autodecoder.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from collections import defaultdict -from typing import Dict, List, Optional, Union - -import torch -from pytorch3d.implicitron.tools.config import Configurable - - -class Autodecoder(Configurable, torch.nn.Module): - """ - Autodecoder which maps a list of integer or string keys to optimizable embeddings. 
- - Settings: - encoding_dim: Embedding dimension for the decoder. - n_instances: The maximum number of instances stored by the autodecoder. - init_scale: Scale factor for the initial autodecoder weights. - ignore_input: If `True`, optimizes a single code for any input. - """ - - encoding_dim: int = 0 - n_instances: int = 1 - init_scale: float = 1.0 - ignore_input: bool = False - - def __post_init__(self): - if self.n_instances <= 0: - raise ValueError(f"Invalid n_instances {self.n_instances}") - - self._autodecoder_codes = torch.nn.Embedding( - self.n_instances, - self.encoding_dim, - scale_grad_by_freq=True, - ) - with torch.no_grad(): - # weight has been initialised from Normal(0, 1) - self._autodecoder_codes.weight *= self.init_scale - - self._key_map = self._build_key_map() - # Make sure to register hooks for correct handling of saving/loading - # the module's _key_map. - self._register_load_state_dict_pre_hook(self._load_key_map_hook) - self._register_state_dict_hook(_save_key_map_hook) - - def _build_key_map( - self, key_map_dict: Optional[Dict[str, int]] = None - ) -> Dict[str, int]: - """ - Args: - key_map_dict: A dictionary used to initialize the key_map. - - Returns: - key_map: a dictionary of key: id pairs. - """ - # increments the counter when asked for a new value - key_map = defaultdict(iter(range(self.n_instances)).__next__) - if key_map_dict is not None: - # Assign all keys from the loaded key_map_dict to self._key_map. - # Since this is done in the original order, it should generate - # the same set of key:id pairs. We check this with an assert to be sure. - for x, x_id in key_map_dict.items(): - x_id_ = key_map[x] - assert x_id == x_id_ - return key_map - - def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]: - return (self._autodecoder_codes.weight**2).mean() - - def get_encoding_dim(self) -> int: - return self.encoding_dim - - def forward(self, x: Union[torch.LongTensor, List[str]]) -> Optional[torch.Tensor]: - """ - Args: - x: A batch of `N` identifiers. Either a long tensor of size - `(N,)` keys in [0, n_instances), or a list of `N` string keys that - are hashed to codes (without collisions). - - Returns: - codes: A tensor of shape `(N, self.encoding_dim)` containing the - key-specific autodecoder codes. - """ - if self.ignore_input: - x = ["singleton"] - - if isinstance(x[0], str): - try: - # pyre-fixme[9]: x has type `Union[List[str], LongTensor]`; used as - # `Tensor`. - x = torch.tensor( - [self._key_map[elem] for elem in x], - dtype=torch.long, - device=next(self.parameters()).device, - ) - except StopIteration: - raise ValueError("Not enough n_instances in the autodecoder") from None - - return self._autodecoder_codes(x) - - def _load_key_map_hook( - self, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ): - """ - Args: - state_dict (dict): a dict containing parameters and - persistent buffers. - prefix (str): the prefix for parameters and buffers used in this - module - local_metadata (dict): a dict containing the metadata for this module. 
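A hedged usage sketch for `Autodecoder`: string keys are lazily assigned rows of an `nn.Embedding`, so a repeated key resolves to the same optimizable code. Direct construction via `expand_args_fields` mirrors how this codebase instantiates other `Configurable` modules; inside a full model the autodecoder is normally built by the config system instead.

import torch

from pytorch3d.implicitron.models.global_encoder.autodecoder import Autodecoder
from pytorch3d.implicitron.tools.config import expand_args_fields

expand_args_fields(Autodecoder)
codec = Autodecoder(encoding_dim=16, n_instances=4)

# Three queries but only two distinct keys -> two embedding slots are used.
codes = codec(["seq_a", "seq_b", "seq_a"])
assert codes.shape == (3, 16)
assert torch.equal(codes[0], codes[2])  # same key, same code
# Querying more distinct keys than n_instances raises
# ValueError("Not enough n_instances in the autodecoder").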
- strict (bool): whether to strictly enforce that the keys in - :attr:`state_dict` with :attr:`prefix` match the names of - parameters and buffers in this module - missing_keys (list of str): if ``strict=True``, add missing keys to - this list - unexpected_keys (list of str): if ``strict=True``, add unexpected - keys to this list - error_msgs (list of str): error messages should be added to this - list, and will be reported together in - :meth:`~torch.nn.Module.load_state_dict` - - Returns: - Constructed key_map if it exists in the state_dict - else raises a warning only. - """ - key_map_key = prefix + "_key_map" - if key_map_key in state_dict: - key_map_dict = state_dict.pop(key_map_key) - self._key_map = self._build_key_map(key_map_dict=key_map_dict) - else: - warnings.warn("No key map in Autodecoder state dict!") - - -def _save_key_map_hook( - self, - state_dict, - prefix, - local_metadata, -) -> None: - """ - Args: - state_dict (dict): a dict containing parameters and - persistent buffers. - prefix (str): the prefix for parameters and buffers used in this - module - local_metadata (dict): a dict containing the metadata for this module. - """ - key_map_key = prefix + "_key_map" - key_map_dict = dict(self._key_map.items()) - state_dict[key_map_key] = key_map_dict diff --git a/pytorch3d/pytorch3d/implicitron/models/global_encoder/global_encoder.py b/pytorch3d/pytorch3d/implicitron/models/global_encoder/global_encoder.py deleted file mode 100644 index bb7defac308a6d7497a23b2b2d93cd713ceee6f5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/global_encoder/global_encoder.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Optional, Union - -import torch -from pytorch3d.implicitron.tools.config import ( - registry, - ReplaceableBase, - run_auto_creation, -) -from pytorch3d.renderer.implicit import HarmonicEmbedding - -from .autodecoder import Autodecoder - - -class GlobalEncoderBase(ReplaceableBase): - """ - A base class for implementing encoders of global frame-specific quantities. - - The latter includes e.g. the harmonic encoding of a frame timestamp - (`HarmonicTimeEncoder`), or an autodecoder encoding of the frame's sequence - (`SequenceAutodecoder`). - """ - - def get_encoding_dim(self): - """ - Returns the dimensionality of the returned encoding. - """ - raise NotImplementedError() - - def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]: - """ - Calculates the squared norm of the encoding to report as the - `autodecoder_norm` loss of the model, as a zero dimensional tensor. - """ - raise NotImplementedError() - - def forward( - self, - *, - frame_timestamp: Optional[torch.Tensor] = None, - sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None, - **kwargs, - ) -> torch.Tensor: - """ - Given a set of inputs to encode, generates a tensor containing the encoding. - - Returns: - encoding: The tensor containing the global encoding. - """ - raise NotImplementedError() - - -# TODO: probabilistic embeddings? -@registry.register -class SequenceAutodecoder(GlobalEncoderBase, torch.nn.Module): # pyre-ignore: 13 - """ - A global encoder implementation which provides an autodecoder encoding - of the frame's sequence identifier. 
- """ - - autodecoder: Autodecoder - - def __post_init__(self): - run_auto_creation(self) - - def get_encoding_dim(self): - return self.autodecoder.get_encoding_dim() - - def forward( - self, - *, - frame_timestamp: Optional[torch.Tensor] = None, - sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None, - **kwargs, - ) -> torch.Tensor: - if sequence_name is None: - raise ValueError("sequence_name must be provided.") - # run dtype checks and pass sequence_name to self.autodecoder - return self.autodecoder(sequence_name) - - def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]: - return self.autodecoder.calculate_squared_encoding_norm() - - -@registry.register -class HarmonicTimeEncoder(GlobalEncoderBase, torch.nn.Module): - """ - A global encoder implementation which provides harmonic embeddings - of each frame's timestamp. - """ - - n_harmonic_functions: int = 10 - append_input: bool = True - time_divisor: float = 1.0 - - def __post_init__(self): - self._harmonic_embedding = HarmonicEmbedding( - n_harmonic_functions=self.n_harmonic_functions, - append_input=self.append_input, - ) - - def get_encoding_dim(self): - return self._harmonic_embedding.get_output_dim(1) - - def forward( - self, - *, - frame_timestamp: Optional[torch.Tensor] = None, - sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None, - **kwargs, - ) -> torch.Tensor: - if frame_timestamp is None: - raise ValueError("frame_timestamp must be provided.") - if frame_timestamp.shape[-1] != 1: - raise ValueError("Frame timestamp's last dimensions should be one.") - time = frame_timestamp / self.time_divisor - return self._harmonic_embedding(time) - - def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]: - return None diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/__init__.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/base.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/base.py deleted file mode 100644 index 7cd67edeb4dcd0597605b3b77a8759bebb44ab74..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/base.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from abc import ABC, abstractmethod -from typing import Optional - -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle - -from pytorch3d.implicitron.tools.config import ReplaceableBase -from pytorch3d.renderer.cameras import CamerasBase - - -class ImplicitFunctionBase(ABC, ReplaceableBase): - @abstractmethod - def forward( - self, - *, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - **kwargs, - ): - raise NotImplementedError() - - @staticmethod - def allows_multiple_passes() -> bool: - """ - Returns True if this implicit function allows - multiple passes. - """ - return False - - @staticmethod - def requires_pooling_without_aggregation() -> bool: - """ - Returns True if this implicit function needs - pooling without aggregation. - """ - return False - - def on_bind_args(self) -> None: - """ - Called when the custom args are fixed in the main model forward pass. - """ - pass diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/decoding_functions.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/decoding_functions.py deleted file mode 100644 index e7b3dadfc2b1d18cb1935825e8f69014b7b5e419..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/decoding_functions.py +++ /dev/null @@ -1,489 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -""" -This file contains - - modules which get used by ImplicitFunction objects for decoding an embedding defined in - space, e.g. to color or opacity. - - DecoderFunctionBase and its subclasses, which wrap some of those modules, providing - some such modules as an extension point which an ImplicitFunction object could use. -""" - -import logging -from dataclasses import field - -from enum import Enum -from typing import Dict, Optional, Tuple - -import torch - -from omegaconf import DictConfig - -from pytorch3d.implicitron.tools.config import ( - Configurable, - registry, - ReplaceableBase, - run_auto_creation, -) - -logger = logging.getLogger(__name__) - - -class DecoderActivation(Enum): - RELU = "relu" - SOFTPLUS = "softplus" - SIGMOID = "sigmoid" - IDENTITY = "identity" - - -class DecoderFunctionBase(ReplaceableBase, torch.nn.Module): - """ - Decoding function is a torch.nn.Module which takes the embedding of a location in - space and transforms it into the required quantity (for example density and color). - """ - - def forward( - self, features: torch.Tensor, z: Optional[torch.Tensor] = None - ) -> torch.Tensor: - """ - Args: - features (torch.Tensor): tensor of shape (batch, ..., num_in_features) - z: optional tensor to append to parts of the decoding function - Returns: - decoded_features (torch.Tensor) : tensor of - shape (batch, ..., num_out_features) - """ - raise NotImplementedError() - - -@registry.register -class ElementwiseDecoder(DecoderFunctionBase): - """ - Decoding function which scales the input, adds shift and then applies - `relu`, `softplus`, `sigmoid` or nothing on its input: - `result = operation(input * scale + shift)` - - Members: - scale: a scalar with which input is multiplied before being shifted. - Defaults to 1. - shift: a scalar which is added to the scaled input before performing - the operation. Defaults to 0. - operation: which operation to perform on the transformed input. 
Options are: - `RELU`, `SOFTPLUS`, `SIGMOID` or `IDENTITY`. Defaults to `IDENTITY`. - """ - - scale: float = 1 - shift: float = 0 - operation: DecoderActivation = DecoderActivation.IDENTITY - - def __post_init__(self): - if self.operation not in [ - DecoderActivation.RELU, - DecoderActivation.SOFTPLUS, - DecoderActivation.SIGMOID, - DecoderActivation.IDENTITY, - ]: - raise ValueError( - "`operation` can only be `RELU`, `SOFTPLUS`, `SIGMOID` or `IDENTITY`." - ) - - def forward( - self, features: torch.Tensor, z: Optional[torch.Tensor] = None - ) -> torch.Tensor: - transfomed_input = features * self.scale + self.shift - if self.operation == DecoderActivation.SOFTPLUS: - return torch.nn.functional.softplus(transfomed_input) - if self.operation == DecoderActivation.RELU: - return torch.nn.functional.relu(transfomed_input) - if self.operation == DecoderActivation.SIGMOID: - return torch.nn.functional.sigmoid(transfomed_input) - return transfomed_input - - -class MLPWithInputSkips(Configurable, torch.nn.Module): - """ - Implements the multi-layer perceptron architecture of the Neural Radiance Field. - - As such, `MLPWithInputSkips` is a multi layer perceptron consisting - of a sequence of linear layers with ReLU activations. - - Additionally, for a set of predefined layers `input_skips`, the forward pass - appends a skip tensor `z` to the output of the preceding layer. - - Note that this follows the architecture described in the Supplementary - Material (Fig. 7) of [1], for which keep the defaults for: - - `last_layer_bias_init` to None - - `last_activation` to "relu" - - `use_xavier_init` to `true` - - If you want to use this as a part of the color prediction in TensoRF model set: - - `last_layer_bias_init` to 0 - - `last_activation` to "sigmoid" - - `use_xavier_init` to `False` - - References: - [1] Ben Mildenhall and Pratul P. Srinivasan and Matthew Tancik - and Jonathan T. Barron and Ravi Ramamoorthi and Ren Ng: - NeRF: Representing Scenes as Neural Radiance Fields for View - Synthesis, ECCV2020 - - Members: - n_layers: The number of linear layers of the MLP. - input_dim: The number of channels of the input tensor. - output_dim: The number of channels of the output. - skip_dim: The number of channels of the tensor `z` appended when - evaluating the skip layers. - hidden_dim: The number of hidden units of the MLP. - input_skips: The list of layer indices at which we append the skip - tensor `z`. - last_layer_bias_init: If set then all the biases in the last layer - are initialized to that value. - last_activation: Which activation to use in the last layer. Options are: - "relu", "softplus", "sigmoid" and "identity". Default is "relu". - use_xavier_init: If True uses xavier init for all linear layer weights. - Otherwise the default PyTorch initialization is used. Default True. - """ - - n_layers: int = 8 - input_dim: int = 39 - output_dim: int = 256 - skip_dim: int = 39 - hidden_dim: int = 256 - input_skips: Tuple[int, ...] 
= (5,) - skip_affine_trans: bool = False - last_layer_bias_init: Optional[float] = None - last_activation: DecoderActivation = DecoderActivation.RELU - use_xavier_init: bool = True - - def __post_init__(self): - try: - last_activation = { - DecoderActivation.RELU: torch.nn.ReLU(True), - DecoderActivation.SOFTPLUS: torch.nn.Softplus(), - DecoderActivation.SIGMOID: torch.nn.Sigmoid(), - DecoderActivation.IDENTITY: torch.nn.Identity(), - }[self.last_activation] - except KeyError as e: - raise ValueError( - "`last_activation` can only be `RELU`," - " `SOFTPLUS`, `SIGMOID` or `IDENTITY`." - ) from e - - layers = [] - skip_affine_layers = [] - for layeri in range(self.n_layers): - dimin = self.hidden_dim if layeri > 0 else self.input_dim - dimout = self.hidden_dim if layeri + 1 < self.n_layers else self.output_dim - - if layeri > 0 and layeri in self.input_skips: - if self.skip_affine_trans: - skip_affine_layers.append( - self._make_affine_layer(self.skip_dim, self.hidden_dim) - ) - else: - dimin = self.hidden_dim + self.skip_dim - - linear = torch.nn.Linear(dimin, dimout) - if self.use_xavier_init: - _xavier_init(linear) - if layeri == self.n_layers - 1 and self.last_layer_bias_init is not None: - torch.nn.init.constant_(linear.bias, self.last_layer_bias_init) - layers.append( - torch.nn.Sequential(linear, torch.nn.ReLU(True)) - if not layeri + 1 < self.n_layers - else torch.nn.Sequential(linear, last_activation) - ) - self.mlp = torch.nn.ModuleList(layers) - if self.skip_affine_trans: - self.skip_affines = torch.nn.ModuleList(skip_affine_layers) - self._input_skips = set(self.input_skips) - self._skip_affine_trans = self.skip_affine_trans - - def _make_affine_layer(self, input_dim, hidden_dim): - l1 = torch.nn.Linear(input_dim, hidden_dim * 2) - l2 = torch.nn.Linear(hidden_dim * 2, hidden_dim * 2) - if self.use_xavier_init: - _xavier_init(l1) - _xavier_init(l2) - return torch.nn.Sequential(l1, torch.nn.ReLU(True), l2) - - def _apply_affine_layer(self, layer, x, z): - mu_log_std = layer(z) - mu, log_std = mu_log_std.split(mu_log_std.shape[-1] // 2, dim=-1) - std = torch.nn.functional.softplus(log_std) - return (x - mu) * std - - def forward(self, x: torch.Tensor, z: Optional[torch.Tensor] = None): - """ - Args: - x: The input tensor of shape `(..., input_dim)`. - z: The input skip tensor of shape `(..., skip_dim)` which is appended - to layers whose indices are specified by `input_skips`. - Returns: - y: The output tensor of shape `(..., output_dim)`. - """ - y = x - if z is None: - # if the skip tensor is None, we use `x` instead. - z = x - skipi = 0 - for li, layer in enumerate(self.mlp): - if li in self._input_skips: - if self._skip_affine_trans: - y = self._apply_affine_layer(self.skip_affines[skipi], y, z) - else: - y = torch.cat((y, z), dim=-1) - skipi += 1 - y = layer(y) - return y - - -@registry.register -# pyre-fixme[13]: Attribute `network` is never initialized. -class MLPDecoder(DecoderFunctionBase): - """ - Decoding function which uses `MLPWithIputSkips` to convert the embedding to output. - The `input_dim` of the `network` is set from the value of `input_dim` member. - - Members: - input_dim: dimension of input. - param_groups: dictionary where keys are names of individual parameters - or module members and values are the parameter group where the - parameter/member will be sorted to. "self" key is used to denote the - parameter group at the module level. Possible keys, including the "self" key - do not have to be defined. 
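A hedged usage sketch for `MLPWithInputSkips` as defined above: with `input_skips=(5,)`, the skip tensor `z` (which defaults to `x` when omitted) is concatenated to the activations entering layer 5, which is why that layer is built with input width `hidden_dim + skip_dim`. Constructing it directly after `expand_args_fields` mirrors how `NeuralRadianceFieldImplicitFunction` below builds the same module.

import torch

from pytorch3d.implicitron.models.implicit_function.decoding_functions import MLPWithInputSkips
from pytorch3d.implicitron.tools.config import expand_args_fields

expand_args_fields(MLPWithInputSkips)
mlp = MLPWithInputSkips(
    n_layers=8,
    input_dim=39,
    output_dim=256,
    skip_dim=39,
    hidden_dim=256,
    input_skips=(5,),
)

x = torch.randn(2, 1024, 39)  # (..., input_dim); z defaults to x when not given
y = mlp(x)
assert y.shape == (2, 1024, 256)  # (..., output_dim)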
By default all parameters are put into "default" - parameter group and have the learning rate defined in the optimizer, - it can be overridden at the: - - module level with β€œself” key, all the parameters and child - module's parameters will be put to that parameter group - - member level, which is the same as if the `param_groups` in that - member has key=β€œself” and value equal to that parameter group. - This is useful if members do not have `param_groups`, for - example torch.nn.Linear. - - parameter level, parameter with the same name as the key - will be put to that parameter group. - network_args: configuration for MLPWithInputSkips - """ - - input_dim: int = 3 - param_groups: Dict[str, str] = field(default_factory=lambda: {}) - network: MLPWithInputSkips - - def __post_init__(self): - run_auto_creation(self) - - def forward( - self, features: torch.Tensor, z: Optional[torch.Tensor] = None - ) -> torch.Tensor: - return self.network(features, z) - - @classmethod - def network_tweak_args(cls, type, args: DictConfig) -> None: - """ - Special method to stop get_default_args exposing member's `input_dim`. - """ - args.pop("input_dim", None) - - def create_network_impl(self, type, args: DictConfig) -> None: - """ - Set the input dimension of the `network` to the input dimension of the - decoding function. - """ - self.network = MLPWithInputSkips(input_dim=self.input_dim, **args) - - -class TransformerWithInputSkips(torch.nn.Module): - def __init__( - self, - n_layers: int = 8, - input_dim: int = 39, - output_dim: int = 256, - skip_dim: int = 39, - hidden_dim: int = 64, - input_skips: Tuple[int, ...] = (5,), - dim_down_factor: float = 1, - ): - """ - Args: - n_layers: The number of linear layers of the MLP. - input_dim: The number of channels of the input tensor. - output_dim: The number of channels of the output. - skip_dim: The number of channels of the tensor `z` appended when - evaluating the skip layers. - hidden_dim: The number of hidden units of the MLP. - input_skips: The list of layer indices at which we append the skip - tensor `z`. - """ - super().__init__() - - self.first = torch.nn.Linear(input_dim, hidden_dim) - _xavier_init(self.first) - - self.skip_linear = torch.nn.ModuleList() - - layers_pool, layers_ray = [], [] - dimout = 0 - for layeri in range(n_layers): - dimin = int(round(hidden_dim / (dim_down_factor**layeri))) - dimout = int(round(hidden_dim / (dim_down_factor ** (layeri + 1)))) - logger.info(f"Tr: {dimin} -> {dimout}") - for _i, l in enumerate((layers_pool, layers_ray)): - l.append( - TransformerEncoderLayer( - d_model=[dimin, dimout][_i], - nhead=4, - dim_feedforward=hidden_dim, - dropout=0.0, - d_model_out=dimout, - ) - ) - - if layeri in input_skips: - self.skip_linear.append(torch.nn.Linear(input_dim, dimin)) - - self.last = torch.nn.Linear(dimout, output_dim) - _xavier_init(self.last) - - # pyre-fixme[8]: Attribute has type `Tuple[ModuleList, ModuleList]`; used as - # `ModuleList`. - self.layers_pool, self.layers_ray = ( - torch.nn.ModuleList(layers_pool), - torch.nn.ModuleList(layers_ray), - ) - self._input_skips = set(input_skips) - - def forward( - self, - x: torch.Tensor, - z: Optional[torch.Tensor] = None, - ): - """ - Args: - x: The input tensor of shape - `(minibatch, n_pooled_feats, ..., n_ray_pts, input_dim)`. - z: The input skip tensor of shape - `(minibatch, n_pooled_feats, ..., n_ray_pts, skip_dim)` - which is appended to layers whose indices are specified by `input_skips`. 
- Returns: - y: The output tensor of shape - `(minibatch, 1, ..., n_ray_pts, input_dim)`. - """ - - if z is None: - # if the skip tensor is None, we use `x` instead. - z = x - - y = self.first(x) - - B, n_pool, n_rays, n_pts, dim = y.shape - - # y_p in n_pool, n_pts, B x n_rays x dim - y_p = y.permute(1, 3, 0, 2, 4) - - skipi = 0 - dimh = dim - for li, (layer_pool, layer_ray) in enumerate( - zip(self.layers_pool, self.layers_ray) - ): - y_pool_attn = y_p.reshape(n_pool, n_pts * B * n_rays, dimh) - if li in self._input_skips: - z_skip = self.skip_linear[skipi](z) - y_pool_attn = y_pool_attn + z_skip.permute(1, 3, 0, 2, 4).reshape( - n_pool, n_pts * B * n_rays, dimh - ) - skipi += 1 - # n_pool x B*n_rays*n_pts x dim - y_pool_attn, pool_attn = layer_pool(y_pool_attn, src_key_padding_mask=None) - dimh = y_pool_attn.shape[-1] - - y_ray_attn = ( - y_pool_attn.view(n_pool, n_pts, B * n_rays, dimh) - .permute(1, 0, 2, 3) - .reshape(n_pts, n_pool * B * n_rays, dimh) - ) - # n_pts x n_pool*B*n_rays x dim - y_ray_attn, ray_attn = layer_ray( - y_ray_attn, - src_key_padding_mask=None, - ) - - y_p = y_ray_attn.view(n_pts, n_pool, B * n_rays, dimh).permute(1, 0, 2, 3) - - y = y_p.view(n_pool, n_pts, B, n_rays, dimh).permute(2, 0, 3, 1, 4) - - W = torch.softmax(y[..., :1], dim=1) - y = (y * W).sum(dim=1) - y = self.last(y) - - return y - - -class TransformerEncoderLayer(torch.nn.Module): - r"""TransformerEncoderLayer is made up of self-attn and feedforward network. - This standard encoder layer is based on the paper "Attention Is All You Need". - Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, - Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in - Neural Information Processing Systems, pages 6000-6010. Users may modify or implement - in a different way during application. - - Args: - d_model: the number of expected features in the input (required). - nhead: the number of heads in the multiheadattention models (required). - dim_feedforward: the dimension of the feedforward network model (default=2048). - dropout: the dropout value (default=0.1). - activation: the activation function of intermediate layer, relu or gelu (default=relu). - - Examples:: - >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8) - >>> src = torch.rand(10, 32, 512) - >>> out = encoder_layer(src) - """ - - def __init__( - self, d_model, nhead, dim_feedforward=2048, dropout=0.1, d_model_out=-1 - ): - super(TransformerEncoderLayer, self).__init__() - self.self_attn = torch.nn.MultiheadAttention(d_model, nhead, dropout=dropout) - # Implementation of Feedforward model - self.linear1 = torch.nn.Linear(d_model, dim_feedforward) - self.dropout = torch.nn.Dropout(dropout) - d_model_out = d_model if d_model_out <= 0 else d_model_out - self.linear2 = torch.nn.Linear(dim_feedforward, d_model_out) - self.norm1 = torch.nn.LayerNorm(d_model) - self.norm2 = torch.nn.LayerNorm(d_model_out) - self.dropout1 = torch.nn.Dropout(dropout) - self.dropout2 = torch.nn.Dropout(dropout) - - self.activation = torch.nn.functional.relu - - def forward(self, src, src_mask=None, src_key_padding_mask=None): - r"""Pass the input through the encoder layer. - - Args: - src: the sequence to the encoder layer (required). - src_mask: the mask for the src sequence (optional). - src_key_padding_mask: the mask for the src keys per batch (optional). - - Shape: - see the docs in Transformer class. 
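Note that this local `TransformerEncoderLayer` differs from `torch.nn.TransformerEncoderLayer` in two ways: it can narrow the feature width via `d_model_out`, and it returns the attention weights alongside the output. A hedged sketch (shapes assume the default `batch_first=False` convention of `nn.MultiheadAttention`):

import torch

from pytorch3d.implicitron.models.implicit_function.decoding_functions import (
    TransformerEncoderLayer,
)

layer = TransformerEncoderLayer(
    d_model=64, nhead=4, dim_feedforward=128, dropout=0.0, d_model_out=32
)
src = torch.rand(10, 32, 64)       # (sequence, batch, d_model)
out, attn = layer(src)
assert out.shape == (10, 32, 32)   # width reduced to d_model_out
assert attn.shape == (32, 10, 10)  # head-averaged attention weights per batch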
- """ - src2, attn = self.self_attn( - src, src, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask - ) - src = src + self.dropout1(src2) - src = self.norm1(src) - src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) - d_out = src2.shape[-1] - src = src[..., :d_out] + self.dropout2(src2)[..., :d_out] - src = self.norm2(src) - return src, attn - - -def _xavier_init(linear) -> None: - """ - Performs the Xavier weight initialization of the linear layer `linear`. - """ - torch.nn.init.xavier_uniform_(linear.weight.data) diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py deleted file mode 100644 index cb70c957b3cf3f2dd77653c5fbf46a222525e85c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py +++ /dev/null @@ -1,176 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from https://github.com/lioryariv/idr/blob/main/code/model/ -# implicit_differentiable_renderer.py -# Copyright (c) 2020 Lior Yariv -import math -from typing import Optional, Tuple - -import torch -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import registry -from pytorch3d.renderer.implicit import HarmonicEmbedding - -from torch import nn - -from .base import ImplicitFunctionBase -from .utils import get_rays_points_world - - -@registry.register -class IdrFeatureField(ImplicitFunctionBase, torch.nn.Module): - """ - Implicit function as used in http://github.com/lioryariv/idr. - - Members: - d_in: dimension of the input point. - n_harmonic_functions_xyz: If -1, do not embed the point. - If >=0, use a harmonic embedding with this number of - harmonic functions. (The harmonic embedding includes the input - itself, so a value of 0 means the point is used but without - any harmonic functions.) - d_out and feature_vector_size: Sum of these is the output - dimension. This implicit function thus returns a concatenation - of `d_out` signed distance function values and `feature_vector_size` - features (such as colors). When used in `GenericModel`, - `feature_vector_size` corresponds is automatically set to - `render_features_dimensions`. - dims: list of hidden layer sizes. - geometric_init: whether to use custom weight initialization - in linear layers. If False, pytorch default (uniform sampling) - is used. - bias: if geometric_init=True, initial value for bias subtracted - in the last layer. - skip_in: List of indices of layers that receive as input the initial - value concatenated with the output of the previous layers. - weight_norm: whether to apply weight normalization to each layer. - pooled_feature_dim: If view pooling is in use (provided as - fun_viewpool to forward()) this must be its number of features. - Otherwise this must be set to 0. (If used from GenericModel, - this config value will be overridden automatically.) - encoding_dim: If global coding is in use (provided as global_code - to forward()) this must be its number of featuress. - Otherwise this must be set to 0. (If used from GenericModel, - this config value will be overridden automatically.) - """ - - feature_vector_size: int = 3 - d_in: int = 3 - d_out: int = 1 - dims: Tuple[int, ...] = (512, 512, 512, 512, 512, 512, 512, 512) - geometric_init: bool = True - bias: float = 1.0 - skip_in: Tuple[int, ...] 
= () - weight_norm: bool = True - n_harmonic_functions_xyz: int = 0 - pooled_feature_dim: int = 0 - encoding_dim: int = 0 - - def __post_init__(self): - dims = [self.d_in] + list(self.dims) + [self.d_out + self.feature_vector_size] - - self.embed_fn = None - if self.n_harmonic_functions_xyz >= 0: - self.embed_fn = HarmonicEmbedding( - self.n_harmonic_functions_xyz, append_input=True - ) - dims[0] = self.embed_fn.get_output_dim() - if self.pooled_feature_dim > 0: - dims[0] += self.pooled_feature_dim - if self.encoding_dim > 0: - dims[0] += self.encoding_dim - - self.num_layers = len(dims) - - out_dim = 0 - layers = [] - for layer_idx in range(self.num_layers - 1): - if layer_idx + 1 in self.skip_in: - out_dim = dims[layer_idx + 1] - dims[0] - else: - out_dim = dims[layer_idx + 1] - - lin = nn.Linear(dims[layer_idx], out_dim) - - if self.geometric_init: - if layer_idx == self.num_layers - 2: - torch.nn.init.normal_( - lin.weight, - mean=math.pi**0.5 / dims[layer_idx] ** 0.5, - std=0.0001, - ) - torch.nn.init.constant_(lin.bias, -self.bias) - elif self.n_harmonic_functions_xyz >= 0 and layer_idx == 0: - torch.nn.init.constant_(lin.bias, 0.0) - torch.nn.init.constant_(lin.weight[:, 3:], 0.0) - torch.nn.init.normal_( - lin.weight[:, :3], 0.0, 2**0.5 / out_dim**0.5 - ) - elif self.n_harmonic_functions_xyz >= 0 and layer_idx in self.skip_in: - torch.nn.init.constant_(lin.bias, 0.0) - torch.nn.init.normal_(lin.weight, 0.0, 2**0.5 / out_dim**0.5) - torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3) :], 0.0) - else: - torch.nn.init.constant_(lin.bias, 0.0) - torch.nn.init.normal_(lin.weight, 0.0, 2**0.5 / out_dim**0.5) - - if self.weight_norm: - lin = nn.utils.weight_norm(lin) - - layers.append(lin) - - self.linear_layers = torch.nn.ModuleList(layers) - self.out_dim = out_dim - self.softplus = nn.Softplus(beta=100) - - # pyre-fixme[14]: `forward` overrides method defined in `ImplicitFunctionBase` - # inconsistently. - def forward( - self, - *, - ray_bundle: Optional[ImplicitronRayBundle] = None, - rays_points_world: Optional[torch.Tensor] = None, - fun_viewpool=None, - global_code=None, - **kwargs, - ): - # this field only uses point locations - # rays_points_world.shape = [minibatch x ... x pts_per_ray x 3] - rays_points_world = get_rays_points_world(ray_bundle, rays_points_world) - - if rays_points_world.numel() == 0 or ( - self.embed_fn is None and fun_viewpool is None and global_code is None - ): - return torch.tensor( - [], device=rays_points_world.device, dtype=rays_points_world.dtype - ).view(0, self.out_dim) - - embeddings = [] - if self.embed_fn is not None: - embeddings.append(self.embed_fn(rays_points_world)) - - if fun_viewpool is not None: - assert rays_points_world.ndim == 2 - pooled_feature = fun_viewpool(rays_points_world[None]) - # TODO: pooled features are 4D! - embeddings.append(pooled_feature) - - if global_code is not None: - assert global_code.shape[0] == 1 # TODO: generalize to batches! 
- # This will require changing raytracer code - # embedding = embedding[None].expand(global_code.shape[0], *embedding.shape) - embeddings.append( - global_code[0, None, :].expand(rays_points_world.shape[0], -1) - ) - - embedding = torch.cat(embeddings, dim=-1) - x = embedding - for layer_idx in range(self.num_layers - 1): - if layer_idx in self.skip_in: - x = torch.cat([x, embedding], dim=-1) / 2**0.5 - - x = self.linear_layers[layer_idx](x) - - if layer_idx < self.num_layers - 2: - x = self.softplus(x) - - return x diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py deleted file mode 100644 index 0706d9a87acb4252a0cf7c920f7302ddef293b2b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -from typing import Optional, Tuple - -import torch -from pytorch3d.common.linear_with_repeat import LinearWithRepeat -from pytorch3d.implicitron.models.renderer.base import ( - conical_frustum_to_gaussian, - ImplicitronRayBundle, -) -from pytorch3d.implicitron.tools.config import expand_args_fields, registry -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.implicit import HarmonicEmbedding -from pytorch3d.renderer.implicit.utils import ray_bundle_to_ray_points - -from .base import ImplicitFunctionBase - -from .decoding_functions import ( # noqa - _xavier_init, - MLPWithInputSkips, - TransformerWithInputSkips, -) -from .utils import create_embeddings_for_implicit_function - - -logger = logging.getLogger(__name__) - - -class NeuralRadianceFieldBase(ImplicitFunctionBase, torch.nn.Module): - n_harmonic_functions_xyz: int = 10 - n_harmonic_functions_dir: int = 4 - n_hidden_neurons_dir: int = 128 - latent_dim: int = 0 - input_xyz: bool = True - xyz_ray_dir_in_camera_coords: bool = False - color_dim: int = 3 - use_integrated_positional_encoding: bool = False - """ - Args: - n_harmonic_functions_xyz: The number of harmonic functions - used to form the harmonic embedding of 3D point locations. - n_harmonic_functions_dir: The number of harmonic functions - used to form the harmonic embedding of the ray directions. - n_hidden_neurons_xyz: The number of hidden units in the - fully connected layers of the MLP that accepts the 3D point - locations and outputs the occupancy field with the intermediate - features. - n_hidden_neurons_dir: The number of hidden units in the - fully connected layers of the MLP that accepts the intermediate - features and ray directions and outputs the radiance field - (per-point colors). - n_layers_xyz: The number of layers of the MLP that outputs the - occupancy field. - append_xyz: The list of indices of the skip layers of the occupancy MLP. - use_integrated_positional_encoding: If True, use integrated positional enoding - as defined in `MIP-NeRF `_. - If False, use the classical harmonic embedding - defined in `NeRF `_. - """ - - def __post_init__(self): - # The harmonic embedding layer converts input 3D coordinates - # to a representation that is more suitable for - # processing with a deep neural network. 
- self.harmonic_embedding_xyz = HarmonicEmbedding( - self.n_harmonic_functions_xyz, append_input=True - ) - self.harmonic_embedding_dir = HarmonicEmbedding( - self.n_harmonic_functions_dir, append_input=True - ) - if not self.input_xyz and self.latent_dim <= 0: - raise ValueError("The latent dimension has to be > 0 if xyz is not input!") - - embedding_dim_dir = self.harmonic_embedding_dir.get_output_dim() - - self.xyz_encoder = self._construct_xyz_encoder( - input_dim=self.get_xyz_embedding_dim() - ) - - self.intermediate_linear = torch.nn.Linear( - self.n_hidden_neurons_xyz, self.n_hidden_neurons_xyz - ) - _xavier_init(self.intermediate_linear) - - self.density_layer = torch.nn.Linear(self.n_hidden_neurons_xyz, 1) - _xavier_init(self.density_layer) - - # Zero the bias of the density layer to avoid - # a completely transparent initialization. - self.density_layer.bias.data[:] = 0.0 # fixme: Sometimes this is not enough - - self.color_layer = torch.nn.Sequential( - LinearWithRepeat( - self.n_hidden_neurons_xyz + embedding_dim_dir, self.n_hidden_neurons_dir - ), - torch.nn.ReLU(True), - torch.nn.Linear(self.n_hidden_neurons_dir, self.color_dim), - torch.nn.Sigmoid(), - ) - - def get_xyz_embedding_dim(self): - return ( - self.harmonic_embedding_xyz.get_output_dim() * int(self.input_xyz) - + self.latent_dim - ) - - def _construct_xyz_encoder(self, input_dim: int): - raise NotImplementedError() - - def _get_colors(self, features: torch.Tensor, rays_directions: torch.Tensor): - """ - This function takes per-point `features` predicted by `self.xyz_encoder` - and evaluates the color model in order to attach to each - point a 3D vector of its RGB color. - """ - # Normalize the ray_directions to unit l2 norm. - rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1) - # Obtain the harmonic embedding of the normalized ray directions. - rays_embedding = self.harmonic_embedding_dir(rays_directions_normed) - - return self.color_layer((self.intermediate_linear(features), rays_embedding)) - - @staticmethod - def allows_multiple_passes() -> bool: - """ - Returns True as this implicit function allows - multiple passes. Overridden from ImplicitFunctionBase. - """ - return True - - def forward( - self, - *, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - **kwargs, - ): - """ - The forward function accepts the parametrizations of - 3D points sampled along projection rays. The forward - pass is responsible for attaching a 3D vector - and a 1D scalar representing the point's - RGB color and opacity respectively. - - Args: - ray_bundle: An ImplicitronRayBundle object containing the following variables: - origins: A tensor of shape `(minibatch, ..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(minibatch, ..., 3)` - containing the direction vectors of sampling rays in world coords. - lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)` - containing the lengths at which the rays are sampled. - bins: An optional tensor of shape `(minibatch,..., num_points_per_ray + 1)` - containing the bins at which the rays are sampled. In this case - lengths is equal to the midpoints of bins. - - fun_viewpool: an optional callback with the signature - fun_fiewpool(points) -> pooled_features - where points is a [N_TGT x N x 3] tensor of world coords, - and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor - of the features pooled from the context images. 
- - Returns: - rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)` - denoting the opacitiy of each ray point. - rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)` - denoting the color of each ray point. - - Raises: - ValueError: If `use_integrated_positional_encoding` is True and - `ray_bundle.bins` is None. - """ - if self.use_integrated_positional_encoding and ray_bundle.bins is None: - raise ValueError( - "When use_integrated_positional_encoding is True, ray_bundle.bins must be set." - "Have you set to True `AbstractMaskRaySampler.use_bins_for_ray_sampling`?" - ) - - rays_points_world, diag_cov = ( - conical_frustum_to_gaussian(ray_bundle) - if self.use_integrated_positional_encoding - else (ray_bundle_to_ray_points(ray_bundle), None) # pyre-ignore - ) - # rays_points_world.shape = [minibatch x ... x pts_per_ray x 3] - - embeds = create_embeddings_for_implicit_function( - xyz_world=rays_points_world, - # for 2nd param but got `Union[None, torch.Tensor, torch.nn.Module]`. - xyz_embedding_function=self.harmonic_embedding_xyz - if self.input_xyz - else None, - global_code=global_code, - fun_viewpool=fun_viewpool, - xyz_in_camera_coords=self.xyz_ray_dir_in_camera_coords, - camera=camera, - diag_cov=diag_cov, - ) - - # embeds.shape = [minibatch x n_src x n_rays x n_pts x self.n_harmonic_functions*6+3] - features = self.xyz_encoder(embeds) - # features.shape = [minibatch x ... x self.n_hidden_neurons_xyz] - # NNs operate on the flattenned rays; reshaping to the correct spatial size - # TODO: maybe make the transformer work on non-flattened tensors to avoid this reshape - features = features.reshape(*rays_points_world.shape[:-1], -1) - - raw_densities = self.density_layer(features) - # raw_densities.shape = [minibatch x ... x 1] in [0-1] - - if self.xyz_ray_dir_in_camera_coords: - if camera is None: - raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords") - - directions = ray_bundle.directions @ camera.R - else: - directions = ray_bundle.directions - - rays_colors = self._get_colors(features, directions) - # rays_colors.shape = [minibatch x ... x 3] in [0-1] - - return raw_densities, rays_colors, {} - - -@registry.register -class NeuralRadianceFieldImplicitFunction(NeuralRadianceFieldBase): - transformer_dim_down_factor: float = 1.0 - n_hidden_neurons_xyz: int = 256 - n_layers_xyz: int = 8 - append_xyz: Tuple[int, ...] = (5,) - - def _construct_xyz_encoder(self, input_dim: int): - expand_args_fields(MLPWithInputSkips) - return MLPWithInputSkips( - self.n_layers_xyz, - input_dim, - self.n_hidden_neurons_xyz, - input_dim, - self.n_hidden_neurons_xyz, - input_skips=self.append_xyz, - ) - - -@registry.register -class NeRFormerImplicitFunction(NeuralRadianceFieldBase): - transformer_dim_down_factor: float = 2.0 - n_hidden_neurons_xyz: int = 80 - n_layers_xyz: int = 2 - append_xyz: Tuple[int, ...] = (1,) - - def _construct_xyz_encoder(self, input_dim: int): - return TransformerWithInputSkips( - self.n_layers_xyz, - input_dim, - self.n_hidden_neurons_xyz, - input_dim, - self.n_hidden_neurons_xyz, - input_skips=self.append_xyz, - dim_down_factor=self.transformer_dim_down_factor, - ) - - @staticmethod - def requires_pooling_without_aggregation() -> bool: - """ - Returns True as this implicit function needs - pooling without aggregation. Overridden from ImplicitFunctionBase. 
- """ - return True diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py deleted file mode 100644 index 9ac0992cf52d0b38ec552904390c60b6387eb313..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py +++ /dev/null @@ -1,427 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from https://github.com/vsitzmann/scene-representation-networks -# Copyright (c) 2019 Vincent Sitzmann -from typing import Any, cast, Optional, Tuple - -import torch -from omegaconf import DictConfig -from pytorch3d.common.linear_with_repeat import LinearWithRepeat -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.third_party import hyperlayers, pytorch_prototyping -from pytorch3d.implicitron.tools.config import Configurable, registry, run_auto_creation -from pytorch3d.renderer import ray_bundle_to_ray_points -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.implicit import HarmonicEmbedding - -from .base import ImplicitFunctionBase -from .utils import create_embeddings_for_implicit_function - - -def _kaiming_normal_init(module: torch.nn.Module) -> None: - if isinstance(module, (torch.nn.Linear, LinearWithRepeat)): - torch.nn.init.kaiming_normal_( - module.weight, a=0.0, nonlinearity="relu", mode="fan_in" - ) - - -class SRNRaymarchFunction(Configurable, torch.nn.Module): - n_harmonic_functions: int = 3 # 0 means raw 3D coord inputs - n_hidden_units: int = 256 - n_layers: int = 2 - in_features: int = 3 - out_features: int = 256 - latent_dim: int = 0 - xyz_in_camera_coords: bool = False - - # The internal network can be set as an output of an SRNHyperNet. - # Note that, in order to avoid Pytorch's automatic registering of the - # raymarch_function module on construction, we input the network wrapped - # as a 1-tuple. - - # raymarch_function should ideally be typed as Optional[Tuple[Callable]] - # but Omegaconf.structured doesn't like that. TODO: revisit after new - # release of omegaconf including https://github.com/omry/omegaconf/pull/749 . - raymarch_function: Any = None - - def __post_init__(self): - self._harmonic_embedding = HarmonicEmbedding( - self.n_harmonic_functions, append_input=True - ) - input_embedding_dim = ( - HarmonicEmbedding.get_output_dim_static( - self.in_features, - self.n_harmonic_functions, - True, - ) - + self.latent_dim - ) - - if self.raymarch_function is not None: - self._net = self.raymarch_function[0] - else: - self._net = pytorch_prototyping.FCBlock( - hidden_ch=self.n_hidden_units, - num_hidden_layers=self.n_layers, - in_features=input_embedding_dim, - out_features=self.out_features, - ) - - def forward( - self, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - **kwargs, - ): - """ - Args: - ray_bundle: An ImplicitronRayBundle object containing the following variables: - origins: A tensor of shape `(minibatch, ..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(minibatch, ..., 3)` - containing the direction vectors of sampling rays in world coords. - lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)` - containing the lengths at which the rays are sampled. 
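The `input_embedding_dim` computed above depends on the width of the harmonic embedding. Assuming the standard NeRF-style encoding (a sin and a cos per frequency, with the raw input appended), that width is `in_features * (2 * n_harmonic_functions + 1)`; the sketch below recomputes it by hand rather than calling the library, so the frequency choice is an illustrative assumption and the exact PyTorch3D semantics should be read from `HarmonicEmbedding` itself:

import torch

def harmonic_embed(x: torch.Tensor, n_harmonic: int, append_input: bool = True) -> torch.Tensor:
    # x: (..., in_features). Frequencies 2**0 ... 2**(n_harmonic - 1), sin and cos for each.
    freqs = 2.0 ** torch.arange(n_harmonic, dtype=x.dtype)
    angles = x[..., None] * freqs                              # (..., in_features, n_harmonic)
    embed = torch.cat([angles.sin(), angles.cos()], dim=-1)    # (..., in_features, 2 * n_harmonic)
    embed = embed.flatten(start_dim=-2)                        # (..., in_features * 2 * n_harmonic)
    if append_input:
        embed = torch.cat([embed, x], dim=-1)
    return embed

out = harmonic_embed(torch.rand(5, 3), n_harmonic=3)
print(out.shape[-1], 3 * (2 * 3 + 1))  # both 21
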
- fun_viewpool: an optional callback with the signature - fun_fiewpool(points) -> pooled_features - where points is a [N_TGT x N x 3] tensor of world coords, - and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor - of the features pooled from the context images. - - Returns: - rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)` - denoting the opacitiy of each ray point. - rays_colors: Set to None. - """ - # We first convert the ray parametrizations to world - # coordinates with `ray_bundle_to_ray_points`. - # pyre-ignore[6] - rays_points_world = ray_bundle_to_ray_points(ray_bundle) - - embeds = create_embeddings_for_implicit_function( - xyz_world=rays_points_world, - xyz_embedding_function=self._harmonic_embedding, - global_code=global_code, - fun_viewpool=fun_viewpool, - xyz_in_camera_coords=self.xyz_in_camera_coords, - camera=camera, - ) - - # Before running the network, we have to resize embeds to ndims=3, - # otherwise the SRN layers consume huge amounts of memory. - raymarch_features = self._net( - embeds.view(embeds.shape[0], -1, embeds.shape[-1]) - ) - # raymarch_features.shape = [minibatch x ... x self.n_hidden_neurons_xyz] - - # NNs operate on the flattenned rays; reshaping to the correct spatial size - raymarch_features = raymarch_features.reshape(*rays_points_world.shape[:-1], -1) - - return raymarch_features, None - - -class SRNPixelGenerator(Configurable, torch.nn.Module): - n_harmonic_functions: int = 4 - n_hidden_units: int = 256 - n_hidden_units_color: int = 128 - n_layers: int = 2 - in_features: int = 256 - out_features: int = 3 - ray_dir_in_camera_coords: bool = False - - def __post_init__(self): - self._harmonic_embedding = HarmonicEmbedding( - self.n_harmonic_functions, append_input=True - ) - self._net = pytorch_prototyping.FCBlock( - hidden_ch=self.n_hidden_units, - num_hidden_layers=self.n_layers, - in_features=self.in_features, - out_features=self.n_hidden_units, - ) - self._density_layer = torch.nn.Linear(self.n_hidden_units, 1) - self._density_layer.apply(_kaiming_normal_init) - embedding_dim_dir = self._harmonic_embedding.get_output_dim(input_dims=3) - self._color_layer = torch.nn.Sequential( - LinearWithRepeat( - self.n_hidden_units + embedding_dim_dir, - self.n_hidden_units_color, - ), - torch.nn.LayerNorm([self.n_hidden_units_color]), - torch.nn.ReLU(inplace=True), - torch.nn.Linear(self.n_hidden_units_color, self.out_features), - ) - self._color_layer.apply(_kaiming_normal_init) - - # TODO: merge with NeuralRadianceFieldBase's _get_colors - def _get_colors(self, features: torch.Tensor, rays_directions: torch.Tensor): - """ - This function takes per-point `features` predicted by `self.net` - and evaluates the color model in order to attach to each - point a 3D vector of its RGB color. - """ - # Normalize the ray_directions to unit l2 norm. - rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1) - # Obtain the harmonic embedding of the normalized ray directions. 
- rays_embedding = self._harmonic_embedding(rays_directions_normed) - return self._color_layer((features, rays_embedding)) - - def forward( - self, - raymarch_features: torch.Tensor, - ray_bundle: ImplicitronRayBundle, - camera: Optional[CamerasBase] = None, - **kwargs, - ): - """ - Args: - raymarch_features: Features from the raymarching network of shape - `(minibatch, ..., self.in_features)` - ray_bundle: An ImplicitronRayBundle object containing the following variables: - origins: A tensor of shape `(minibatch, ..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(minibatch, ..., 3)` - containing the direction vectors of sampling rays in world coords. - lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)` - containing the lengths at which the rays are sampled. - - Returns: - rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)` - denoting the opacitiy of each ray point. - rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)` - denoting the color of each ray point. - """ - # raymarch_features.shape = [minibatch x ... x pts_per_ray x 3] - features = self._net(raymarch_features) - # features.shape = [minibatch x ... x self.n_hidden_units] - - if self.ray_dir_in_camera_coords: - if camera is None: - raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords") - - directions = ray_bundle.directions @ camera.R - else: - directions = ray_bundle.directions - - # NNs operate on the flattenned rays; reshaping to the correct spatial size - features = features.reshape(*raymarch_features.shape[:-1], -1) - - raw_densities = self._density_layer(features) - - rays_colors = self._get_colors(features, directions) - - return raw_densities, rays_colors - - -class SRNRaymarchHyperNet(Configurable, torch.nn.Module): - """ - This is a raymarching function which has a forward like SRNRaymarchFunction - but instead of the weights being parameters of the module, they - are the output of another network, the hypernet, which takes the global_code - as input. All the dataclass members of SRNRaymarchFunction are here with the - same meaning. In addition, there are members with names ending `_hypernet` - which affect the hypernet. - - Because this class may be called repeatedly for the same global_code, the - output of the hypernet is cached in self.cached_srn_raymarch_function. - This member must be manually set to None whenever the global_code changes. 
- """ - - n_harmonic_functions: int = 3 # 0 means raw 3D coord inputs - n_hidden_units: int = 256 - n_layers: int = 2 - n_hidden_units_hypernet: int = 256 - n_layers_hypernet: int = 1 - in_features: int = 3 - out_features: int = 256 - latent_dim_hypernet: int = 0 - latent_dim: int = 0 - xyz_in_camera_coords: bool = False - - def __post_init__(self): - raymarch_input_embedding_dim = ( - HarmonicEmbedding.get_output_dim_static( - self.in_features, - self.n_harmonic_functions, - True, - ) - + self.latent_dim - ) - - self._hypernet = hyperlayers.HyperFC( - hyper_in_ch=self.latent_dim_hypernet, - hyper_num_hidden_layers=self.n_layers_hypernet, - hyper_hidden_ch=self.n_hidden_units_hypernet, - hidden_ch=self.n_hidden_units, - num_hidden_layers=self.n_layers, - in_ch=raymarch_input_embedding_dim, - out_ch=self.n_hidden_units, - ) - - self.cached_srn_raymarch_function: Optional[Tuple[SRNRaymarchFunction]] = None - - def _run_hypernet(self, global_code: torch.Tensor) -> Tuple[SRNRaymarchFunction]: - """ - Runs the hypernet and returns a 1-tuple containing the generated - srn_raymarch_function. - """ - - net = self._hypernet(global_code) - - # use the hyper-net generated network to instantiate the raymarch module - srn_raymarch_function = SRNRaymarchFunction( - n_harmonic_functions=self.n_harmonic_functions, - n_hidden_units=self.n_hidden_units, - n_layers=self.n_layers, - in_features=self.in_features, - out_features=self.out_features, - latent_dim=self.latent_dim, - xyz_in_camera_coords=self.xyz_in_camera_coords, - raymarch_function=(net,), - ) - - # move the generated raymarch function to the correct device - srn_raymarch_function.to(global_code.device) - - return (srn_raymarch_function,) - - def forward( - self, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - **kwargs, - ): - - if global_code is None: - raise ValueError("SRN Hypernetwork requires a non-trivial global code.") - - # The raymarching network is cached in case the function is called repeatedly - # across LSTM iterations for the same global_code. 
- if self.cached_srn_raymarch_function is None: - # generate the raymarching network from the hypernet - self.cached_srn_raymarch_function = self._run_hypernet(global_code) - (srn_raymarch_function,) = cast( - Tuple[SRNRaymarchFunction], self.cached_srn_raymarch_function - ) - - return srn_raymarch_function( - ray_bundle=ray_bundle, - fun_viewpool=fun_viewpool, - camera=camera, - global_code=None, # the hypernetwork takes the global code - ) - - -@registry.register -# pyre-fixme[13]: Uninitialized attribute -class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module): - latent_dim: int = 0 - raymarch_function: SRNRaymarchFunction - pixel_generator: SRNPixelGenerator - - def __post_init__(self): - run_auto_creation(self) - - def create_raymarch_function(self) -> None: - self.raymarch_function = SRNRaymarchFunction( - latent_dim=self.latent_dim, - **self.raymarch_function_args, - ) - - @classmethod - def raymarch_function_tweak_args(cls, type, args: DictConfig) -> None: - args.pop("latent_dim", None) - - def forward( - self, - *, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - raymarch_features: Optional[torch.Tensor] = None, - **kwargs, - ): - predict_colors = raymarch_features is not None - if predict_colors: - return self.pixel_generator( - raymarch_features=raymarch_features, - ray_bundle=ray_bundle, - camera=camera, - **kwargs, - ) - else: - return self.raymarch_function( - ray_bundle=ray_bundle, - fun_viewpool=fun_viewpool, - camera=camera, - global_code=global_code, - **kwargs, - ) - - -@registry.register -# pyre-fixme[13]: Uninitialized attribute -class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module): - """ - This implicit function uses a hypernetwork to generate the - SRNRaymarchingFunction, and this is cached. Whenever the - global_code changes, `on_bind_args` must be called to clear - the cache. - """ - - latent_dim_hypernet: int = 0 - latent_dim: int = 0 - hypernet: SRNRaymarchHyperNet - pixel_generator: SRNPixelGenerator - - def __post_init__(self): - run_auto_creation(self) - - def create_hypernet(self) -> None: - self.hypernet = SRNRaymarchHyperNet( - latent_dim=self.latent_dim, - latent_dim_hypernet=self.latent_dim_hypernet, - **self.hypernet_args, - ) - - @classmethod - def hypernet_tweak_args(cls, type, args: DictConfig) -> None: - args.pop("latent_dim", None) - args.pop("latent_dim_hypernet", None) - - def forward( - self, - *, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - raymarch_features: Optional[torch.Tensor] = None, - **kwargs, - ): - predict_colors = raymarch_features is not None - if predict_colors: - return self.pixel_generator( - raymarch_features=raymarch_features, - ray_bundle=ray_bundle, - camera=camera, - **kwargs, - ) - else: - return self.hypernet( - ray_bundle=ray_bundle, - fun_viewpool=fun_viewpool, - camera=camera, - global_code=global_code, - **kwargs, - ) - - def on_bind_args(self): - """ - The global_code may have changed, so we reset the hypernet. 
- """ - self.hypernet.cached_srn_raymarch_function = None diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/utils.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/utils.py deleted file mode 100644 index 25ec3fcb6ef6947666f7ce7bfbcdd591cd61ec81..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/utils.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Callable, Optional - -import torch - -import torch.nn.functional as F -from pytorch3d.common.compat import prod -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.renderer import ray_bundle_to_ray_points -from pytorch3d.renderer.cameras import CamerasBase - - -def broadcast_global_code(embeds: torch.Tensor, global_code: torch.Tensor): - """ - Expands the `global_code` of shape (minibatch, dim) - so that it can be appended to `embeds` of shape (minibatch, ..., dim2), - and appends to the last dimension of `embeds`. - """ - bs = embeds.shape[0] - global_code_broadcast = global_code.view(bs, *([1] * (embeds.ndim - 2)), -1).expand( - *embeds.shape[:-1], - global_code.shape[-1], - ) - return torch.cat([embeds, global_code_broadcast], dim=-1) - - -def create_embeddings_for_implicit_function( - xyz_world: torch.Tensor, - xyz_in_camera_coords: bool, - global_code: Optional[torch.Tensor], - camera: Optional[CamerasBase], - fun_viewpool: Optional[Callable], - xyz_embedding_function: Optional[Callable], - diag_cov: Optional[torch.Tensor] = None, -) -> torch.Tensor: - - bs, *spatial_size, pts_per_ray, _ = xyz_world.shape - - if xyz_in_camera_coords: - if camera is None: - raise ValueError("Camera must be given if xyz_in_camera_coords") - - ray_points_for_embed = ( - camera.get_world_to_view_transform() - .transform_points(xyz_world.view(bs, -1, 3)) - .view(xyz_world.shape) - ) - else: - ray_points_for_embed = xyz_world - - if xyz_embedding_function is None: - embeds = torch.empty( - bs, - 1, - prod(spatial_size), - pts_per_ray, - 0, - ) - else: - - embeds = xyz_embedding_function(ray_points_for_embed, diag_cov=diag_cov) - embeds = embeds.reshape( - bs, - 1, - prod(spatial_size), - pts_per_ray, - -1, - ) # flatten spatial, add n_src dim - - if fun_viewpool is not None: - # viewpooling - embeds_viewpooled = fun_viewpool(xyz_world.reshape(bs, -1, 3)) - embed_shape = ( - bs, - embeds_viewpooled.shape[1], - prod(spatial_size), - pts_per_ray, - -1, - ) - embeds_viewpooled = embeds_viewpooled.reshape(*embed_shape) - if embeds is not None: - embeds = torch.cat([embeds.expand(*embed_shape), embeds_viewpooled], dim=-1) - else: - embeds = embeds_viewpooled - - if global_code is not None: - # append the broadcasted global code to embeds - embeds = broadcast_global_code(embeds, global_code) - - return embeds - - -def interpolate_line( - points: torch.Tensor, - source: torch.Tensor, - **kwargs, -) -> torch.Tensor: - """ - Linearly interpolates values of source grids. The first dimension of points represents - number of points and the second coordinate, for example ([[x0], [x1], ...]). The first - dimension of argument source represents feature and ones after that the spatial - dimension. 
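A standalone toy equivalent of the broadcast-and-append step performed by `broadcast_global_code` above, useful for seeing the shapes involved; it does not import the deleted module and the example sizes are arbitrary:

import torch

def broadcast_and_append(embeds: torch.Tensor, global_code: torch.Tensor) -> torch.Tensor:
    # embeds: (minibatch, ..., dim2); global_code: (minibatch, dim).
    bs = embeds.shape[0]
    # View the code as (minibatch, 1, ..., 1, dim) and expand it over the middle dims.
    code = global_code.view(bs, *([1] * (embeds.ndim - 2)), -1)
    code = code.expand(*embeds.shape[:-1], global_code.shape[-1])
    return torch.cat([embeds, code], dim=-1)

embeds = torch.rand(2, 5, 7, 8)   # e.g. (batch, rays, points per ray, features)
global_code = torch.rand(2, 3)
out = broadcast_and_append(embeds, global_code)
print(out.shape)  # torch.Size([2, 5, 7, 11])
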
- - Arguments: - points: shape (n_grids, n_points, 1), - source: tensor of shape (n_grids, features, width), - Returns: - interpolated tensor of shape (n_grids, n_points, features) - """ - # To enable sampling of the source using the torch.functional.grid_sample - # points need to have 2 coordinates. - expansion = points.new_zeros(points.shape) - points = torch.cat((points, expansion), dim=-1) - - source = source[:, :, None, :] - points = points[:, :, None, :] - - out = F.grid_sample( - grid=points, - input=source, - **kwargs, - ) - return out[:, :, :, 0].permute(0, 2, 1) - - -def interpolate_plane( - points: torch.Tensor, - source: torch.Tensor, - **kwargs, -) -> torch.Tensor: - """ - Bilinearly interpolates values of source grids. The first dimension of points represents - number of points and the second coordinates, for example ([[x0, y0], [x1, y1], ...]). - The first dimension of argument source represents feature and ones after that the - spatial dimension. - - Arguments: - points: shape (n_grids, n_points, 2), - source: tensor of shape (n_grids, features, width, height), - Returns: - interpolated tensor of shape (n_grids, n_points, features) - """ - # permuting because torch.nn.functional.grid_sample works with - # (features, height, width) and not - # (features, width, height) - source = source.permute(0, 1, 3, 2) - points = points[:, :, None, :] - - out = F.grid_sample( - grid=points, - input=source, - **kwargs, - ) - return out[:, :, :, 0].permute(0, 2, 1) - - -def interpolate_volume( - points: torch.Tensor, source: torch.Tensor, **kwargs -) -> torch.Tensor: - """ - Interpolates values of source grids. The first dimension of points represents - number of points and the second coordinates, for example - [[x0, y0, z0], [x1, y1, z1], ...]. The first dimension of a source represents features - and ones after that the spatial dimension. - - Arguments: - points: shape (n_grids, n_points, 3), - source: tensor of shape (n_grids, features, width, height, depth), - Returns: - interpolated tensor of shape (n_grids, n_points, features) - """ - if "mode" in kwargs and kwargs["mode"] == "trilinear": - kwargs = kwargs.copy() - kwargs["mode"] = "bilinear" - # permuting because torch.nn.functional.grid_sample works with - # (features, depth, height, width) and not (features, width, height, depth) - source = source.permute(0, 1, 4, 3, 2) - grid = points[:, :, None, None, :] - - out = F.grid_sample( - grid=grid, - input=source, - **kwargs, - ) - return out[:, :, :, 0, 0].permute(0, 2, 1) - - -def get_rays_points_world( - ray_bundle: Optional[ImplicitronRayBundle] = None, - rays_points_world: Optional[torch.Tensor] = None, -) -> torch.Tensor: - """ - Converts the ray_bundle to rays_points_world if rays_points_world is not defined - and raises error if both are defined. - - Args: - ray_bundle: An ImplicitronRayBundle object or None - rays_points_world: A torch.Tensor representing ray points converted to - world coordinates - Returns: - A torch.Tensor representing ray points converted to world coordinates - of shape [minibatch x ... x pts_per_ray x 3]. - """ - if rays_points_world is not None and ray_bundle is not None: - raise ValueError( - "Cannot define both rays_points_world and ray_bundle," - + " one has to be None." 
- ) - if rays_points_world is not None: - return rays_points_world - if ray_bundle is not None: - # pyre-ignore[6] - return ray_bundle_to_ray_points(ray_bundle) - raise ValueError("ray_bundle and rays_points_world cannot both be None") diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid.py deleted file mode 100644 index 8115d072dafa62ee1fb455711aa32198adf233a9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid.py +++ /dev/null @@ -1,1137 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -""" -This file contains classes that implement Voxel grids, both in their full resolution -as in the factorized form. There are two factorized forms implemented, Tensor rank decomposition -or CANDECOMP/PARAFAC (here CP) and Vector Matrix (here VM) factorization from the -TensoRF (https://arxiv.org/abs/2203.09517) paper. - -In addition, the module VoxelGridModule implements a trainable instance of one of -these classes. - -""" - -import logging -import warnings -from collections.abc import Mapping -from dataclasses import dataclass, field - -from distutils.version import LooseVersion -from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Tuple, Type - -import torch -from omegaconf import DictConfig -from pytorch3d.implicitron.tools.config import ( - Configurable, - registry, - ReplaceableBase, - run_auto_creation, -) -from pytorch3d.structures.volumes import VolumeLocator - -from .utils import interpolate_line, interpolate_plane, interpolate_volume - - -logger = logging.getLogger(__name__) - - -@dataclass -class VoxelGridValuesBase: - pass - - -class VoxelGridBase(ReplaceableBase, torch.nn.Module): - """ - Base class for all the voxel grid variants whith added trilinear interpolation between - voxels (for example if voxel (0.333, 1, 3) is queried that would return the result - 2/3*voxel[0, 1, 3] + 1/3*voxel[1, 1, 3]) - - Internally voxel grids are indexed by (features, x, y, z). If queried the point is not - inside the voxel grid the vector that will be returned is determined by padding. - - Members: - align_corners: parameter used in torch.functional.grid_sample. For details go to - https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html by - default is True - padding: padding mode for outside grid values 'zeros' | 'border' | 'reflection'. - Default is 'zeros' - mode: interpolation mode to calculate output values : - 'bilinear' | 'nearest' | 'bicubic' | 'trilinear'. - Default: 'bilinear' Note: mode='bicubic' supports only FullResolutionVoxelGrid. - When mode='bilinear' and the input is 5-D, the interpolation mode used internally - will actually be trilinear. - n_features: number of dimensions of base feature vector. Determines how many features - the grid returns. 
- resolution_changes: a dictionary, where keys are change epochs and values are - 3-tuples containing x, y, z grid sizes corresponding to each axis to each epoch - """ - - align_corners: bool = True - padding: str = "zeros" - mode: str = "bilinear" - n_features: int = 1 - # return the line below once we drop OmegaConf 2.1 support - # resolution_changes: Dict[int, List[int]] = field( - resolution_changes: Dict[int, Any] = field( - default_factory=lambda: {0: [128, 128, 128]} - ) - - def __post_init__(self): - if 0 not in self.resolution_changes: - raise ValueError("There has to be key `0` in `resolution_changes`.") - - def evaluate_world( - self, - points: torch.Tensor, - grid_values: VoxelGridValuesBase, - locator: VolumeLocator, - ) -> torch.Tensor: - """ - Evaluates the voxel grid at points in the world coordinate frame. - The interpolation type is determined by the `mode` member. - - Arguments: - points (torch.Tensor): tensor of points that you want to query - of a form (n_grids, ..., 3) - grid_values: an object of type Class.values_type which has tensors as - members which have shapes derived from the get_shapes() method - locator: a VolumeLocator object - Returns: - torch.Tensor: shape (n_grids, ..., n_features) - """ - points_local = locator.world_to_local_coords(points) - return self.evaluate_local(points_local, grid_values) - - def evaluate_local( - self, points: torch.Tensor, grid_values: VoxelGridValuesBase - ) -> torch.Tensor: - """ - Evaluates the voxel grid at points in the local coordinate frame, - The interpolation type is determined by the `mode` member. - - Arguments: - points (torch.Tensor): tensor of points that you want to query - of a form (n_grids, ..., 3), in a normalized form (coordinates are in [-1, 1]) - grid_values: an object of type VMFactorizedVoxelGrid.values_type which has tensors - as members which have shapes derived from the get_shapes() method - Returns: - torch.Tensor: shape (n_grids, ..., n_features) - """ - raise NotImplementedError() - - def get_shapes(self, epoch: int) -> Dict[str, Tuple]: - """ - Using parameters from the __init__ method, this method returns the - shapes of individual tensors needed to run the evaluate method. - - Args: - epoch: If the shape varies during training, which training epoch's shape to return. - Returns: - a dictionary of needed shapes. To use the evaluate_local and evaluate_world methods - replace the shapes in the dictionary with tensors of those shapes and add the - first 'batch' dimension. If the required shape is (a, b) and you want to - have g grids then the tensor that replaces the shape should have the - shape (g, a, b). - """ - raise NotImplementedError() - - def get_resolution(self, epoch: int) -> List[int]: - """ - Returns the resolution which the grid should have at specific epoch - - Args: - epoch which to use in the resolution calculation - Returns: - resolution at specific epoch - """ - last_change = 0 - for change_epoch in self.resolution_changes: - if change_epoch <= epoch: - last_change = max(last_change, change_epoch) - return self.resolution_changes[last_change] - - @staticmethod - def get_output_dim(args: DictConfig) -> int: - """ - Given all the arguments of the grid's __init__, returns output's last dimension length. - - In particular, if self.evaluate_world or self.evaluate_local - are called with `points` of shape (n_grids, n_points, 3), - their output will be of shape - (n_grids, n_points, grid.get_output_dim()). 
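The `resolution_changes` schedule consumed by `get_resolution` above is a step function over epochs: the grid uses the resolution attached to the largest change epoch that does not exceed the current one. A plain-Python sketch of that lookup, with made-up schedule values:

resolution_changes = {0: [128, 128, 128], 500: [256, 256, 256], 1000: [512, 512, 512]}

def resolution_at(epoch: int, changes: dict) -> list:
    # Pick the entry with the largest key that does not exceed `epoch`.
    last_change = max(k for k in changes if k <= epoch)
    return changes[last_change]

print(resolution_at(0, resolution_changes))     # [128, 128, 128]
print(resolution_at(750, resolution_changes))   # [256, 256, 256]
print(resolution_at(2000, resolution_changes))  # [512, 512, 512]
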
- - Args: - args: DictConfig which would be used to initialize the object - Returns: - output's last dimension length - """ - return args["n_features"] - - def change_resolution( - self, - grid_values: VoxelGridValuesBase, - *, - epoch: Optional[int] = None, - grid_values_with_wanted_resolution: Optional[VoxelGridValuesBase] = None, - mode: str = "linear", - align_corners: bool = True, - antialias: bool = False, - ) -> Tuple[VoxelGridValuesBase, bool]: - """ - Changes resolution of tensors in `grid_values` to match the - `grid_values_with_wanted_resolution` or resolution on wanted epoch. - - Args: - epoch: current training epoch, used to see if the grid needs regridding - grid_values: instance of self.values_type which contains - the voxel grid which will be interpolated to create the new grid - epoch: epoch which is used to get the resolution of the new - `grid_values` using `self.resolution_changes`. - grid_values_with_wanted_resolution: `VoxelGridValuesBase` to whose resolution - to interpolate grid_values - align_corners: as for torch.nn.functional.interpolate - mode: as for torch.nn.functional.interpolate - 'nearest' | 'bicubic' | 'linear' | 'area' | 'nearest-exact'. - Default: 'linear' - antialias: as for torch.nn.functional.interpolate. - Using anti-alias option - together with align_corners=False and mode='bicubic', interpolation - result would match Pillow result for downsampling operation. - Supported mode: 'bicubic' - Returns: - tuple of - - new voxel grid_values of desired resolution, of type self.values_type - - True if regridding has happened. - """ - - if (epoch is None) == (grid_values_with_wanted_resolution is None): - raise ValueError( - "Exactly one of `epoch` or " - "`grid_values_with_wanted_resolution` has to be defined." - ) - - if mode not in ("nearest", "bicubic", "linear", "area", "nearest-exact"): - raise ValueError( - "`mode` should be one of the following 'nearest'" - + "| 'bicubic' | 'linear' | 'area' | 'nearest-exact'" - ) - - interpolate_has_antialias = LooseVersion(torch.__version__) >= "1.11" - - if antialias and not interpolate_has_antialias: - warnings.warn("Antialiased interpolation requires PyTorch 1.11+; ignoring") - - interp_kwargs = {"antialias": antialias} if interpolate_has_antialias else {} - - def change_individual_resolution(tensor, wanted_resolution): - if mode == "linear": - n_dim = len(wanted_resolution) - new_mode = ("linear", "bilinear", "trilinear")[n_dim - 1] - else: - new_mode = mode - return torch.nn.functional.interpolate( - input=tensor, - size=wanted_resolution, - mode=new_mode, - align_corners=align_corners, - recompute_scale_factor=False, - **interp_kwargs, - ) - - if epoch is not None: - if epoch not in self.resolution_changes: - return grid_values, False - - wanted_shapes = self.get_shapes(epoch=epoch) - params = { - name: change_individual_resolution( - getattr(grid_values, name), shape[1:] - ) - for name, shape in wanted_shapes.items() - } - res = self.get_resolution(epoch) - logger.info(f"Changed grid resolutiuon at epoch {epoch} to {res}") - else: - params = { - name: ( - change_individual_resolution( - getattr(grid_values, name), tensor.shape[2:] - ) - if tensor is not None - else None - ) - for name, tensor in vars(grid_values_with_wanted_resolution).items() - } - - return self.values_type(**params), True - - def get_resolution_change_epochs(self) -> Tuple[int, ...]: - """ - Returns epochs at which this grid should change epochs. 
- """ - return tuple(self.resolution_changes.keys()) - - def get_align_corners(self) -> bool: - """ - Returns True if voxel grid uses align_corners=True - """ - return self.align_corners - - def crop_world( - self, - min_point_world: torch.Tensor, - max_point_world: torch.Tensor, - grid_values: VoxelGridValuesBase, - volume_locator: VolumeLocator, - ) -> VoxelGridValuesBase: - """ - Crops the voxel grid based on minimum and maximum occupied point in - world coordinates. After cropping all 8 corner points are preserved in - the voxel grid. This is achieved by preserving all the voxels needed to - calculate the point. - - +--------B - / /| - / / | - +--------+ | <==== Bounding box represented by points A and B: - | | | - B has x, y and z coordinates bigger or equal - | | + to all other points of the object - | | / - A has x, y and z coordinates smaller or equal - | |/ to all other points of the object - A--------+ - - Args: - min_point_world: torch.Tensor of shape (3,). Has x, y and z coordinates - smaller or equal to all other occupied points. Point A from the - picture above. - max_point_world: torch.Tensor of shape (3,). Has x, y and z coordinates - bigger or equal to all other occupied points. Point B from the - picture above. - grid_values: instance of self.values_type which contains - the voxel grid which will be cropped to create the new grid - volume_locator: VolumeLocator object used to convert world to local - cordinates - Returns: - instance of self.values_type which has volume cropped to desired size. - """ - min_point_local = volume_locator.world_to_local_coords(min_point_world[None])[0] - max_point_local = volume_locator.world_to_local_coords(max_point_world[None])[0] - return self.crop_local(min_point_local, max_point_local, grid_values) - - def crop_local( - self, - min_point_local: torch.Tensor, - max_point_local: torch.Tensor, - grid_values: VoxelGridValuesBase, - ) -> VoxelGridValuesBase: - """ - Crops the voxel grid based on minimum and maximum occupied point in local - coordinates. After cropping both min and max point are preserved in the voxel - grid. This is achieved by preserving all the voxels needed to calculate the point. - - +--------B - / /| - / / | - +--------+ | <==== Bounding box represented by points A and B: - | | | - B has x, y and z coordinates bigger or equal - | | + to all other points of the object - | | / - A has x, y and z coordinates smaller or equal - | |/ to all other points of the object - A--------+ - - Args: - min_point_local: torch.Tensor of shape (3,). Has x, y and z coordinates - smaller or equal to all other occupied points. Point A from the - picture above. All elements in [-1, 1]. - max_point_local: torch.Tensor of shape (3,). Has x, y and z coordinates - bigger or equal to all other occupied points. Point B from the - picture above. All elements in [-1, 1]. - grid_values: instance of self.values_type which contains - the voxel grid which will be cropped to create the new grid - Returns: - instance of self.values_type which has volume cropped to desired size. - """ - raise NotImplementedError() - - -@dataclass -class FullResolutionVoxelGridValues(VoxelGridValuesBase): - voxel_grid: torch.Tensor - - -@registry.register -class FullResolutionVoxelGrid(VoxelGridBase): - """ - Full resolution voxel grid equivalent to 4D tensor where shape is - (features, width, height, depth) with linear interpolation between voxels. 
- """ - - # the type of grid_values argument needed to run evaluate_local() - values_type: ClassVar[Type[VoxelGridValuesBase]] = FullResolutionVoxelGridValues - - # pyre-fixme[14]: `evaluate_local` overrides method defined in `VoxelGridBase` - # inconsistently. - def evaluate_local( - self, points: torch.Tensor, grid_values: FullResolutionVoxelGridValues - ) -> torch.Tensor: - """ - Evaluates the voxel grid at points in the local coordinate frame, - The interpolation type is determined by the `mode` member. - - Arguments: - points (torch.Tensor): tensor of points that you want to query - of a form (..., 3), in a normalized form (coordinates are in [-1, 1]) - grid_values: an object of type values_type which has tensors as - members which have shapes derived from the get_shapes() method - Returns: - torch.Tensor: shape (n_grids, ..., n_features) - """ - # (n_grids, n_points_total, n_features) from (n_grids, ..., n_features) - recorded_shape = points.shape - points = points.view(points.shape[0], -1, points.shape[-1]) - interpolated = interpolate_volume( - points, - grid_values.voxel_grid, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) - return interpolated.view(*recorded_shape[:-1], -1) - - def get_shapes(self, epoch: int) -> Dict[str, Tuple]: - width, height, depth = self.get_resolution(epoch) - return {"voxel_grid": (self.n_features, width, height, depth)} - - # pyre-ignore[14] - def crop_local( - self, - min_point_local: torch.Tensor, - max_point_local: torch.Tensor, - grid_values: FullResolutionVoxelGridValues, - ) -> FullResolutionVoxelGridValues: - assert torch.all(min_point_local < max_point_local) - min_point_local = torch.clamp(min_point_local, -1, 1) - max_point_local = torch.clamp(max_point_local, -1, 1) - _, _, width, height, depth = grid_values.voxel_grid.shape - resolution = grid_values.voxel_grid.new_tensor([width, height, depth]) - min_point_local01 = (min_point_local + 1) / 2 - max_point_local01 = (max_point_local + 1) / 2 - - if self.align_corners: - minx, miny, minz = torch.floor(min_point_local01 * (resolution - 1)).long() - maxx, maxy, maxz = torch.ceil(max_point_local01 * (resolution - 1)).long() - else: - minx, miny, minz = torch.floor(min_point_local01 * resolution - 0.5).long() - maxx, maxy, maxz = torch.ceil(max_point_local01 * resolution - 0.5).long() - - return FullResolutionVoxelGridValues( - voxel_grid=grid_values.voxel_grid[ - :, :, minx : maxx + 1, miny : maxy + 1, minz : maxz + 1 - ] - ) - - -@dataclass -class CPFactorizedVoxelGridValues(VoxelGridValuesBase): - vector_components_x: torch.Tensor - vector_components_y: torch.Tensor - vector_components_z: torch.Tensor - basis_matrix: Optional[torch.Tensor] = None - - -@registry.register -class CPFactorizedVoxelGrid(VoxelGridBase): - """ - Canonical Polyadic (CP/CANDECOMP/PARAFAC) Factorization factorizes the 3d grid into three - vectors (x, y, z). For n_components=n, the 3d grid is a sum of the two outer products - (call it βŠ—) of each vector type (x, y, z): - - 3d_grid = x0 βŠ— y0 βŠ— z0 + x1 βŠ— y1 βŠ— z1 + ... + xn βŠ— yn βŠ— zn - - These tensors are passed in a object of CPFactorizedVoxelGridValues (here obj) as - obj.vector_components_x, obj.vector_components_y, obj.vector_components_z. Their shapes are - `(n_components, r)` where `r` is the relevant resolution. - - Each element of this sum has an extra dimension, which gets matrix-multiplied by an - appropriate "basis matrix" of shape (n_grids, n_components, n_features). 
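The CP decomposition sketched in the docstring above can be checked numerically: summing the outer products of matching (x, y, z) component vectors rebuilds a dense grid, and the basis matrix then mixes the per-component channels into `n_features`. All shapes below are illustrative, and interpolation is ignored:

import torch

n_components, n_features = 4, 2
W, H, D = 5, 6, 7
x = torch.rand(n_components, W)
y = torch.rand(n_components, H)
z = torch.rand(n_components, D)
basis = torch.rand(n_components, n_features)

# Per-component outer products x_i βŠ— y_i βŠ— z_i, stacked along the component axis,
# then mixed into feature channels with the basis matrix.
grid_per_component = torch.einsum("cw,ch,cd->whdc", x, y, z)   # (W, H, D, C)
grid = grid_per_component @ basis                               # (W, H, D, F)

# The same reconstruction written as the explicit sum from the docstring's formula.
grid_loop = sum(
    torch.einsum("w,h,d->whd", x[i], y[i], z[i])[..., None] * basis[i]
    for i in range(n_components)
)
print(torch.allclose(grid, grid_loop))  # True
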
This multiplication - brings us to the desired "n_features" dimensionality. If basis_matrix=False the elements - of different components are summed together to create (n_grids, n_components, 1) tensor. - With some notation abuse, ignoring the interpolation operation, simplifying and denoting - n_features as F, n_components as C and n_grids as G: - - 3d_grid = (x βŠ— y βŠ— z) @ basis # GWHDC x GCF -> GWHDF - - The basis feature vectors are passed as obj.basis_matrix. - - Members: - n_components: number of vector triplets, higher number gives better approximation. - basis_matrix: how to transform components. If matrix_reduction=True result - matrix of shape (n_grids, n_points_total, n_components) is batch matrix multiplied - by the basis_matrix of shape (n_grids, n_components, n_features). If - matrix_reduction=False, the result tensor of (n_grids, n_points_total, n_components) - is summed along the rows to get (n_grids, n_points_total, 1), which is then viewed - to return to starting shape (n_grids, ..., 1). - """ - - # the type of grid_values argument needed to run evaluate_local() - values_type: ClassVar[Type[VoxelGridValuesBase]] = CPFactorizedVoxelGridValues - - n_components: int = 24 - basis_matrix: bool = True - - # pyre-fixme[14]: `evaluate_local` overrides method defined in `VoxelGridBase` - # inconsistently. - def evaluate_local( - self, points: torch.Tensor, grid_values: CPFactorizedVoxelGridValues - ) -> torch.Tensor: - def factor(axis): - i = {"x": 0, "y": 1, "z": 2}[axis] - index = points[..., i, None] - vector = getattr(grid_values, "vector_components_" + axis) - return interpolate_line( - index, - vector, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) - - # (n_grids, n_points_total, n_features) from (n_grids, ..., n_features) - recorded_shape = points.shape - points = points.view(points.shape[0], -1, points.shape[-1]) - - # collect points from all the vectors and multipy them out - mult = factor("x") * factor("y") * factor("z") - - # reduce the result from - # (n_grids, n_points_total, n_components) to (n_grids, n_points_total, n_features) - if grid_values.basis_matrix is not None: - # (n_grids, n_points_total, n_features) = - # (n_grids, n_points_total, total_n_components) @ - # (n_grids, total_n_components, n_features) - result = torch.bmm(mult, grid_values.basis_matrix) - else: - # (n_grids, n_points_total, 1) from (n_grids, n_points_total, n_features) - result = mult.sum(axis=-1, keepdim=True) - # (n_grids, ..., n_features) - return result.view(*recorded_shape[:-1], -1) - - def get_shapes(self, epoch: int) -> Dict[str, Tuple[int, int]]: - if self.basis_matrix is False and self.n_features != 1: - raise ValueError("Cannot set basis_matrix=False and n_features to != 1") - - width, height, depth = self.get_resolution(epoch=epoch) - shape_dict = { - "vector_components_x": (self.n_components, width), - "vector_components_y": (self.n_components, height), - "vector_components_z": (self.n_components, depth), - } - if self.basis_matrix: - shape_dict["basis_matrix"] = (self.n_components, self.n_features) - return shape_dict - - # pyre-ignore[14] - def crop_local( - self, - min_point_local: torch.Tensor, - max_point_local: torch.Tensor, - grid_values: CPFactorizedVoxelGridValues, - ) -> CPFactorizedVoxelGridValues: - assert torch.all(min_point_local < max_point_local) - min_point_local = torch.clamp(min_point_local, -1, 1) - max_point_local = torch.clamp(max_point_local, -1, 1) - _, _, width = grid_values.vector_components_x.shape - _, _, 
height = grid_values.vector_components_y.shape - _, _, depth = grid_values.vector_components_z.shape - resolution = grid_values.vector_components_x.new_tensor([width, height, depth]) - min_point_local01 = (min_point_local + 1) / 2 - max_point_local01 = (max_point_local + 1) / 2 - - if self.align_corners: - minx, miny, minz = torch.floor(min_point_local01 * (resolution - 1)).long() - maxx, maxy, maxz = torch.ceil(max_point_local01 * (resolution - 1)).long() - else: - minx, miny, minz = torch.floor(min_point_local01 * resolution - 0.5).long() - maxx, maxy, maxz = torch.ceil(max_point_local01 * resolution - 0.5).long() - - return CPFactorizedVoxelGridValues( - vector_components_x=grid_values.vector_components_x[:, :, minx : maxx + 1], - vector_components_y=grid_values.vector_components_y[:, :, miny : maxy + 1], - vector_components_z=grid_values.vector_components_z[:, :, minz : maxz + 1], - basis_matrix=grid_values.basis_matrix, - ) - - -@dataclass -class VMFactorizedVoxelGridValues(VoxelGridValuesBase): - vector_components_x: torch.Tensor - vector_components_y: torch.Tensor - vector_components_z: torch.Tensor - matrix_components_xy: torch.Tensor - matrix_components_yz: torch.Tensor - matrix_components_xz: torch.Tensor - basis_matrix: Optional[torch.Tensor] = None - - -@registry.register -class VMFactorizedVoxelGrid(VoxelGridBase): - """ - Implementation of Vector-Matrix Factorization of a tensor from - https://arxiv.org/abs/2203.09517. - - Vector-Matrix Factorization factorizes the 3d grid into three matrices - (xy, xz, yz) and three vectors (x, y, z). For n_components=1, the 3d grid - is a sum of the outer products (call it βŠ—) of each matrix with its - complementary vector: - - 3d_grid = xy βŠ— z + xz βŠ— y + yz βŠ— x. - - These tensors are passed in a VMFactorizedVoxelGridValues object (here obj) - as obj.matrix_components_xy, obj.matrix_components_xy, obj.vector_components_y, etc. - - Their shapes are `(n_grids, n_components, r0, r1)` for matrix_components and - (n_grids, n_components, r2)` for vector_componenets. Each of `r0, r1 and r2` coresponds - to one resolution in (width, height and depth). - - Each element of this sum has an extra dimension, which gets matrix-multiplied by an - appropriate "basis matrix" of shape (n_grids, n_components, n_features). This multiplication - brings us to the desired "n_features" dimensionality. If basis_matrix=False the elements - of different components are summed together to create (n_grids, n_components, 1) tensor. - With some notation abuse, ignoring the interpolation operation, simplifying and denoting - n_features as F, n_components as C (which can differ for each dimension) and n_grids as G: - - 3d_grid = concat((xy βŠ— z), (xz βŠ— y).permute(0, 2, 1), - (yz βŠ— x).permute(2, 0, 1)) @ basis_matrix # GWHDC x GCF -> GWHDF - - Members: - n_components: total number of matrix vector pairs, this must be divisible by 3. Set - this if you want to have equal representational power in all 3 directions. You - must specify either n_components or distribution_of_components, you cannot - specify both. - distribution_of_components: if you do not want equal representational power in - all 3 directions specify a tuple of numbers of matrix_vector pairs for each - coordinate of a form (n_xy_planes, n_yz_planes, n_xz_planes). You must specify - either n_components or distribution_of_components, you cannot specify both. - basis_matrix: how to transform components. 
If matrix_reduction=True result - matrix of shape (n_grids, n_points_total, n_components) is batch matrix multiplied - by the basis_matrix of shape (n_grids, n_components, n_features). If - matrix_reduction=False, the result tensor of (n_grids, n_points_total, n_components) - is summed along the rows to get (n_grids, n_points_total, 1), which is then viewed - to return to starting shape (n_grids, ..., 1). - """ - - # the type of grid_values argument needed to run evaluate_local() - values_type: ClassVar[Type[VoxelGridValuesBase]] = VMFactorizedVoxelGridValues - - n_components: Optional[int] = None - distribution_of_components: Optional[Tuple[int, int, int]] = None - basis_matrix: bool = True - - # pyre-fixme[14]: `evaluate_local` overrides method defined in `VoxelGridBase` - # inconsistently. - def evaluate_local( - self, points: torch.Tensor, grid_values: VMFactorizedVoxelGridValues - ) -> torch.Tensor: - # (n_grids, n_points_total, n_features) from (n_grids, ..., n_features) - recorded_shape = points.shape - points = points.view(points.shape[0], -1, points.shape[-1]) - - # collect points from matrices and vectors and multiply them - a = interpolate_plane( - points[..., :2], - grid_values.matrix_components_xy, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) * interpolate_line( - points[..., 2:], - grid_values.vector_components_z, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) - b = interpolate_plane( - points[..., [0, 2]], - grid_values.matrix_components_xz, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) * interpolate_line( - points[..., 1:2], - grid_values.vector_components_y, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) - c = interpolate_plane( - points[..., 1:], - grid_values.matrix_components_yz, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) * interpolate_line( - points[..., :1], - grid_values.vector_components_x, - align_corners=self.align_corners, - padding_mode=self.padding, - mode=self.mode, - ) - # pyre-ignore[28] - feats = torch.cat((a, b, c), axis=-1) - - # reduce the result from - # (n_grids, n_points, n_components) to (n_grids, n_points, n_features) - if grid_values.basis_matrix is not None: - # (n_grids, n_points, n_features) = - # (n_grids, n_points, total_n_components) x - # (n_grids, total_n_components, n_features) - result = torch.bmm(feats, grid_values.basis_matrix) - else: - # pyre-ignore[28] - # (n_grids, n_points, 1) from (n_grids, n_points, n_features) - result = feats.sum(axis=-1, keepdim=True) - # (n_grids, ..., n_features) - return result.view(*recorded_shape[:-1], -1) - - def get_shapes(self, epoch: int) -> Dict[str, Tuple]: - if self.basis_matrix is False and self.n_features != 1: - raise ValueError("Cannot set basis_matrix=False and n_features to != 1") - if self.distribution_of_components is None and self.n_components is None: - raise ValueError( - "You need to provide n_components or distribution_of_components" - ) - if ( - self.distribution_of_components is not None - and self.n_components is not None - ): - raise ValueError( - "You cannot define n_components and distribution_of_components" - ) - # pyre-ignore[58] - if self.distribution_of_components is None and self.n_components % 3 != 0: - raise ValueError("n_components must be divisible by 3") - if self.distribution_of_components is None: - calculated_distribution_of_components = [ - # pyre-fixme[58]: `//` 
is not supported for operand types - # `Optional[int]` and `int`. - self.n_components // 3 - for _ in range(3) - ] - else: - calculated_distribution_of_components = self.distribution_of_components - - width, height, depth = self.get_resolution(epoch=epoch) - shape_dict = { - "vector_components_x": ( - calculated_distribution_of_components[1], - width, - ), - "vector_components_y": ( - calculated_distribution_of_components[2], - height, - ), - "vector_components_z": ( - calculated_distribution_of_components[0], - depth, - ), - "matrix_components_xy": ( - calculated_distribution_of_components[0], - width, - height, - ), - "matrix_components_yz": ( - calculated_distribution_of_components[1], - height, - depth, - ), - "matrix_components_xz": ( - calculated_distribution_of_components[2], - width, - depth, - ), - } - if self.basis_matrix: - shape_dict["basis_matrix"] = ( - sum(calculated_distribution_of_components), - self.n_features, - ) - - return shape_dict - - # pyre-ignore[14] - def crop_local( - self, - min_point_local: torch.Tensor, - max_point_local: torch.Tensor, - grid_values: VMFactorizedVoxelGridValues, - ) -> VMFactorizedVoxelGridValues: - assert torch.all(min_point_local < max_point_local) - min_point_local = torch.clamp(min_point_local, -1, 1) - max_point_local = torch.clamp(max_point_local, -1, 1) - _, _, width = grid_values.vector_components_x.shape - _, _, height = grid_values.vector_components_y.shape - _, _, depth = grid_values.vector_components_z.shape - resolution = grid_values.vector_components_x.new_tensor([width, height, depth]) - min_point_local01 = (min_point_local + 1) / 2 - max_point_local01 = (max_point_local + 1) / 2 - - if self.align_corners: - minx, miny, minz = torch.floor(min_point_local01 * (resolution - 1)).long() - maxx, maxy, maxz = torch.ceil(max_point_local01 * (resolution - 1)).long() - else: - minx, miny, minz = torch.floor(min_point_local01 * resolution - 0.5).long() - maxx, maxy, maxz = torch.ceil(max_point_local01 * resolution - 0.5).long() - - return VMFactorizedVoxelGridValues( - vector_components_x=grid_values.vector_components_x[:, :, minx : maxx + 1], - vector_components_y=grid_values.vector_components_y[:, :, miny : maxy + 1], - vector_components_z=grid_values.vector_components_z[:, :, minz : maxz + 1], - matrix_components_xy=grid_values.matrix_components_xy[ - :, :, minx : maxx + 1, miny : maxy + 1 - ], - matrix_components_yz=grid_values.matrix_components_yz[ - :, :, miny : maxy + 1, minz : maxz + 1 - ], - matrix_components_xz=grid_values.matrix_components_xz[ - :, :, minx : maxx + 1, minz : maxz + 1 - ], - basis_matrix=grid_values.basis_matrix, - ) - - -# pyre-fixme[13]: Attribute `voxel_grid` is never initialized. -class VoxelGridModule(Configurable, torch.nn.Module): - """ - A wrapper torch.nn.Module for the VoxelGrid classes, which - contains parameters that are needed to train the VoxelGrid classes. - Can contain the parameters for the voxel grid as pytorch parameters - or as registered buffers. - - Members: - voxel_grid_class_type: The name of the class to use for voxel_grid, - which must be available in the registry. Default FullResolutionVoxelGrid. - voxel_grid: An instance of `VoxelGridBase`. This is the object which - this class wraps. - extents: 3-tuple of a form (width, height, depth), denotes the size of the grid - in world units. - translation: 3-tuple of float. The center of the volume in world units as (x, y, z). - init_std: Parameters are initialized using the gaussian distribution - with mean=init_mean and std=init_std. 
Default 0.1 - init_mean: Parameters are initialized using the gaussian distribution - with mean=init_mean and std=init_std. Default 0. - hold_voxel_grid_as_parameters: if True components of the underlying voxel grids - will be saved as parameters and therefore be trainable. Default True. - param_groups: dictionary where keys are names of individual parameters - or module members and values are the parameter group where the - parameter/member will be sorted to. "self" key is used to denote the - parameter group at the module level. Possible keys, including the "self" key - do not have to be defined. By default all parameters are put into "default" - parameter group and have the learning rate defined in the optimizer, - it can be overridden at the: - - module level with β€œself” key, all the parameters and child - module's parameters will be put to that parameter group - - member level, which is the same as if the `param_groups` in that - member has key=β€œself” and value equal to that parameter group. - This is useful if members do not have `param_groups`, for - example torch.nn.Linear. - - parameter level, parameter with the same name as the key - will be put to that parameter group. - """ - - voxel_grid_class_type: str = "FullResolutionVoxelGrid" - voxel_grid: VoxelGridBase - - extents: Tuple[float, float, float] = (2.0, 2.0, 2.0) - translation: Tuple[float, float, float] = (0.0, 0.0, 0.0) - - init_std: float = 0.1 - init_mean: float = 0 - - hold_voxel_grid_as_parameters: bool = True - param_groups: Dict[str, str] = field(default_factory=lambda: {}) - - def __post_init__(self): - run_auto_creation(self) - n_grids = 1 # Voxel grid objects are batched. We need only a single grid. - shapes = self.voxel_grid.get_shapes(epoch=0) - params = { - name: torch.normal( - mean=torch.zeros((n_grids, *shape)) + self.init_mean, - std=self.init_std, - ) - for name, shape in shapes.items() - } - - self.set_voxel_grid_parameters(self.voxel_grid.values_type(**params)) - self._register_load_state_dict_pre_hook(self._create_parameters_with_new_size) - - def forward(self, points: torch.Tensor) -> torch.Tensor: - """ - Evaluates points in the world coordinate frame on the voxel_grid. - - Args: - points (torch.Tensor): tensor of points that you want to query - of a form (..., 3) - Returns: - torch.Tensor of shape (..., n_features) - """ - locator = self._get_volume_locator() - grid_values = self.voxel_grid.values_type(**self.params) - # voxel grids operate with extra n_grids dimension, which we fix to one - return self.voxel_grid.evaluate_world(points[None], grid_values, locator)[0] - - def set_voxel_grid_parameters(self, params: VoxelGridValuesBase) -> None: - """ - Sets the parameters of the underlying voxel grid. - - Args: - params: parameters of type `self.voxel_grid.values_type` which will - replace current parameters - """ - if self.hold_voxel_grid_as_parameters: - self.params = torch.nn.ParameterDict( - { - k: torch.nn.Parameter(val) - for k, val in vars(params).items() - if val is not None - } - ) - else: - # Torch Module to hold parameters since they can only be registered - # at object level. - self.params = _RegistratedBufferDict(vars(params)) - - @staticmethod - def get_output_dim(args: DictConfig) -> int: - """ - Utility to help predict the shape of the output of `forward`. 
- - Args: - args: DictConfig which would be used to initialize the object - Returns: - int: the length of the last dimension of the output tensor - """ - grid = registry.get(VoxelGridBase, args["voxel_grid_class_type"]) - return grid.get_output_dim( - args["voxel_grid_" + args["voxel_grid_class_type"] + "_args"] - ) - - def subscribe_to_epochs(self) -> Tuple[Tuple[int, ...], Callable[[int], bool]]: - """ - Method which expresses interest in subscribing to optimization epoch updates. - - Returns: - tuple of epochs on which to call a callable and callable to be called on - particular epoch. The callable returns True if parameter change has - happened else False and it must be supplied with one argument, epoch. - """ - return self.voxel_grid.get_resolution_change_epochs(), self._apply_epochs - - def _apply_epochs(self, epoch: int) -> bool: - """ - Asks voxel_grid to change the resolution. - This method is returned with subscribe_to_epochs and is the method that collects - updates on training epochs, it is run on the training epochs that are requested. - - Args: - epoch: current training epoch used for voxel grids to know to which - resolution to change - Returns: - True if parameter change has happened else False. - """ - grid_values = self.voxel_grid.values_type(**self.params) - grid_values, change = self.voxel_grid.change_resolution( - grid_values, epoch=epoch - ) - if change: - self.set_voxel_grid_parameters(grid_values) - return change and self.hold_voxel_grid_as_parameters - - def _create_parameters_with_new_size( - self, - state_dict: dict, - prefix: str, - local_metadata: dict, - strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str], - ) -> None: - ''' - Automatically ran before loading the parameters with `load_state_dict()`. - Creates new parameters with the sizes of the ones in the loaded state dict. - This is necessary because the parameters are changing throughout training and - at the time of construction `VoxelGridModule` does not know the size of - parameters which will be loaded. - - Args: - state_dict (dict): a dict containing parameters and - persistent buffers. - prefix (str): the prefix for parameters and buffers used in this - module - local_metadata (dict): a dict containing the metadata for this module. - See - strict (bool): whether to strictly enforce that the keys in - :attr:`state_dict` with :attr:`prefix` match the names of - parameters and buffers in this module - missing_keys (list of str): if ``strict=True``, add missing keys to - this list - unexpected_keys (list of str): if ``strict=True``, add unexpected - keys to this list - error_msgs (list of str): error messages should be added to this - list, and will be reported together in - :meth:`~torch.nn.Module.load_state_dict` - Returns: - nothing - """ - ''' - new_params = {} - for name in self.params: - key = prefix + "params." + name - if key in state_dict: - new_params[name] = torch.zeros_like(state_dict[key]) - self.set_voxel_grid_parameters(self.voxel_grid.values_type(**new_params)) - - def get_device(self) -> torch.device: - """ - Returns torch.device on which module parameters are located - """ - return next(val for val in self.params.values() if val is not None).device - - def crop_self(self, min_point: torch.Tensor, max_point: torch.Tensor) -> None: - """ - Crops self to only represent points between min_point and max_point (inclusive). - - Args: - min_point: torch.Tensor of shape (3,). Has x, y and z coordinates - smaller or equal to all other occupied points. 
- max_point: torch.Tensor of shape (3,). Has x, y and z coordinates - bigger or equal to all other occupied points. - Returns: - nothing - """ - locator = self._get_volume_locator() - # torch.nn.modules.module.Module]` is not a function. - old_grid_values = self.voxel_grid.values_type(**self.params) - new_grid_values = self.voxel_grid.crop_world( - min_point, max_point, old_grid_values, locator - ) - grid_values, _ = self.voxel_grid.change_resolution( - new_grid_values, grid_values_with_wanted_resolution=old_grid_values - ) - self.params = torch.nn.ParameterDict( - { - k: torch.nn.Parameter(val) - for k, val in vars(grid_values).items() - if val is not None - } - ) - # New center of voxel grid is the middle point between max and min points. - self.translation = tuple((max_point + min_point) / 2) - # new extents of voxel grid are distances between min and max points - self.extents = tuple(max_point - min_point) - - def _get_volume_locator(self) -> VolumeLocator: - """ - Returns VolumeLocator calculated from `extents` and `translation` members. - """ - return VolumeLocator( - batch_size=1, - # The resolution of the voxel grid does not need to be known - # to the locator object. It is easiest to fix the resolution of the locator. - # In particular we fix it to (2,2,2) so that there is exactly one voxel of the - # desired size. The locator object uses (z, y, x) convention for the grid_size, - # and this module uses (x, y, z) convention so the order has to be reversed - # (irrelevant in this case since they are all equal). - # It is (2, 2, 2) because the VolumeLocator object behaves like - # align_corners=True, which means that the points are in the corners of - # the volume. So in the grid of (2, 2, 2) there is only one voxel. - grid_sizes=(2, 2, 2), - # The locator object uses (x, y, z) convention for the - # voxel size and translation. - voxel_size=tuple(self.extents), - # volume_translation is defined in `VolumeLocator` as a vector from the origin - # of local coordinate frame to origin of world coordinate frame, that is: - # x_world = x_local * extents/2 - translation. - # To get the reverse we need to negate it. - volume_translation=tuple(-t for t in self.translation), - device=self.get_device(), - ) - - def get_grid_points(self, epoch: int) -> torch.Tensor: - """ - Returns a grid of points that represent centers of voxels of the - underlying voxel grid in world coordinates at specific epoch. - - Args: - epoch: underlying voxel grids change resolution depending on the - epoch, this argument is used to determine the resolution - of the voxel grid at that epoch. 
- Returns: - tensor of shape [xresolution, yresolution, zresolution, 3] where - xresolution, yresolution, zresolution are resolutions of the - underlying voxel grid - """ - xresolution, yresolution, zresolution = self.voxel_grid.get_resolution(epoch) - width, height, depth = self.extents - if not self.voxel_grid.get_align_corners(): - width = ( - width * (xresolution - 1) / xresolution if xresolution > 1 else width - ) - height = ( - height * (xresolution - 1) / xresolution if xresolution > 1 else height - ) - depth = ( - depth * (xresolution - 1) / xresolution if xresolution > 1 else depth - ) - xs = torch.linspace( - -width / 2, width / 2, xresolution, device=self.get_device() - ) - ys = torch.linspace( - -height / 2, height / 2, yresolution, device=self.get_device() - ) - zs = torch.linspace( - -depth / 2, depth / 2, zresolution, device=self.get_device() - ) - xmesh, ymesh, zmesh = torch.meshgrid(xs, ys, zs, indexing="ij") - return torch.stack((xmesh, ymesh, zmesh), dim=3) - - -class _RegistratedBufferDict(torch.nn.Module, Mapping): - """ - Mapping class and a torch.nn.Module that registeres its values - with `self.register_buffer`. Can be indexed like a regular Python - dictionary, but torch.Tensors it contains are properly registered, and will be visible - by all Module methods. Supports only `torch.Tensor` as value and str as key. - """ - - def __init__(self, init_dict: Optional[Dict[str, torch.Tensor]] = None) -> None: - """ - Args: - init_dict: dictionary which will be used to populate the object - """ - super().__init__() - self._keys = set() - if init_dict is not None: - for k, v in init_dict.items(): - self[k] = v - - def __iter__(self) -> Iterator[Dict[str, torch.Tensor]]: - return iter({k: self[k] for k in self._keys}) - - def __len__(self) -> int: - return len(self._keys) - - def __getitem__(self, key: str) -> torch.Tensor: - return getattr(self, key) - - def __setitem__(self, key, value) -> None: - self._keys.add(key) - self.register_buffer(key, value) - - def __hash__(self) -> int: - return hash(repr(self)) diff --git a/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py b/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py deleted file mode 100644 index b413204600192ba387332ce8c5e8d166f3be3c46..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py +++ /dev/null @@ -1,616 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
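Since the patch above removes VoxelGridModule wholesale, a minimal usage sketch may help readers of this change. It is based only on the dataclass fields and the forward() contract documented above; instantiating the module directly via expand_args_fields (from pytorch3d.implicitron.tools.config, imported elsewhere in this patch) is an assumption about the Implicitron config system, not part of the removed file.

import torch
from pytorch3d.implicitron.tools.config import expand_args_fields
from pytorch3d.implicitron.models.implicit_function.voxel_grid import VoxelGridModule

expand_args_fields(VoxelGridModule)            # resolve the Configurable/dataclass fields
grid = VoxelGridModule(
    extents=(2.0, 2.0, 2.0),                   # world-space size of the grid
    translation=(0.0, 0.0, 0.0),               # world-space center of the grid
    init_std=0.1,
    init_mean=0.0,
)
points = torch.rand(4, 1024, 3) * 2.0 - 1.0    # world-space query points, shape (..., 3)
features = grid(points)                        # per the docstring: shape (4, 1024, n_features)

Note that the module stores its values either as trainable parameters or as registered buffers (hold_voxel_grid_as_parameters), which is why crop_self and the load_state_dict pre-hook above rebuild self.params rather than resizing tensors in place.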
- -import logging -import math -import warnings -from dataclasses import fields -from typing import Callable, Dict, Optional, Tuple - -import torch - -from omegaconf import DictConfig - -from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase -from pytorch3d.implicitron.models.implicit_function.decoding_functions import ( - DecoderFunctionBase, -) -from pytorch3d.implicitron.models.implicit_function.voxel_grid import VoxelGridModule -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import ( - enable_get_default_args, - get_default_args_field, - registry, - run_auto_creation, -) -from pytorch3d.renderer import ray_bundle_to_ray_points -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.implicit import HarmonicEmbedding - -logger = logging.getLogger(__name__) - - -enable_get_default_args(HarmonicEmbedding) - - -@registry.register -# pyre-ignore[13] -class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module): - """ - This implicit function consists of two streams, one for the density calculation and one - for the color calculation. Each of these streams has three main parts: - 1) Voxel grids: - They take the (x, y, z) position and return the embedding of that point. - These components are replaceable, you can make your own or choose one of - several options. - 2) Harmonic embeddings: - Convert each feature into series of 'harmonic features', feature is passed through - sine and cosine functions. Input is of shape [minibatch, ..., D] output - [minibatch, ..., (n_harmonic_functions * 2 + int(append_input)) * D]. Appends - input by default. If you want it to behave like identity, put n_harmonic_functions=0 - and append_input=True. - 3) Decoding functions: - The decoder is an instance of the DecoderFunctionBase and converts the embedding - of a spatial location to density/color. Examples are Identity which returns its - input and the MLP which uses fully connected nerual network to transform the input. - These components are replaceable, you can make your own or choose from - several options. - - Calculating density is done in three steps: - 1) Evaluating the voxel grid on points - 2) Embedding the outputs with harmonic embedding - 3) Passing through the Density decoder - - To calculate the color we need the embedding and the viewing direction, it has five steps: - 1) Transforming the viewing direction with camera - 2) Evaluating the voxel grid on points - 3) Embedding the outputs with harmonic embedding - 4) Embedding the normalized direction with harmonic embedding - 5) Passing everything through the Color decoder - - If using the Implicitron configuration system the input_dim to the decoding functions will - be set to the output_dim of the Harmonic embeddings. - - A speed up comes from using the scaffold, a low resolution voxel grid. - The scaffold is referenced as "binary occupancy grid mask" in TensoRF paper and "AlphaMask" - in official TensoRF implementation. - The scaffold is used in: - 1) filtering points in empty space - - controlled by `scaffold_filter_points` boolean. If set to True, points for which - scaffold predicts that are in empty space will return 0 density and - (0, 0, 0) color. - 2) calculating the bounding box of an object and cropping the voxel grids - - controlled by `volume_cropping_epochs`. - - at those epochs the implicit function will find the bounding box of an object - inside it and crop density and color grids. 
Cropping of the voxel grids means - preserving only voxel values that are inside the bounding box and changing the - resolution to match the original, while preserving the new cropped location in - world coordinates. - - The scaffold has to exist before attempting filtering and cropping, and is created on - `scaffold_calculating_epochs`. Each voxel in the scaffold is labeled as having density 1 if - the point in the center of it evaluates to greater than `scaffold_empty_space_threshold`. - 3D max pooling is performed on the densities of the points in 3D. - Scaffold features are off by default. - - Members: - voxel_grid_density (VoxelGridBase): voxel grid to use for density estimation - voxel_grid_color (VoxelGridBase): voxel grid to use for color estimation - - harmonic_embedder_xyz_density (HarmonicEmbedder): Function to transform the outputs of - the voxel_grid_density - harmonic_embedder_xyz_color (HarmonicEmbedder): Function to transform the outputs of - the voxel_grid_color for density - harmonic_embedder_dir_color (HarmonicEmbedder): Function to transform the outputs of - the voxel_grid_color for color - - decoder_density (DecoderFunctionBase): decoder function to use for density estimation - color_density (DecoderFunctionBase): decoder function to use for color estimation - - use_multiple_streams (bool): if you want the density and color calculations to run on - different cuda streams set this to True. Default True. - xyz_ray_dir_in_camera_coords (bool): This is true if the directions are given in - camera coordinates. Default False. - - voxel_grid_scaffold (VoxelGridModule): which holds the scaffold. Extents and - translation of it are set to those of voxel_grid_density. - scaffold_calculating_epochs (Tuple[int, ...]): at which epochs to recalculate the - scaffold. (The scaffold will be created automatically at the beginning of - the calculation.) - scaffold_resolution (Tuple[int, int, int]): (width, height, depth) of the underlying - voxel grid which stores scaffold - scaffold_empty_space_threshold (float): if `self._get_density` evaluates to less than - this it will be considered as empty space and the scaffold at that point would - evaluate as empty space. - scaffold_occupancy_chunk_size (str or int): Number of xy scaffold planes to calculate - at the same time. To calculate the scaffold we need to query `_get_density()` at - every voxel, this calculation can be split into scaffold depth number of xy plane - calculations if you want the lowest memory usage, one calculation to calculate the - whole scaffold, but with higher memory footprint or any other number of planes. - Setting to a non-positive number calculates all planes at the same time. - Defaults to '-1' (=calculating all planes). - scaffold_max_pool_kernel_size (int): Size of the pooling region to use when - calculating the scaffold. Defaults to 3. - scaffold_filter_points (bool): If set to True the points will be filtered using - `self.voxel_grid_scaffold`. Filtered points will be predicted as having 0 density - and (0, 0, 0) color. The points which were not evaluated as empty space will be - passed through the steps outlined above. - volume_cropping_epochs: on which epochs to crop the voxel grids to fit the object's - bounding box. Scaffold has to be calculated before cropping. 
- """ - - # ---- voxel grid for density - voxel_grid_density: VoxelGridModule - - # ---- voxel grid for color - voxel_grid_color: VoxelGridModule - - # ---- harmonic embeddings density - harmonic_embedder_xyz_density_args: DictConfig = get_default_args_field( - HarmonicEmbedding - ) - harmonic_embedder_xyz_color_args: DictConfig = get_default_args_field( - HarmonicEmbedding - ) - harmonic_embedder_dir_color_args: DictConfig = get_default_args_field( - HarmonicEmbedding - ) - - # ---- decoder function for density - decoder_density_class_type: str = "MLPDecoder" - decoder_density: DecoderFunctionBase - - # ---- decoder function for color - decoder_color_class_type: str = "MLPDecoder" - decoder_color: DecoderFunctionBase - - # ---- cuda streams - use_multiple_streams: bool = True - - # ---- camera - xyz_ray_dir_in_camera_coords: bool = False - - # --- scaffold - # voxel_grid_scaffold: VoxelGridModule - scaffold_calculating_epochs: Tuple[int, ...] = () - scaffold_resolution: Tuple[int, int, int] = (128, 128, 128) - scaffold_empty_space_threshold: float = 0.001 - scaffold_occupancy_chunk_size: int = -1 - scaffold_max_pool_kernel_size: int = 3 - scaffold_filter_points: bool = True - - # --- cropping - volume_cropping_epochs: Tuple[int, ...] = () - - def __post_init__(self) -> None: - run_auto_creation(self) - self.voxel_grid_scaffold = self._create_voxel_grid_scaffold() - self.harmonic_embedder_xyz_density = HarmonicEmbedding( - **self.harmonic_embedder_xyz_density_args - ) - self.harmonic_embedder_xyz_color = HarmonicEmbedding( - **self.harmonic_embedder_xyz_color_args - ) - self.harmonic_embedder_dir_color = HarmonicEmbedding( - **self.harmonic_embedder_dir_color_args - ) - self._scaffold_ready = False - - def forward( - self, - ray_bundle: ImplicitronRayBundle, - fun_viewpool=None, - camera: Optional[CamerasBase] = None, - global_code=None, - **kwargs, - ) -> Tuple[torch.Tensor, torch.Tensor, Dict]: - """ - The forward function accepts the parametrizations of 3D points sampled along - projection rays. The forward pass is responsible for attaching a 3D vector - and a 1D scalar representing the point's RGB color and opacity respectively. - - Args: - ray_bundle: An ImplicitronRayBundle object containing the following variables: - origins: A tensor of shape `(minibatch, ..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(minibatch, ..., 3)` - containing the direction vectors of sampling rays in world coords. - lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)` - containing the lengths at which the rays are sampled. - fun_viewpool: an optional callback with the signature - fun_fiewpool(points) -> pooled_features - where points is a [N_TGT x N x 3] tensor of world coords, - and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor - of the features pooled from the context images. - camera: A camera model which will be used to transform the viewing - directions - - Returns: - rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)` - denoting the opacitiy of each ray point. - rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)` - denoting the color of each ray point. 
- """ - # ########## convert the ray parametrizations to world coordinates ########## # - # points.shape = [minibatch x n_rays_width x n_rays_height x pts_per_ray x 3] - # pyre-ignore[6] - points = ray_bundle_to_ray_points(ray_bundle) - directions = ray_bundle.directions.reshape(-1, 3) - input_shape = points.shape - num_points_per_ray = input_shape[-2] - points = points.view(-1, 3) - non_empty_points = None - - # ########## filter the points using the scaffold ########## # - if self._scaffold_ready and self.scaffold_filter_points: - with torch.no_grad(): - non_empty_points = self.voxel_grid_scaffold(points)[..., 0] > 0 - points = points[non_empty_points] - if len(points) == 0: - warnings.warn( - "The scaffold has filtered all the points." - "The voxel grids and decoding functions will not be run." - ) - return ( - points.new_zeros((*input_shape[:-1], 1)), - points.new_zeros((*input_shape[:-1], 3)), - {}, - ) - - # ########## calculate color and density ########## # - rays_densities, rays_colors = self._calculate_density_and_color( - points, directions, camera, non_empty_points, num_points_per_ray - ) - - if not (self._scaffold_ready and self.scaffold_filter_points): - return ( - rays_densities.view((*input_shape[:-1], rays_densities.shape[-1])), - rays_colors.view((*input_shape[:-1], rays_colors.shape[-1])), - {}, - ) - - # ########## merge scaffold calculated points ########## # - # Create a zeroed tensor corresponding to a point with density=0 and fill it - # with calculated density for points which are not in empty space. Do the - # same for color - rays_densities_combined = rays_densities.new_zeros( - (math.prod(input_shape[:-1]), rays_densities.shape[-1]) - ) - rays_colors_combined = rays_colors.new_zeros( - (math.prod(input_shape[:-1]), rays_colors.shape[-1]) - ) - assert non_empty_points is not None - rays_densities_combined[non_empty_points] = rays_densities - rays_colors_combined[non_empty_points] = rays_colors - - return ( - rays_densities_combined.view((*input_shape[:-1], rays_densities.shape[-1])), - rays_colors_combined.view((*input_shape[:-1], rays_colors.shape[-1])), - {}, - ) - - def _calculate_density_and_color( - self, - points: torch.Tensor, - directions: torch.Tensor, - camera: Optional[CamerasBase], - non_empty_points: Optional[torch.Tensor], - num_points_per_ray: int, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Calculates density and color at `points`. - If enabled use cuda streams. - - Args: - points: points at which to calculate density and color. - Tensor of shape [n_points, 3]. - directions: from which directions are the points viewed. - One per ray. Tensor of shape [n_rays, 3]. - camera: A camera model which will be used to transform the viewing - directions - non_empty_points: indices of points which weren't filtered out; - used for expanding directions - num_points_per_ray: number of points per ray, needed to expand directions. 
- Returns: - Tuple of color (tensor of shape [..., 3]) and density - (tensor of shape [..., 1]) - """ - if self.use_multiple_streams and points.is_cuda: - current_stream = torch.cuda.current_stream(points.device) - other_stream = torch.cuda.Stream(points.device) - other_stream.wait_stream(current_stream) - - with torch.cuda.stream(other_stream): - # rays_densities.shape = - # [minibatch x n_rays_width x n_rays_height x pts_per_ray x density_dim] - rays_densities = self._get_density(points) - - # rays_colors.shape = - # [minibatch x n_rays_width x n_rays_height x pts_per_ray x color_dim] - rays_colors = self._get_color( - points, camera, directions, non_empty_points, num_points_per_ray - ) - - current_stream.wait_stream(other_stream) - else: - # Same calculation as above, just serial. - rays_densities = self._get_density(points) - rays_colors = self._get_color( - points, camera, directions, non_empty_points, num_points_per_ray - ) - return rays_densities, rays_colors - - def _get_density(self, points: torch.Tensor) -> torch.Tensor: - """ - Calculates density at points: - 1) Evaluates the voxel grid on points - 2) Embeds the outputs with harmonic embedding - 3) Passes everything through the Density decoder - - Args: - points: tensor of shape [..., 3] - where the last dimension is the points in the (x, y, z) - Returns: - calculated densities of shape [..., density_dim], `density_dim` is the - feature dimensionality which `decoder_density` returns - """ - embeds_density = self.voxel_grid_density(points) - harmonic_embedding_density = self.harmonic_embedder_xyz_density(embeds_density) - # shape = [..., density_dim] - return self.decoder_density(harmonic_embedding_density) - - def _get_color( - self, - points: torch.Tensor, - camera: Optional[CamerasBase], - directions: torch.Tensor, - non_empty_points: Optional[torch.Tensor], - num_points_per_ray: int, - ) -> torch.Tensor: - """ - Calculates color at points using the viewing direction: - 1) Transforms the viewing direction with camera - 2) Evaluates the voxel grid on points - 3) Embeds the outputs with harmonic embedding - 4) Embeds the normalized direction with harmonic embedding - 5) Passes everything through the Color decoder - Args: - points: tensor of shape (..., 3) - where the last dimension is the points in the (x, y, z) - camera: A camera model which will be used to transform the viewing - directions - directions: A tensor of shape `(..., 3)` - containing the direction vectors of sampling rays in world coords. - non_empty_points: indices of points which weren't filtered out; - used for expanding directions - num_points_per_ray: number of points per ray, needed to expand directions. - """ - # ########## transform direction ########## # - if self.xyz_ray_dir_in_camera_coords: - if camera is None: - raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords") - directions = directions @ camera.R - - # ########## get voxel grid output ########## # - # embeds_color.shape = [..., pts_per_ray, n_features] - embeds_color = self.voxel_grid_color(points) - - # ########## embed with the harmonic function ########## # - # Obtain the harmonic embedding of the voxel grid output. - harmonic_embedding_color = self.harmonic_embedder_xyz_color(embeds_color) - - # Normalize the ray_directions to unit l2 norm. - rays_directions_normed = torch.nn.functional.normalize(directions, dim=-1) - # Obtain the harmonic embedding of the normalized ray directions. 
- harmonic_embedding_dir = self.harmonic_embedder_dir_color( - rays_directions_normed - ) - - harmonic_embedding_dir = torch.repeat_interleave( - harmonic_embedding_dir, num_points_per_ray, dim=0 - ) - if non_empty_points is not None: - harmonic_embedding_dir = harmonic_embedding_dir[non_empty_points] - - # total color embedding is concatenation of the harmonic embedding of voxel grid - # output and harmonic embedding of the normalized direction - total_color_embedding = torch.cat( - (harmonic_embedding_color, harmonic_embedding_dir), dim=-1 - ) - - # ########## evaluate color with the decoding function ########## # - # rays_colors.shape = [..., pts_per_ray, 3] in [0-1] - return self.decoder_color(total_color_embedding) - - @staticmethod - def allows_multiple_passes() -> bool: - """ - Returns True as this implicit function allows - multiple passes. Overridden from ImplicitFunctionBase. - """ - return True - - def subscribe_to_epochs(self) -> Tuple[Tuple[int, ...], Callable[[int], bool]]: - """ - Method which expresses interest in subscribing to optimization epoch updates. - This implicit function subscribes to epochs to calculate the scaffold and to - crop voxel grids, so this method combines wanted epochs and wraps their callbacks. - - Returns: - list of epochs on which to call a callable and callable to be called on - particular epoch. The callable returns True if parameter change has - happened else False and it must be supplied with one argument, epoch. - """ - - def callback(epoch) -> bool: - change = False - if epoch in self.scaffold_calculating_epochs: - change = self._get_scaffold(epoch) - if epoch in self.volume_cropping_epochs: - change = self._crop(epoch) or change - return change - - # remove duplicates - call_epochs = list( - set(self.scaffold_calculating_epochs) | set(self.volume_cropping_epochs) - ) - return call_epochs, callback - - def _crop(self, epoch: int) -> bool: - """ - Finds the bounding box of an object represented in the scaffold and crops - density and color voxel grids to match that bounding box. If density of the - scaffold is 0 everywhere (there is no object in it) no change will - happen. - - Args: - epoch: ignored - Returns: - True (indicating that parameter change has happened) if there is - an object inside, else False. - """ - # find bounding box - points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch) - assert self._scaffold_ready, "Scaffold has to be calculated before cropping." - occupancy = self.voxel_grid_scaffold(points)[..., 0] > 0 - non_zero_idxs = torch.nonzero(occupancy) - if len(non_zero_idxs) == 0: - return False - min_indices = tuple(torch.min(non_zero_idxs, dim=0)[0]) - max_indices = tuple(torch.max(non_zero_idxs, dim=0)[0]) - min_point, max_point = points[min_indices], points[max_indices] - - logger.info( - f"Cropping at epoch {epoch} to bounding box " - f"[{min_point.tolist()}, {max_point.tolist()}]." - ) - - # crop the voxel grids - self.voxel_grid_density.crop_self(min_point, max_point) - self.voxel_grid_color.crop_self(min_point, max_point) - return True - - @torch.no_grad() - def _get_scaffold(self, epoch: int) -> bool: - """ - Creates a low resolution grid which is used to filter points that are in empty - space. - - Args: - epoch: epoch on which it is called, ignored inside method - Returns: - Always False: Modifies `self.voxel_grid_scaffold` member. 
- """ - - planes = [] - points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch) - - chunk_size = ( - self.scaffold_occupancy_chunk_size - if self.scaffold_occupancy_chunk_size > 0 - else points.shape[-1] - ) - for k in range(0, points.shape[-1], chunk_size): - points_in_planes = points[..., k : k + chunk_size] - planes.append(self._get_density(points_in_planes)[..., 0]) - - density_cube = torch.cat(planes, dim=-1) - density_cube = torch.nn.functional.max_pool3d( - density_cube[None, None], - kernel_size=self.scaffold_max_pool_kernel_size, - padding=self.scaffold_max_pool_kernel_size // 2, - stride=1, - ) - occupancy_cube = density_cube > self.scaffold_empty_space_threshold - self.voxel_grid_scaffold.params["voxel_grid"] = occupancy_cube.float() - self._scaffold_ready = True - - return False - - @classmethod - def decoder_density_tweak_args(cls, type_, args: DictConfig) -> None: - args.pop("input_dim", None) - - def create_decoder_density_impl(self, type_, args: DictConfig) -> None: - """ - Decoding functions come after harmonic embedding and voxel grid. In order to not - calculate the input dimension of the decoder in the config file this function - calculates the required input dimension and sets the input dimension of the - decoding function to this value. - """ - grid_args = self.voxel_grid_density_args - grid_output_dim = VoxelGridModule.get_output_dim(grid_args) - - embedder_args = self.harmonic_embedder_xyz_density_args - input_dim = HarmonicEmbedding.get_output_dim_static( - grid_output_dim, - embedder_args["n_harmonic_functions"], - embedder_args["append_input"], - ) - - cls = registry.get(DecoderFunctionBase, type_) - need_input_dim = any(field.name == "input_dim" for field in fields(cls)) - if need_input_dim: - self.decoder_density = cls(input_dim=input_dim, **args) - else: - self.decoder_density = cls(**args) - - @classmethod - def decoder_color_tweak_args(cls, type_, args: DictConfig) -> None: - args.pop("input_dim", None) - - def create_decoder_color_impl(self, type_, args: DictConfig) -> None: - """ - Decoding functions come after harmonic embedding and voxel grid. In order to not - calculate the input dimension of the decoder in the config file this function - calculates the required input dimension and sets the input dimension of the - decoding function to this value. 
- """ - grid_args = self.voxel_grid_color_args - grid_output_dim = VoxelGridModule.get_output_dim(grid_args) - - embedder_args = self.harmonic_embedder_xyz_color_args - input_dim0 = HarmonicEmbedding.get_output_dim_static( - grid_output_dim, - embedder_args["n_harmonic_functions"], - embedder_args["append_input"], - ) - - dir_dim = 3 - embedder_args = self.harmonic_embedder_dir_color_args - input_dim1 = HarmonicEmbedding.get_output_dim_static( - dir_dim, - embedder_args["n_harmonic_functions"], - embedder_args["append_input"], - ) - - input_dim = input_dim0 + input_dim1 - - cls = registry.get(DecoderFunctionBase, type_) - need_input_dim = any(field.name == "input_dim" for field in fields(cls)) - if need_input_dim: - self.decoder_color = cls(input_dim=input_dim, **args) - else: - self.decoder_color = cls(**args) - - def _create_voxel_grid_scaffold(self) -> VoxelGridModule: - """ - Creates object to become self.voxel_grid_scaffold: - - makes `self.voxel_grid_scaffold` have same world to local mapping as - `self.voxel_grid_density` - """ - return VoxelGridModule( - extents=self.voxel_grid_density_args["extents"], - translation=self.voxel_grid_density_args["translation"], - voxel_grid_class_type="FullResolutionVoxelGrid", - hold_voxel_grid_as_parameters=False, - voxel_grid_FullResolutionVoxelGrid_args={ - "resolution_changes": {0: self.scaffold_resolution}, - "padding": "zeros", - "align_corners": True, - "mode": "trilinear", - }, - ) diff --git a/pytorch3d/pytorch3d/implicitron/models/metrics.py b/pytorch3d/pytorch3d/implicitron/models/metrics.py deleted file mode 100644 index edd4b9408d2e6d70c0ec017fd2077b5093248603..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/metrics.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import warnings -from typing import Any, Dict, Optional - -import torch -from pytorch3d.implicitron.models.renderer.ray_sampler import ImplicitronRayBundle -from pytorch3d.implicitron.tools import metric_utils as utils -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.ops import padded_to_packed -from pytorch3d.renderer import utils as rend_utils - -from .renderer.base import RendererOutput - - -class RegularizationMetricsBase(ReplaceableBase, torch.nn.Module): - """ - Replaceable abstract base for regularization metrics. - `forward()` method produces regularization metrics and (unlike ViewMetrics) can - depend on the model's parameters. - """ - - def forward( - self, model: Any, keys_prefix: str = "loss_", **kwargs - ) -> Dict[str, Any]: - """ - Calculates various regularization terms useful for supervising differentiable - rendering pipelines. - - Args: - model: A model instance. Useful, for example, to implement - weights-based regularization. - keys_prefix: A common prefix for all keys in the output dictionary - containing all regularization metrics. - - Returns: - A dictionary with the resulting regularization metrics. The items - will have form `{metric_name_i: metric_value_i}` keyed by the - names of the output metrics `metric_name_i` with their corresponding - values `metric_value_i` represented as 0-dimensional float tensors. - """ - raise NotImplementedError - - -class ViewMetricsBase(ReplaceableBase, torch.nn.Module): - """ - Replaceable abstract base for model metrics. 
- `forward()` method produces losses and other metrics. - """ - - def forward( - self, - raymarched: RendererOutput, - ray_bundle: ImplicitronRayBundle, - image_rgb: Optional[torch.Tensor] = None, - depth_map: Optional[torch.Tensor] = None, - fg_probability: Optional[torch.Tensor] = None, - mask_crop: Optional[torch.Tensor] = None, - keys_prefix: str = "loss_", - **kwargs, - ) -> Dict[str, Any]: - """ - Calculates various metrics and loss functions useful for supervising - differentiable rendering pipelines. Any additional parameters can be passed - in the `raymarched.aux` dictionary. - - Args: - results: A dictionary with the resulting view metrics. The items - will have form `{metric_name_i: metric_value_i}` keyed by the - names of the output metrics `metric_name_i` with their corresponding - values `metric_value_i` represented as 0-dimensional float tensors. - raymarched: Output of the renderer. - ray_bundle: ImplicitronRayBundle object which was used to produce the raymarched - object - image_rgb: A tensor of shape `(B, H, W, 3)` containing ground truth rgb - values. - depth_map: A tensor of shape `(B, Hd, Wd, 1)` containing ground truth depth - values. - fg_probability: A tensor of shape `(B, Hm, Wm, 1)` containing ground truth - foreground masks. - keys_prefix: A common prefix for all keys in the output dictionary - containing all view metrics. - - Returns: - A dictionary with the resulting view metrics. The items - will have form `{metric_name_i: metric_value_i}` keyed by the - names of the output metrics `metric_name_i` with their corresponding - values `metric_value_i` represented as 0-dimensional float tensors. - """ - raise NotImplementedError() - - -@registry.register -class RegularizationMetrics(RegularizationMetricsBase): - def forward( - self, model: Any, keys_prefix: str = "loss_", **kwargs - ) -> Dict[str, Any]: - """ - Calculates the AD penalty, or returns an empty dict if the model's autoencoder - is inactive. - - Args: - model: A model instance. - keys_prefix: A common prefix for all keys in the output dictionary - containing all regularization metrics. - - Returns: - A dictionary with the resulting regularization metrics. The items - will have form `{metric_name_i: metric_value_i}` keyed by the - names of the output metrics `metric_name_i` with their corresponding - values `metric_value_i` represented as 0-dimensional float tensors. - - The calculated metric is: - autoencoder_norm: Autoencoder weight norm regularization term. - """ - metrics = {} - if getattr(model, "sequence_autodecoder", None) is not None: - ad_penalty = model.sequence_autodecoder.calculate_squared_encoding_norm() - if ad_penalty is not None: - metrics["autodecoder_norm"] = ad_penalty - - if keys_prefix is not None: - metrics = {(keys_prefix + k): v for k, v in metrics.items()} - - return metrics - - -@registry.register -class ViewMetrics(ViewMetricsBase): - def forward( - self, - raymarched: RendererOutput, - ray_bundle: ImplicitronRayBundle, - image_rgb: Optional[torch.Tensor] = None, - depth_map: Optional[torch.Tensor] = None, - fg_probability: Optional[torch.Tensor] = None, - mask_crop: Optional[torch.Tensor] = None, - keys_prefix: str = "loss_", - **kwargs, - ) -> Dict[str, Any]: - """ - Calculates various differentiable metrics useful for supervising - differentiable rendering pipelines. - - Args: - results: A dict to store the results in. - raymarched.features: Predicted rgb or feature values. - raymarched.depths: A tensor of shape `(B, ..., 1)` containing - predicted depth values. 
- raymarched.masks: A tensor of shape `(B, ..., 1)` containing - predicted foreground masks. - raymarched.aux["grad_theta"]: A tensor of shape `(B, ..., 3)` containing an - evaluation of a gradient of a signed distance function w.r.t. - input 3D coordinates used to compute the eikonal loss. - raymarched.aux["density_grid"]: A tensor of shape `(B, Hg, Wg, Dg, 1)` - containing a `Hg x Wg x Dg` voxel grid of density values. - ray_bundle: ImplicitronRayBundle object which was used to produce the raymarched - object - image_rgb: A tensor of shape `(B, H, W, 3)` containing ground truth rgb - values. - depth_map: A tensor of shape `(B, Hd, Wd, 1)` containing ground truth depth - values. - fg_probability: A tensor of shape `(B, Hm, Wm, 1)` containing ground truth - foreground masks. - keys_prefix: A common prefix for all keys in the output dictionary - containing all view metrics. - - Returns: - A dictionary `{metric_name_i: metric_value_i}` keyed by the - names of the output metrics `metric_name_i` with their corresponding - values `metric_value_i` represented as 0-dimensional float tensors. - - The calculated metrics are: - rgb_huber: A robust huber loss between `image_pred` and `image`. - rgb_mse: Mean squared error between `image_pred` and `image`. - rgb_psnr: Peak signal-to-noise ratio between `image_pred` and `image`. - rgb_psnr_fg: Peak signal-to-noise ratio between the foreground - region of `image_pred` and `image` as defined by `mask`. - rgb_mse_fg: Mean squared error between the foreground - region of `image_pred` and `image` as defined by `mask`. - mask_neg_iou: (1 - intersection-over-union) between `mask_pred` - and `mask`. - mask_bce: Binary cross entropy between `mask_pred` and `mask`. - mask_beta_prior: A loss enforcing strictly binary values - of `mask_pred`: `log(mask_pred) + log(1-mask_pred)` - depth_abs: Mean per-pixel L1 distance between - `depth_pred` and `depth`. - depth_abs_fg: Mean per-pixel L1 distance between the foreground - region of `depth_pred` and `depth` as defined by `mask`. - eikonal: Eikonal regularizer `(||grad_theta|| - 1)**2`. - density_tv: The Total Variation regularizer of density - values in `density_grid` (sum of L1 distances of values - of all 4-neighbouring cells). - depth_neg_penalty: `min(depth_pred, 0)**2` penalizing negative - predicted depth values. - """ - metrics = self._calculate_stage( - raymarched, - ray_bundle, - image_rgb, - depth_map, - fg_probability, - mask_crop, - keys_prefix, - ) - - if raymarched.prev_stage: - metrics.update( - self( - raymarched.prev_stage, - ray_bundle, - image_rgb, - depth_map, - fg_probability, - mask_crop, - keys_prefix=(keys_prefix + "prev_stage_"), - ) - ) - - return metrics - - def _calculate_stage( - self, - raymarched: RendererOutput, - ray_bundle: ImplicitronRayBundle, - image_rgb: Optional[torch.Tensor] = None, - depth_map: Optional[torch.Tensor] = None, - fg_probability: Optional[torch.Tensor] = None, - mask_crop: Optional[torch.Tensor] = None, - keys_prefix: str = "loss_", - **kwargs, - ) -> Dict[str, Any]: - """ - Calculate metrics for the current stage. - """ - # TODO: extract functions - - # reshape from B x ... x DIM to B x DIM x -1 x 1 - image_rgb_pred, fg_probability_pred, depth_map_pred = [ - _reshape_nongrid_var(x) - for x in [raymarched.features, raymarched.masks, raymarched.depths] - ] - xys = ray_bundle.xys - - # If ray_bundle is packed than we can sample images in padded state to lower - # memory requirements. 
Instead of having one image for every element in - # ray_bundle we can than have one image per unique sampled camera. - if ray_bundle.is_packed(): - xys, first_idxs, num_inputs = ray_bundle.get_padded_xys() - - # reshape the sampling grid as well - # TODO: we can get rid of the singular dimension here and in _reshape_nongrid_var - # now that we use rend_utils.ndc_grid_sample - xys = xys.reshape(xys.shape[0], -1, 1, 2) - - # closure with the given xys - def sample_full(tensor, mode): - if tensor is None: - return tensor - return rend_utils.ndc_grid_sample(tensor, xys, mode=mode) - - def sample_packed(tensor, mode): - if tensor is None: - return tensor - - # select images that corespond to sampled cameras if raybundle is packed - tensor = tensor[ray_bundle.camera_ids] - if ray_bundle.is_packed(): - # select images that corespond to sampled cameras if raybundle is packed - tensor = tensor[ray_bundle.camera_ids] - result = rend_utils.ndc_grid_sample(tensor, xys, mode=mode) - return padded_to_packed(result, first_idxs, num_inputs, max_size_dim=2)[ - :, :, None - ] # the result is [n_rays_total_training, 3, 1, 1] - - sample = sample_packed if ray_bundle.is_packed() else sample_full - - # eval all results in this size - image_rgb = sample(image_rgb, mode="bilinear") - depth_map = sample(depth_map, mode="nearest") - fg_probability = sample(fg_probability, mode="nearest") - mask_crop = sample(mask_crop, mode="nearest") - if mask_crop is None and image_rgb_pred is not None: - mask_crop = torch.ones_like(image_rgb_pred[:, :1]) - if mask_crop is None and depth_map_pred is not None: - mask_crop = torch.ones_like(depth_map_pred[:, :1]) - - metrics = {} - if image_rgb is not None and image_rgb_pred is not None: - metrics.update( - _rgb_metrics( - image_rgb, - image_rgb_pred, - fg_probability, - fg_probability_pred, - mask_crop, - ) - ) - - if fg_probability_pred is not None: - metrics["mask_beta_prior"] = utils.beta_prior(fg_probability_pred) - if fg_probability is not None and fg_probability_pred is not None: - metrics["mask_neg_iou"] = utils.neg_iou_loss( - fg_probability_pred, fg_probability, mask=mask_crop - ) - metrics["mask_bce"] = utils.calc_bce( - fg_probability_pred, fg_probability, mask=mask_crop - ) - - if depth_map is not None and depth_map_pred is not None: - assert mask_crop is not None - _, abs_ = utils.eval_depth( - depth_map_pred, depth_map, get_best_scale=True, mask=mask_crop, crop=0 - ) - metrics["depth_abs"] = abs_.mean() - - if fg_probability is not None: - mask = fg_probability * mask_crop - _, abs_ = utils.eval_depth( - depth_map_pred, depth_map, get_best_scale=True, mask=mask, crop=0 - ) - metrics["depth_abs_fg"] = abs_.mean() - - # regularizers - grad_theta = raymarched.aux.get("grad_theta") - if grad_theta is not None: - metrics["eikonal"] = _get_eikonal_loss(grad_theta) - - density_grid = raymarched.aux.get("density_grid") - if density_grid is not None: - metrics["density_tv"] = _get_grid_tv_loss(density_grid) - - if depth_map_pred is not None: - metrics["depth_neg_penalty"] = _get_depth_neg_penalty_loss(depth_map_pred) - - if keys_prefix is not None: - metrics = {(keys_prefix + k): v for k, v in metrics.items()} - - return metrics - - -def _rgb_metrics(images, images_pred, masks, masks_pred, masks_crop): - assert masks_crop is not None - if images.shape[1] != images_pred.shape[1]: - raise ValueError( - f"Network output's RGB images had {images_pred.shape[1]} " - f"channels. {images.shape[1]} expected." 
- ) - rgb_squared = ((images_pred - images) ** 2).mean(dim=1, keepdim=True) - rgb_loss = utils.huber(rgb_squared, scaling=0.03) - crop_mass = masks_crop.sum().clamp(1.0) - results = { - "rgb_huber": (rgb_loss * masks_crop).sum() / crop_mass, - "rgb_mse": (rgb_squared * masks_crop).sum() / crop_mass, - "rgb_psnr": utils.calc_psnr(images_pred, images, mask=masks_crop), - } - if masks is not None: - masks = masks_crop * masks - results["rgb_psnr_fg"] = utils.calc_psnr(images_pred, images, mask=masks) - results["rgb_mse_fg"] = (rgb_squared * masks).sum() / masks.sum().clamp(1.0) - return results - - -def _get_eikonal_loss(grad_theta): - return ((grad_theta.norm(2, dim=1) - 1) ** 2).mean() - - -def _get_grid_tv_loss(grid, log_domain: bool = True, eps: float = 1e-5): - if log_domain: - if (grid <= -eps).any(): - warnings.warn("Grid has negative values; this will produce NaN loss") - grid = torch.log(grid + eps) - - # this is an isotropic version, note that it ignores last rows/cols - return torch.mean( - utils.safe_sqrt( - (grid[..., :-1, :-1, 1:] - grid[..., :-1, :-1, :-1]) ** 2 - + (grid[..., :-1, 1:, :-1] - grid[..., :-1, :-1, :-1]) ** 2 - + (grid[..., 1:, :-1, :-1] - grid[..., :-1, :-1, :-1]) ** 2, - eps=1e-5, - ) - ) - - -def _get_depth_neg_penalty_loss(depth): - neg_penalty = depth.clamp(min=None, max=0.0) ** 2 - return torch.mean(neg_penalty) - - -def _reshape_nongrid_var(x): - if x is None: - return None - - ba, *_, dim = x.shape - return x.reshape(ba, -1, 1, dim).permute(0, 3, 1, 2).contiguous() diff --git a/pytorch3d/pytorch3d/implicitron/models/model_dbir.py b/pytorch3d/pytorch3d/implicitron/models/model_dbir.py deleted file mode 100644 index 4f470a6e00da3e0b146880d8a0cfb18b03ec37d7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/model_dbir.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import Any, Dict, List, Optional, Tuple - -import torch -from pytorch3d.implicitron.dataset.utils import is_known_frame -from pytorch3d.implicitron.tools.config import registry -from pytorch3d.implicitron.tools.point_cloud_utils import ( - get_rgbd_point_cloud, - render_point_cloud_pytorch3d, -) -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.structures import Pointclouds - -from .base_model import ImplicitronModelBase, ImplicitronRender -from .renderer.base import EvaluationMode - - -@registry.register -class ModelDBIR(ImplicitronModelBase): - """ - A simple depth-based image rendering model. - - Args: - render_image_width: The width of the rendered rectangular images. - render_image_height: The height of the rendered rectangular images. - bg_color: The color of the background. - max_points: Maximum number of points in the point cloud - formed by unprojecting all source view depths. - If more points are present, they are randomly subsampled - to this number of points without replacement. - """ - - render_image_width: int = 256 - render_image_height: int = 256 - bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0) - max_points: int = -1 - - # pyre-fixme[14]: `forward` overrides method defined in `ImplicitronModelBase` - # inconsistently. 
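Before ModelDBIR's forward below, a short standalone illustration of the masked RGB metrics computed by _rgb_metrics above; the tensors here are made up, and metric_utils is the helper module that metrics.py already imports as `utils`.

import torch
from pytorch3d.implicitron.tools import metric_utils as utils

images      = torch.rand(2, 3, 64, 64)      # ground-truth RGB
images_pred = torch.rand(2, 3, 64, 64)      # rendered RGB
masks_crop  = torch.ones(2, 1, 64, 64)      # valid-pixel (crop) mask

# per-pixel squared error averaged over channels, as in _rgb_metrics
rgb_sq   = ((images_pred - images) ** 2).mean(dim=1, keepdim=True)
crop_sum = masks_crop.sum().clamp(1.0)
rgb_mse   = (rgb_sq * masks_crop).sum() / crop_sum
rgb_huber = (utils.huber(rgb_sq, scaling=0.03) * masks_crop).sum() / crop_sum
rgb_psnr  = utils.calc_psnr(images_pred, images, mask=masks_crop)  # PSNR under the same mask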
- def forward( - self, - *, # force keyword-only arguments - image_rgb: Optional[torch.Tensor], - camera: CamerasBase, - fg_probability: Optional[torch.Tensor], - mask_crop: Optional[torch.Tensor], - depth_map: Optional[torch.Tensor], - sequence_name: Optional[List[str]], - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - frame_type: List[str], - **kwargs, - ) -> Dict[str, Any]: # TODO: return a namedtuple or dataclass - """ - Given a set of input source cameras images and depth maps, unprojects - all RGBD maps to a colored point cloud and renders into the target views. - - Args: - camera: A batch of `N` PyTorch3D cameras. - image_rgb: A batch of `N` images of shape `(N, 3, H, W)`. - depth_map: A batch of `N` depth maps of shape `(N, 1, H, W)`. - fg_probability: A batch of `N` foreground probability maps - of shape `(N, 1, H, W)`. - frame_type: A list of `N` strings containing frame type indicators - which specify target and source views. - - Returns: - preds: A dict with the following fields: - implicitron_render: The rendered colors, depth and mask - of the target views. - point_cloud: The point cloud of the scene. It's renders are - stored in `implicitron_render`. - """ - - if image_rgb is None: - raise ValueError("ModelDBIR needs image input") - - if fg_probability is None: - raise ValueError("ModelDBIR needs foreground mask input") - - if depth_map is None: - raise ValueError("ModelDBIR needs depth map input") - - is_known = is_known_frame(frame_type) - is_known_idx = torch.where(is_known)[0] - - mask_fg = (fg_probability > 0.5).type_as(image_rgb) - - point_cloud = get_rgbd_point_cloud( - # pyre-fixme[6]: For 1st param expected `Union[List[int], int, - # LongTensor]` but got `Tensor`. - camera[is_known_idx], - image_rgb[is_known_idx], - depth_map[is_known_idx], - mask_fg[is_known_idx], - ) - - pcl_size = point_cloud.num_points_per_cloud().item() - if (self.max_points > 0) and (pcl_size > self.max_points): - # pyre-fixme[6]: For 1st param expected `int` but got `Union[bool, - # float, int]`. - prm = torch.randperm(pcl_size)[: self.max_points] - point_cloud = Pointclouds( - point_cloud.points_padded()[:, prm, :], - # pyre-fixme[16]: Optional type has no attribute `__getitem__`. 
- features=point_cloud.features_padded()[:, prm, :], - ) - - is_target_idx = torch.where(~is_known)[0] - - depth_render, image_render, mask_render = [], [], [] - - # render into target frames in a for loop to save memory - for tgt_idx in is_target_idx: - _image_render, _mask_render, _depth_render = render_point_cloud_pytorch3d( - camera[int(tgt_idx)], - point_cloud, - render_size=(self.render_image_height, self.render_image_width), - point_radius=1e-2, - topk=10, - bg_color=self.bg_color, - ) - _image_render = _image_render.clamp(0.0, 1.0) - # the mask is the set of pixels with opacity bigger than eps - _mask_render = (_mask_render > 1e-4).float() - - depth_render.append(_depth_render) - image_render.append(_image_render) - mask_render.append(_mask_render) - - implicitron_render = ImplicitronRender( - **{ - k: torch.cat(v, dim=0) - for k, v in zip( - ["depth_render", "image_render", "mask_render"], - [depth_render, image_render, mask_render], - ) - } - ) - - preds = { - "implicitron_render": implicitron_render, - "point_cloud": point_cloud, - } - - return preds diff --git a/pytorch3d/pytorch3d/implicitron/models/overfit_model.py b/pytorch3d/pytorch3d/implicitron/models/overfit_model.py deleted file mode 100644 index 40ee5a1ba8008e5f02d9037312c47452eb5d2970..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/overfit_model.py +++ /dev/null @@ -1,664 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# Note: The #noqa comments below are for unused imports of pluggable implementations -# which are part of implicitron. They ensure that the registry is prepopulated. 
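Before the OverfitModel definition below, a hedged sketch of driving the ModelDBIR removed just above on dummy inputs. Shapes follow its forward() docstring; the frame_type strings are an assumption about what is_known_frame() treats as known source views versus render targets, and expand_args_fields is again assumed as the direct-instantiation path.

import torch
from pytorch3d.implicitron.tools.config import expand_args_fields
from pytorch3d.implicitron.models.model_dbir import ModelDBIR
from pytorch3d.renderer.cameras import PerspectiveCameras

expand_args_fields(ModelDBIR)                    # resolve the Configurable fields
model = ModelDBIR(render_image_width=128, render_image_height=128, max_points=10_000)

N, H, W = 2, 128, 128
cameras = PerspectiveCameras(R=torch.eye(3)[None].repeat(N, 1, 1), T=torch.zeros(N, 3))
preds = model(
    image_rgb=torch.rand(N, 3, H, W),
    camera=cameras,
    fg_probability=torch.ones(N, 1, H, W),
    mask_crop=torch.ones(N, 1, H, W),
    depth_map=torch.ones(N, 1, H, W),
    sequence_name=["seq"] * N,
    frame_type=["train_known", "train_unseen"],  # assumption: "*_known" marks a source view,
                                                 # the remaining views are render targets
)
render = preds["implicitron_render"]             # rendered colors / depths / masks of target views
cloud  = preds["point_cloud"]                    # point cloud unprojected from the known views

ModelDBIR learns nothing: it only unprojects the known RGB-D views into a point cloud and re-renders it, which is what makes it a useful geometry-only baseline next to the learned OverfitModel that follows.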
- -import functools -import logging -from dataclasses import field -from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Union - -import torch -from omegaconf import DictConfig - -from pytorch3d.implicitron.models.base_model import ( - ImplicitronModelBase, - ImplicitronRender, -) -from pytorch3d.implicitron.models.global_encoder.global_encoder import GlobalEncoderBase -from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase -from pytorch3d.implicitron.models.metrics import ( - RegularizationMetricsBase, - ViewMetricsBase, -) - -from pytorch3d.implicitron.models.renderer.base import ( - BaseRenderer, - EvaluationMode, - ImplicitronRayBundle, - RendererOutput, - RenderSamplingMode, -) -from pytorch3d.implicitron.models.renderer.ray_sampler import RaySamplerBase -from pytorch3d.implicitron.models.utils import ( - apply_chunked, - chunk_generator, - log_loss_weights, - preprocess_input, - weighted_sum_losses, -) -from pytorch3d.implicitron.tools import vis_utils -from pytorch3d.implicitron.tools.config import ( - expand_args_fields, - registry, - run_auto_creation, -) - -from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle -from pytorch3d.renderer import utils as rend_utils -from pytorch3d.renderer.cameras import CamerasBase - - -if TYPE_CHECKING: - from visdom import Visdom -logger = logging.getLogger(__name__) - -IMPLICIT_FUNCTION_ARGS_TO_REMOVE: List[str] = [ - "feature_vector_size", - "encoding_dim", - "latent_dim", - "color_dim", -] - - -@registry.register -class OverfitModel(ImplicitronModelBase): # pyre-ignore: 13 - """ - OverfitModel is a wrapper for the neural implicit - rendering and reconstruction pipeline which consists - of the following sequence of 4 steps: - - - (1) Ray Sampling - ------------------ - Rays are sampled from an image grid based on the target view(s). - │ - ▼ - (2) Implicit Function Evaluation - ------------------ - Evaluate the implicit function(s) at the sampled ray points - (also optionally pass in a global encoding from global_encoder). - │ - ▼ - (3) Rendering - ------------------ - Render the image into the target cameras by raymarching along - the sampled rays and aggregating the colors and densities - output by the implicit function in (2). - │ - ▼ - (4) Loss Computation - ------------------ - Compute losses based on the predicted target image(s). - - - The `forward` function of OverfitModel executes - this sequence of steps. Currently, steps 1, 2, 3 - can be customized by intializing a subclass of the appropriate - base class and adding the newly created module to the registry. - Please see https://github.com/facebookresearch/pytorch3d/blob/main/projects/implicitron_trainer/README.md#custom-plugins - for more details on how to create and register a custom component. - - In the config .yaml files for experiments, the parameters below are - contained in the - `model_factory_ImplicitronModelFactory_args.model_OverfitModel_args` - node. As OverfitModel derives from ReplaceableBase, the input arguments are - parsed by the run_auto_creation function to initialize the - necessary member modules. Please see implicitron_trainer/README.md - for more details on this process.
- - Args: - mask_images: Whether or not to mask the RGB image background given the - foreground mask (the `fg_probability` argument of `GenericModel.forward`) - mask_depths: Whether or not to mask the depth image background given the - foreground mask (the `fg_probability` argument of `GenericModel.forward`) - render_image_width: Width of the output image to render - render_image_height: Height of the output image to render - mask_threshold: If greater than 0.0, the foreground mask is - thresholded by this value before being applied to the RGB/Depth images - output_rasterized_mc: If True, visualize the Monte-Carlo pixel renders by - splatting onto an image grid. Default: False. - bg_color: RGB values for setting the background color of input image - if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own - way to determine the background color of its output, unrelated to this. - chunk_size_grid: The total number of points which can be rendered - per chunk. This is used to compute the number of rays used - per chunk when the chunked version of the renderer is used (in order - to fit rendering on all rays in memory) - render_features_dimensions: The number of output features to render. - Defaults to 3, corresponding to RGB images. - sampling_mode_training: The sampling method to use during training. Must be - a value from the RenderSamplingMode Enum. - sampling_mode_evaluation: Same as above but for evaluation. - global_encoder_class_type: The name of the class to use for global_encoder, - which must be available in the registry. Or `None` to disable global encoder. - global_encoder: An instance of `GlobalEncoder`. This is used to generate an encoding - of the image (referred to as the global_code) that can be used to model aspects of - the scene such as multiple objects or morphing objects. It is up to the implicit - function definition how to use it, but the most typical way is to broadcast and - concatenate to the other inputs for the implicit function. - raysampler_class_type: The name of the raysampler class which is available - in the global registry. - raysampler: An instance of RaySampler which is used to emit - rays from the target view(s). - renderer_class_type: The name of the renderer class which is available in the global - registry. - renderer: A renderer class which inherits from BaseRenderer. This is used to - generate the images from the target view(s). - share_implicit_function_across_passes: If set to True - coarse_implicit_function is automatically set as implicit_function - (coarse_implicit_function=implicit_funciton). The - implicit_functions are then run sequentially during the rendering. - implicit_function_class_type: The type of implicit function to use which - is available in the global registry. - implicit_function: An instance of ImplicitFunctionBase. - coarse_implicit_function_class_type: The type of implicit function to use which - is available in the global registry. - coarse_implicit_function: An instance of ImplicitFunctionBase. - If set and `share_implicit_function_across_passes` is set to False, - coarse_implicit_function is instantiated on itself. It - is then used as the second pass during the rendering. - If set to None, we only do a single pass with implicit_function. - view_metrics: An instance of ViewMetricsBase used to compute loss terms which - are independent of the model's parameters. - view_metrics_class_type: The type of view metrics to use, must be available in - the global registry. 
- regularization_metrics: An instance of RegularizationMetricsBase used to compute - regularization terms which can depend on the model's parameters. - regularization_metrics_class_type: The type of regularization metrics to use, - must be available in the global registry. - loss_weights: A dictionary with a {loss_name: weight} mapping; see documentation - for `ViewMetrics` class for available loss functions. - log_vars: A list of variable names which should be logged. - The names should correspond to a subset of the keys of the - dict `preds` output by the `forward` function. - """ # noqa: B950 - - mask_images: bool = True - mask_depths: bool = True - render_image_width: int = 400 - render_image_height: int = 400 - mask_threshold: float = 0.5 - output_rasterized_mc: bool = False - bg_color: Tuple[float, float, float] = (0.0, 0.0, 0.0) - chunk_size_grid: int = 4096 - render_features_dimensions: int = 3 - tqdm_trigger_threshold: int = 16 - - n_train_target_views: int = 1 - sampling_mode_training: str = "mask_sample" - sampling_mode_evaluation: str = "full_grid" - - # ---- global encoder settings - global_encoder_class_type: Optional[str] = None - global_encoder: Optional[GlobalEncoderBase] - - # ---- raysampler - raysampler_class_type: str = "AdaptiveRaySampler" - raysampler: RaySamplerBase - - # ---- renderer configs - renderer_class_type: str = "MultiPassEmissionAbsorptionRenderer" - renderer: BaseRenderer - - # ---- implicit function settings - share_implicit_function_across_passes: bool = False - implicit_function_class_type: str = "NeuralRadianceFieldImplicitFunction" - implicit_function: ImplicitFunctionBase - coarse_implicit_function_class_type: Optional[str] = None - coarse_implicit_function: Optional[ImplicitFunctionBase] - - # ----- metrics - view_metrics: ViewMetricsBase - view_metrics_class_type: str = "ViewMetrics" - - regularization_metrics: RegularizationMetricsBase - regularization_metrics_class_type: str = "RegularizationMetrics" - - # ---- loss weights - loss_weights: Dict[str, float] = field( - default_factory=lambda: { - "loss_rgb_mse": 1.0, - "loss_prev_stage_rgb_mse": 1.0, - "loss_mask_bce": 0.0, - "loss_prev_stage_mask_bce": 0.0, - } - ) - - # ---- variables to be logged (logger automatically ignores if not computed) - log_vars: List[str] = field( - default_factory=lambda: [ - "loss_rgb_psnr_fg", - "loss_rgb_psnr", - "loss_rgb_mse", - "loss_rgb_huber", - "loss_depth_abs", - "loss_depth_abs_fg", - "loss_mask_neg_iou", - "loss_mask_bce", - "loss_mask_beta_prior", - "loss_eikonal", - "loss_density_tv", - "loss_depth_neg_penalty", - "loss_autodecoder_norm", - # metrics that are only logged in 2+stage renderes - "loss_prev_stage_rgb_mse", - "loss_prev_stage_rgb_psnr_fg", - "loss_prev_stage_rgb_psnr", - "loss_prev_stage_mask_bce", - # basic metrics - "objective", - "epoch", - "sec/it", - ] - ) - - @classmethod - def pre_expand(cls) -> None: - # use try/finally to bypass cinder's lazy imports - try: - from pytorch3d.implicitron.models.implicit_function.idr_feature_field import ( # noqa: F401, B950 - IdrFeatureField, - ) - from pytorch3d.implicitron.models.implicit_function.neural_radiance_field import ( # noqa: F401, B950 - NeuralRadianceFieldImplicitFunction, - ) - from pytorch3d.implicitron.models.implicit_function.scene_representation_networks import ( # noqa: F401, B950 - SRNImplicitFunction, - ) - from pytorch3d.implicitron.models.renderer.lstm_renderer import ( # noqa: F401 - LSTMRenderer, - ) - from pytorch3d.implicitron.models.renderer.multipass_ea import ( # noqa: 
F401 - MultiPassEmissionAbsorptionRenderer, - ) - from pytorch3d.implicitron.models.renderer.sdf_renderer import ( # noqa: F401 - SignedDistanceFunctionRenderer, - ) - finally: - pass - - def __post_init__(self): - # The attribute will be filled by run_auto_creation - run_auto_creation(self) - log_loss_weights(self.loss_weights, logger) - # We need to set it here since run_auto_creation - # will create coarse_implicit_function before implicit_function - if self.share_implicit_function_across_passes: - self.coarse_implicit_function = self.implicit_function - - def forward( - self, - *, # force keyword-only arguments - image_rgb: Optional[torch.Tensor], - camera: CamerasBase, - fg_probability: Optional[torch.Tensor] = None, - mask_crop: Optional[torch.Tensor] = None, - depth_map: Optional[torch.Tensor] = None, - sequence_name: Optional[List[str]] = None, - frame_timestamp: Optional[torch.Tensor] = None, - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - **kwargs, - ) -> Dict[str, Any]: - """ - Args: - image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images; - the first `min(B, n_train_target_views)` images are considered targets and - are used to supervise the renders; the rest corresponding to the source - viewpoints from which features will be extracted. - camera: An instance of CamerasBase containing a batch of `B` cameras corresponding - to the viewpoints of target images, from which the rays will be sampled, - and source images, which will be used for intersecting with target rays. - fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch of - foreground masks. - mask_crop: A binary tensor of shape `(B, 1, H, W)` deonting valid - regions in the input images (i.e. regions that do not correspond - to, e.g., zero-padding). When the `RaySampler`'s sampling mode is set to - "mask_sample", rays will be sampled in the non zero regions. - depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps. - sequence_name: A list of `B` strings corresponding to the sequence names - from which images `image_rgb` were extracted. They are used to match - target frames with relevant source frames. - frame_timestamp: Optionally a tensor of shape `(B,)` containing a batch - of frame timestamps. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering. - - Returns: - preds: A dictionary containing all outputs of the forward pass including the - rendered images, depths, masks, losses and other metrics. - """ - image_rgb, fg_probability, depth_map = preprocess_input( - image_rgb, - fg_probability, - depth_map, - self.mask_images, - self.mask_depths, - self.mask_threshold, - self.bg_color, - ) - - # Determine the used ray sampling mode. - sampling_mode = RenderSamplingMode( - self.sampling_mode_training - if evaluation_mode == EvaluationMode.TRAINING - else self.sampling_mode_evaluation - ) - - # (1) Sample rendering rays with the ray sampler. 
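        # Editorial note, added for clarity (the shapes below are an informal
        # sketch, not guaranteed by the original code): with MASK_SAMPLE the
        # bundle is sparse, e.g. origins/directions of shape
        # (B, n_rays_per_image, 3) and lengths of shape
        # (B, n_rays_per_image, n_pts_per_ray); with FULL_GRID the rays cover the
        # whole render grid, e.g. origins of shape (B, H, W, 3) and lengths of
        # shape (B, H, W, n_pts_per_ray), where H and W are render_image_height
        # and render_image_width.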
- # pyre-ignore[29] - ray_bundle: ImplicitronRayBundle = self.raysampler( - camera, - evaluation_mode, - mask=mask_crop - if mask_crop is not None and sampling_mode == RenderSamplingMode.MASK_SAMPLE - else None, - ) - - inputs_to_be_chunked = {} - if fg_probability is not None and self.renderer.requires_object_mask(): - sampled_fb_prob = rend_utils.ndc_grid_sample( - fg_probability, ray_bundle.xys, mode="nearest" - ) - inputs_to_be_chunked["object_mask"] = sampled_fb_prob > 0.5 - - # (2)-(3) Implicit function evaluation and Rendering - implicit_functions: List[Union[Callable, ImplicitFunctionBase]] = [ - self.implicit_function - ] - if self.coarse_implicit_function is not None: - implicit_functions = [self.coarse_implicit_function, self.implicit_function] - - if self.global_encoder is not None: - global_code = self.global_encoder( # pyre-fixme[29] - sequence_name=sequence_name, - frame_timestamp=frame_timestamp, - ) - implicit_functions = [ - functools.partial(implicit_function, global_code=global_code) - if isinstance(implicit_function, Callable) - else functools.partial( - implicit_function.forward, global_code=global_code - ) - for implicit_function in implicit_functions - ] - rendered = self._render( - ray_bundle=ray_bundle, - sampling_mode=sampling_mode, - evaluation_mode=evaluation_mode, - implicit_functions=implicit_functions, - inputs_to_be_chunked=inputs_to_be_chunked, - ) - - # A dict to store losses as well as rendering results. - preds: Dict[str, Any] = self.view_metrics( - results={}, - raymarched=rendered, - ray_bundle=ray_bundle, - image_rgb=image_rgb, - depth_map=depth_map, - fg_probability=fg_probability, - mask_crop=mask_crop, - ) - - preds.update( - self.regularization_metrics( - results=preds, - model=self, - ) - ) - - if sampling_mode == RenderSamplingMode.MASK_SAMPLE: - if self.output_rasterized_mc: - # Visualize the monte-carlo pixel renders by splatting onto - # an image grid. - ( - preds["images_render"], - preds["depths_render"], - preds["masks_render"], - ) = rasterize_sparse_ray_bundle( - ray_bundle, - rendered.features, - (self.render_image_height, self.render_image_width), - rendered.depths, - masks=rendered.masks, - ) - elif sampling_mode == RenderSamplingMode.FULL_GRID: - preds["images_render"] = rendered.features.permute(0, 3, 1, 2) - preds["depths_render"] = rendered.depths.permute(0, 3, 1, 2) - preds["masks_render"] = rendered.masks.permute(0, 3, 1, 2) - - preds["implicitron_render"] = ImplicitronRender( - image_render=preds["images_render"], - depth_render=preds["depths_render"], - mask_render=preds["masks_render"], - ) - else: - raise AssertionError("Unreachable state") - - # (4) Compute losses - # finally get the optimization objective using self.loss_weights - objective = self._get_objective(preds) - if objective is not None: - preds["objective"] = objective - - return preds - - def _get_objective(self, preds: Dict[str, torch.Tensor]) -> Optional[torch.Tensor]: - """ - A helper function to compute the overall loss as the dot product - of individual loss functions with the corresponding weights. - """ - return weighted_sum_losses(preds, self.loss_weights) - - def visualize( - self, - viz: Optional["Visdom"], - visdom_env_imgs: str, - preds: Dict[str, Any], - prefix: str, - ) -> None: - """ - Helper function to visualize the predictions generated - in the forward pass. - - Args: - viz: Visdom connection object - visdom_env_imgs: name of visdom environment for the images. 
- preds: predictions dict like returned by forward() - prefix: prepended to the names of images - """ - if viz is None or not viz.check_connection(): - logger.info("no visdom server! -> skipping batch vis") - return - - idx_image = 0 - title = f"{prefix}_im{idx_image}" - - vis_utils.visualize_basics(viz, preds, visdom_env_imgs, title=title) - - def _render( - self, - *, - ray_bundle: ImplicitronRayBundle, - inputs_to_be_chunked: Dict[str, torch.Tensor], - sampling_mode: RenderSamplingMode, - **kwargs, - ) -> RendererOutput: - """ - Args: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. - inputs_to_be_chunked: A collection of tensor of shape `(B, _, H, W)`. E.g. - SignedDistanceFunctionRenderer requires "object_mask", shape - (B, 1, H, W), the silhouette of the object in the image. When - chunking, they are passed to the renderer as shape - `(B, _, chunksize)`. - sampling_mode: The sampling method to use. Must be a value from the - RenderSamplingMode Enum. - - Returns: - An instance of RendererOutput - """ - if sampling_mode == RenderSamplingMode.FULL_GRID and self.chunk_size_grid > 0: - return apply_chunked( - self.renderer, - chunk_generator( - self.chunk_size_grid, - ray_bundle, - inputs_to_be_chunked, - self.tqdm_trigger_threshold, - **kwargs, - ), - lambda batch: torch.cat(batch, dim=1).reshape( - *ray_bundle.lengths.shape[:-1], -1 - ), - ) - else: - # pyre-fixme[29]: `BaseRenderer` is not a function. - return self.renderer( - ray_bundle=ray_bundle, - **inputs_to_be_chunked, - **kwargs, - ) - - @classmethod - def raysampler_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain fields of the raysampler because we want to set - them from our own members. - """ - del args["sampling_mode_training"] - del args["sampling_mode_evaluation"] - del args["image_width"] - del args["image_height"] - - def create_raysampler(self): - extra_args = { - "sampling_mode_training": self.sampling_mode_training, - "sampling_mode_evaluation": self.sampling_mode_evaluation, - "image_width": self.render_image_width, - "image_height": self.render_image_height, - } - raysampler_args = getattr( - self, "raysampler_" + self.raysampler_class_type + "_args" - ) - self.raysampler = registry.get(RaySamplerBase, self.raysampler_class_type)( - **raysampler_args, **extra_args - ) - - @classmethod - def renderer_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain fields of the renderer because we want to set - them based on other inputs. - """ - args.pop("render_features_dimensions", None) - args.pop("object_bounding_sphere", None) - - def create_renderer(self): - extra_args = {} - - if self.renderer_class_type == "SignedDistanceFunctionRenderer": - extra_args["render_features_dimensions"] = self.render_features_dimensions - if not hasattr(self.raysampler, "scene_extent"): - raise ValueError( - "SignedDistanceFunctionRenderer requires" - + " a raysampler that defines the 'scene_extent' field" - + " (this field is supported by, e.g., the adaptive raysampler - " - + " self.raysampler_class_type='AdaptiveRaySampler')." 
- ) - extra_args["object_bounding_sphere"] = self.raysampler.scene_extent - - renderer_args = getattr(self, "renderer_" + self.renderer_class_type + "_args") - self.renderer = registry.get(BaseRenderer, self.renderer_class_type)( - **renderer_args, **extra_args - ) - - @classmethod - def implicit_function_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain implicit_function fields because we want to set - them based on other inputs. - """ - for arg in IMPLICIT_FUNCTION_ARGS_TO_REMOVE: - args.pop(arg, None) - - @classmethod - def coarse_implicit_function_tweak_args(cls, type, args: DictConfig) -> None: - """ - We don't expose certain implicit_function fields because we want to set - them based on other inputs. - """ - for arg in IMPLICIT_FUNCTION_ARGS_TO_REMOVE: - args.pop(arg, None) - - def _create_extra_args_for_implicit_function(self) -> Dict[str, Any]: - extra_args = {} - global_encoder_dim = ( - 0 if self.global_encoder is None else self.global_encoder.get_encoding_dim() - ) - if self.implicit_function_class_type in ( - "NeuralRadianceFieldImplicitFunction", - "NeRFormerImplicitFunction", - ): - extra_args["latent_dim"] = global_encoder_dim - extra_args["color_dim"] = self.render_features_dimensions - - if self.implicit_function_class_type == "IdrFeatureField": - extra_args["feature_work_size"] = global_encoder_dim - extra_args["feature_vector_size"] = self.render_features_dimensions - - if self.implicit_function_class_type == "SRNImplicitFunction": - extra_args["latent_dim"] = global_encoder_dim - return extra_args - - def create_implicit_function(self) -> None: - implicit_function_type = registry.get( - ImplicitFunctionBase, self.implicit_function_class_type - ) - expand_args_fields(implicit_function_type) - - config_name = f"implicit_function_{self.implicit_function_class_type}_args" - config = getattr(self, config_name, None) - if config is None: - raise ValueError(f"{config_name} not present") - - extra_args = self._create_extra_args_for_implicit_function() - self.implicit_function = implicit_function_type(**config, **extra_args) - - def create_coarse_implicit_function(self) -> None: - # If coarse_implicit_function_class_type has been defined - # then we init a module based on its arguments - if ( - self.coarse_implicit_function_class_type is not None - and not self.share_implicit_function_across_passes - ): - config_name = "coarse_implicit_function_{0}_args".format( - self.coarse_implicit_function_class_type - ) - config = getattr(self, config_name, {}) - - implicit_function_type = registry.get( - ImplicitFunctionBase, - # pyre-ignore: config is None allow to check if this is None. - self.coarse_implicit_function_class_type, - ) - expand_args_fields(implicit_function_type) - - extra_args = self._create_extra_args_for_implicit_function() - self.coarse_implicit_function = implicit_function_type( - **config, **extra_args - ) - elif self.share_implicit_function_across_passes: - # Since coarse_implicit_function is initialised before - # implicit_function we handle this case in the post_init. - pass - else: - self.coarse_implicit_function = None diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/__init__.py b/pytorch3d/pytorch3d/implicitron/models/renderer/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/base.py b/pytorch3d/pytorch3d/implicitron/models/renderer/base.py deleted file mode 100644 index 3e891bf763454f540f1d22717c33d9a6c375593a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/base.py +++ /dev/null @@ -1,411 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, List, Optional, Tuple - -import torch -from pytorch3d.implicitron.tools.config import ReplaceableBase -from pytorch3d.ops import packed_to_padded -from pytorch3d.renderer.implicit.utils import ray_bundle_variables_to_ray_points - - -class EvaluationMode(Enum): - TRAINING = "training" - EVALUATION = "evaluation" - - -class RenderSamplingMode(Enum): - MASK_SAMPLE = "mask_sample" - FULL_GRID = "full_grid" - - -class ImplicitronRayBundle: - """ - Parametrizes points along projection rays by storing ray `origins`, - `directions` vectors and `lengths` at which the ray-points are sampled. - Furthermore, the xy-locations (`xys`) of the ray pixels are stored as well. - Note that `directions` don't have to be normalized; they define unit vectors - in the respective 1D coordinate systems; see documentation for - :func:`ray_bundle_to_ray_points` for the conversion formula. - - Ray bundle may represent rays from multiple cameras. In that case, cameras - are stored in the packed form (i.e. rays from the same camera are stored in - the consecutive elements). The following indices will be set: - camera_ids: A tensor of shape (N, ) which indicates which camera - was used to sample the rays. `N` is the number of different - sampled cameras. - camera_counts: A tensor of shape (N, ) which how many times the - coresponding camera in `camera_ids` was sampled. - `sum(camera_counts) == minibatch`, where `minibatch = origins.shape[0]`. - - Attributes: - origins: A tensor of shape `(..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(..., 3)` containing the direction - vectors of sampling rays in world coords. They don't have to be normalized; - they define unit vectors in the respective 1D coordinate systems; see - documentation for :func:`ray_bundle_to_ray_points` for the conversion formula. - lengths: A tensor of shape `(..., num_points_per_ray)` - containing the lengths at which the rays are sampled. - xys: A tensor of shape `(..., 2)`, the xy-locations (`xys`) of the ray pixels - camera_ids: An optional tensor of shape (N, ) which indicates which camera - was used to sample the rays. `N` is the number of unique sampled cameras. - camera_counts: An optional tensor of shape (N, ) indicates how many times the - coresponding camera in `camera_ids` was sampled. - `sum(camera_counts)==total_number_of_rays`. - bins: An optional tensor of shape `(..., num_points_per_ray + 1)` - containing the bins at which the rays are sampled. In this case - lengths should be equal to the midpoints of bins `(..., num_points_per_ray)`. 
- pixel_radii_2d: An optional tensor of shape `(..., 1)` - base radii of the conical frustums. - - Raises: - ValueError: If either bins or lengths are not provided. - ValueError: If bins is provided and the last dim is inferior or equal to 1. - """ - - def __init__( - self, - origins: torch.Tensor, - directions: torch.Tensor, - lengths: Optional[torch.Tensor], - xys: torch.Tensor, - camera_ids: Optional[torch.LongTensor] = None, - camera_counts: Optional[torch.LongTensor] = None, - bins: Optional[torch.Tensor] = None, - pixel_radii_2d: Optional[torch.Tensor] = None, - ): - if bins is not None and bins.shape[-1] <= 1: - raise ValueError( - "The last dim of bins must be at least superior or equal to 2." - ) - - if bins is None and lengths is None: - raise ValueError( - "Please set either bins or lengths to initialize an ImplicitronRayBundle." - ) - - self.origins = origins - self.directions = directions - self._lengths = lengths if bins is None else None - self.xys = xys - self.bins = bins - self.pixel_radii_2d = pixel_radii_2d - self.camera_ids = camera_ids - self.camera_counts = camera_counts - - @property - def lengths(self) -> torch.Tensor: - if self.bins is not None: - # equivalent to: 0.5 * (bins[..., 1:] + bins[..., :-1]) but more efficient - # pyre-ignore - return torch.lerp(self.bins[..., :-1], self.bins[..., 1:], 0.5) - return self._lengths - - @lengths.setter - def lengths(self, value): - if self.bins is not None: - raise ValueError( - "If the bins attribute is not None you cannot set the lengths attribute." - ) - else: - self._lengths = value - - def is_packed(self) -> bool: - """ - Returns whether the ImplicitronRayBundle carries data in packed state - """ - return self.camera_ids is not None and self.camera_counts is not None - - def get_padded_xys(self) -> Tuple[torch.Tensor, torch.LongTensor, int]: - """ - For a packed ray bundle, returns padded rays. Assumes the input bundle is packed - (i.e. `camera_ids` and `camera_counts` are set). - - Returns: - - xys: Tensor of shape (N, max_size, ...) containing the padded - representation of the pixel coordinated; - where max_size is max of `camera_counts`. The values for camera id `i` - will be copied to `xys[i, :]`, with zeros padding out the extra inputs. - - first_idxs: cumulative sum of `camera_counts` defininf the boundaries - between cameras in the packed representation - - num_inputs: the number of cameras in the bundle. - """ - if not self.is_packed(): - raise ValueError("get_padded_xys can be called only on a packed bundle") - - camera_counts = self.camera_counts - assert camera_counts is not None - - cumsum = torch.cumsum(camera_counts, dim=0, dtype=torch.long) - first_idxs = torch.cat( - (camera_counts.new_zeros((1,), dtype=torch.long), cumsum[:-1]) - ) - num_inputs = camera_counts.sum().item() - max_size = torch.max(camera_counts).item() - xys = packed_to_padded(self.xys, first_idxs, max_size) - # pyre-ignore [7] pytorch typeshed inaccuracy - return xys, first_idxs, num_inputs - - -@dataclass -class RendererOutput: - """ - A structure for storing the output of a renderer. - - Args: - features: rendered features (usually RGB colors), (B, ..., C) tensor. - depth: rendered ray-termination depth map, in NDC coordinates, (B, ..., 1) tensor. - mask: rendered object mask, values in [0, 1], (B, ..., 1) tensor. - prev_stage: for multi-pass renderers (e.g. in NeRF), - a reference to the output of the previous stage. - normals: surface normals, for renderers that estimate them; (B, ..., 3) tensor. 
- points: ray-termination points in the world coordinates, (B, ..., 3) tensor. - aux: dict for implementation-specific renderer outputs. - """ - - features: torch.Tensor - depths: torch.Tensor - masks: torch.Tensor - prev_stage: Optional[RendererOutput] = None - normals: Optional[torch.Tensor] = None - points: Optional[torch.Tensor] = None # TODO: redundant with depths - weights: Optional[torch.Tensor] = None - aux: Dict[str, Any] = field(default_factory=lambda: {}) - - -class ImplicitFunctionWrapper(torch.nn.Module): - def __init__(self, fn: torch.nn.Module): - super().__init__() - self._fn = fn - self.bound_args = {} - - def bind_args(self, **bound_args): - self.bound_args = bound_args - self._fn.on_bind_args() - - def unbind_args(self): - self.bound_args = {} - - def forward(self, *args, **kwargs): - return self._fn(*args, **{**kwargs, **self.bound_args}) - - -class BaseRenderer(ABC, ReplaceableBase): - """ - Base class for all Renderer implementations. - """ - - def requires_object_mask(self) -> bool: - """ - Whether `forward` needs the object_mask. - """ - return False - - @abstractmethod - def forward( - self, - ray_bundle: ImplicitronRayBundle, - implicit_functions: List[ImplicitFunctionWrapper], - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - **kwargs, - ) -> RendererOutput: - """ - Each Renderer should implement its own forward function - that returns an instance of RendererOutput. - - Args: - ray_bundle: An ImplicitronRayBundle object containing the following variables: - origins: A tensor of shape (minibatch, ..., 3) denoting - the origins of the rendering rays. - directions: A tensor of shape (minibatch, ..., 3) - containing the direction vectors of rendering rays. - lengths: A tensor of shape - (minibatch, ..., num_points_per_ray)containing the - lengths at which the ray points are sampled. - The coordinates of the points on the rays are thus computed - as `origins + lengths * directions`. - xys: A tensor of shape - (minibatch, ..., 2) containing the - xy locations of each ray's pixel in the NDC screen space. - camera_ids: A tensor of shape (N, ) which indicates which camera - was used to sample the rays. `N` is the number of different - sampled cameras. - camera_counts: A tensor of shape (N, ) which how many times the - coresponding camera in `camera_ids` was sampled. - `sum(camera_counts)==minibatch` - implicit_functions: List of ImplicitFunctionWrappers which define the - implicit function methods to be used. Most Renderers only allow - a single implicit function. Currently, only the - MultiPassEmissionAbsorptionRenderer allows specifying mulitple - values in the list. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering. - **kwargs: In addition to the name args, custom keyword args can be specified. - For example in the SignedDistanceFunctionRenderer, an object_mask is - required which needs to be passed via the kwargs. - - Returns: - instance of RendererOutput - """ - pass - - -def compute_3d_diagonal_covariance_gaussian( - rays_directions: torch.Tensor, - rays_dir_variance: torch.Tensor, - radii_variance: torch.Tensor, - eps: float = 1e-6, -) -> torch.Tensor: - """ - Transform the variances (rays_dir_variance, radii_variance) of the gaussians from - the coordinate frame of the conical frustum to 3D world coordinates. 
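    Concretely (an editorial restatement of the computation below), the returned
    diagonal is `rays_dir_variance * d**2 + radii_variance * (1 - d**2 / ||d||**2)`,
    evaluated elementwise for a (not necessarily normalized) ray direction `d`.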
- - It follows the equation 16 of `MIP-NeRF `_ - - Args: - rays_directions: A tensor of shape `(..., 3)` - rays_dir_variance: A tensor of shape `(..., num_intervals)` representing - the variance of the conical frustum with respect to the rays direction. - radii_variance: A tensor of shape `(..., num_intervals)` representing - the variance of the conical frustum with respect to its radius. - eps: a small number to prevent division by zero. - - Returns: - A tensor of shape `(..., num_intervals, 3)` containing the diagonal - of the covariance matrix. - """ - d_outer_diag = torch.pow(rays_directions, 2) - dir_mag_sq = torch.clamp(torch.sum(d_outer_diag, dim=-1, keepdim=True), min=eps) - - null_outer_diag = 1 - d_outer_diag / dir_mag_sq - ray_dir_cov_diag = rays_dir_variance[..., None] * d_outer_diag[..., None, :] - xy_cov_diag = radii_variance[..., None] * null_outer_diag[..., None, :] - return ray_dir_cov_diag + xy_cov_diag - - -def approximate_conical_frustum_as_gaussians( - bins: torch.Tensor, radii: torch.Tensor -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Approximates a conical frustum as two Gaussian distributions. - - The Gaussian distributions are characterized by - three values: - - - rays_dir_mean: mean along the rays direction - (defined as t in the parametric representation of a cone). - - rays_dir_variance: the variance of the conical frustum along the rays direction. - - radii_variance: variance of the conical frustum with respect to its radius. - - - The computation is stable and follows equation 7 - of `MIP-NeRF `_. - - For more information on how the mean and variances are computed - refers to the appendix of the paper. - - Args: - bins: A tensor of shape `(..., num_points_per_ray + 1)` - containing the bins at which the rays are sampled. - `bin[..., t]` and `bin[..., t+1]` represent respectively - the left and right coordinates of the interval. - t0: A tensor of shape `(..., num_points_per_ray)` - containing the left coordinates of the intervals - on which the rays are sampled. - t1: A tensor of shape `(..., num_points_per_ray)` - containing the rights coordinates of the intervals - on which the rays are sampled. - radii: A tensor of shape `(..., 1)` - base radii of the conical frustums. - - Returns: - rays_dir_mean: A tensor of shape `(..., num_intervals)` representing - the mean along the rays direction - (t in the parametric represention of the cone) - rays_dir_variance: A tensor of shape `(..., num_intervals)` representing - the variance of the conical frustum along the rays - (t in the parametric represention of the cone). - radii_variance: A tensor of shape `(..., num_intervals)` representing - the variance of the conical frustum with respect to its radius. 
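        In terms of the code below, with `t_mu = (t0 + t1) / 2` (interval midpoint)
        and `t_delta = (t1 - t0) / 2` (interval half-width), the returned values are

            rays_dir_mean = t_mu + 2 * t_mu * t_delta**2 / (3 * t_mu**2 + t_delta**2)
            rays_dir_variance = t_delta**2 / 3 - (4 / 15) * t_delta**4
                * (12 * t_mu**2 - t_delta**2) / (3 * t_mu**2 + t_delta**2)**2
            radii_variance = radii**2 * (t_mu**2 / 4 + (5 / 12) * t_delta**2
                - (4 / 15) * t_delta**4 / (3 * t_mu**2 + t_delta**2))

        (Editorial restatement of equation 7 of MIP-NeRF, matching the
        implementation that follows.)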
- """ - t_mu = torch.lerp(bins[..., 1:], bins[..., :-1], 0.5) - t_delta = torch.diff(bins, dim=-1) / 2 - - t_mu_pow2 = torch.pow(t_mu, 2) - t_delta_pow2 = torch.pow(t_delta, 2) - t_delta_pow4 = torch.pow(t_delta, 4) - - den = 3 * t_mu_pow2 + t_delta_pow2 - - # mean along the rays direction - rays_dir_mean = t_mu + 2 * t_mu * t_delta_pow2 / den - - # Variance of the conical frustum with along the rays directions - rays_dir_variance = t_delta_pow2 / 3 - (4 / 15) * ( - t_delta_pow4 * (12 * t_mu_pow2 - t_delta_pow2) / torch.pow(den, 2) - ) - - # Variance of the conical frustum with respect to its radius - radii_variance = torch.pow(radii, 2) * ( - t_mu_pow2 / 4 + (5 / 12) * t_delta_pow2 - 4 / 15 * (t_delta_pow4) / den - ) - return rays_dir_mean, rays_dir_variance, radii_variance - - -def conical_frustum_to_gaussian( - ray_bundle: ImplicitronRayBundle, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Approximate a conical frustum following a ray bundle as a Gaussian. - - Args: - ray_bundle: A `RayBundle` or `HeterogeneousRayBundle` object with fields: - origins: A tensor of shape `(..., 3)` - directions: A tensor of shape `(..., 3)` - lengths: A tensor of shape `(..., num_points_per_ray)` - bins: A tensor of shape `(..., num_points_per_ray + 1)` - containing the bins at which the rays are sampled. . - pixel_radii_2d: A tensor of shape `(..., 1)` - base radii of the conical frustums. - - Returns: - means: A tensor of shape `(..., num_points_per_ray - 1, 3)` - representing the means of the Gaussians - approximating the conical frustums. - diag_covariances: A tensor of shape `(...,num_points_per_ray -1, 3)` - representing the diagonal covariance matrices of our Gaussians. - """ - - if ray_bundle.pixel_radii_2d is None or ray_bundle.bins is None: - raise ValueError( - "RayBundle pixel_radii_2d or bins have not been provided." - " Look at pytorch3d.renderer.implicit.renderer.ray_sampler::" - "AbstractMaskRaySampler to see how to compute them. Have you forgot to set" - "`cast_ray_bundle_as_cone` to True?" - ) - - ( - rays_dir_mean, - rays_dir_variance, - radii_variance, - ) = approximate_conical_frustum_as_gaussians( - ray_bundle.bins, - ray_bundle.pixel_radii_2d, - ) - means = ray_bundle_variables_to_ray_points( - ray_bundle.origins, ray_bundle.directions, rays_dir_mean - ) - diag_covariances = compute_3d_diagonal_covariance_gaussian( - ray_bundle.directions, rays_dir_variance, radii_variance - ) - return means, diag_covariances diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/lstm_renderer.py b/pytorch3d/pytorch3d/implicitron/models/renderer/lstm_renderer.py deleted file mode 100644 index 19848ed6e7391de1ab46ac82c56894f1a3e6a598..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/lstm_renderer.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import copy -import logging -from typing import List, Optional, Tuple - -import torch -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import registry - -from .base import BaseRenderer, EvaluationMode, ImplicitFunctionWrapper, RendererOutput - - -logger = logging.getLogger(__name__) - - -@registry.register -class LSTMRenderer(BaseRenderer, torch.nn.Module): - """ - Implements the learnable LSTM raymarching function from SRN [1]. 
- This requires there to be one implicit function, and it is expected to be - like SRNImplicitFunction or SRNHyperNetImplicitFunction. - - Settings: - num_raymarch_steps: The number of LSTM raymarching steps. - init_depth: Initializes the bias of the last raymarching LSTM layer so that - the farthest point from the camera reaches a far z-plane that - lies `init_depth` units from the camera plane. - init_depth_noise_std: The standard deviation of the random normal noise - added to the initial depth of each marched ray. - hidden_size: The dimensionality of the LSTM's hidden state. - n_feature_channels: The number of feature channels returned by the - implicit_function evaluated at each raymarching step. - bg_color: If supplied, used as the background color. Otherwise the pixel - generator is used everywhere. This has to have length either 1 - (for a constant value for all output channels) or equal to the number - of output channels (which is `out_features` on the pixel generator, - typically 3.) - verbose: If `True`, logs raymarching debug info. - - References: - [1] Sitzmann, V. and ZollhΓΆfer, M. and Wetzstein, G.. - "Scene representation networks: Continuous 3d-structure-aware - neural scene representations." NeurIPS 2019. - """ - - num_raymarch_steps: int = 10 - init_depth: float = 17.0 - init_depth_noise_std: float = 5e-4 - hidden_size: int = 16 - n_feature_channels: int = 256 - bg_color: Optional[List[float]] = None - verbose: bool = False - - def __post_init__(self): - self._lstm = torch.nn.LSTMCell( - input_size=self.n_feature_channels, - hidden_size=self.hidden_size, - ) - self._lstm.apply(_init_recurrent_weights) - _lstm_forget_gate_init(self._lstm) - self._out_layer = torch.nn.Linear(self.hidden_size, 1) - - one_step = self.init_depth / self.num_raymarch_steps - self._out_layer.bias.data.fill_(one_step) - self._out_layer.weight.data.normal_(mean=0.0, std=1e-3) - - def forward( - self, - ray_bundle: ImplicitronRayBundle, - implicit_functions: List[ImplicitFunctionWrapper], - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - **kwargs, - ) -> RendererOutput: - """ - - Args: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. - implicit_functions: A single-element list of ImplicitFunctionWrappers which - defines the implicit function to be used. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering, specifically the RayPointRefiner and the density_noise_std. - - Returns: - instance of RendererOutput - """ - if len(implicit_functions) != 1: - raise ValueError("LSTM renderer expects a single implicit function.") - - implicit_function = implicit_functions[0] - - if ray_bundle.lengths.shape[-1] != 1: - raise ValueError( - "LSTM renderer requires a ray-bundle with a single point per ray" - + " which is the initial raymarching point." 
- ) - - # jitter the initial depths - - ray_bundle_t = copy.copy(ray_bundle) - ray_bundle_t.lengths = ( - ray_bundle.lengths - + torch.randn_like(ray_bundle.lengths) * self.init_depth_noise_std - ) - - states: List[Optional[Tuple[torch.Tensor, torch.Tensor]]] = [None] - signed_distance = torch.zeros_like(ray_bundle_t.lengths) - raymarch_features = None - for t in range(self.num_raymarch_steps + 1): - # move signed_distance along each ray - ray_bundle_t.lengths += signed_distance - - # eval the raymarching function - raymarch_features, _ = implicit_function( - ray_bundle=ray_bundle_t, - raymarch_features=None, - ) - if self.verbose: - msg = ( - f"{t}: mu={float(signed_distance.mean()):1.2e};" - + f" std={float(signed_distance.std()):1.2e};" - + f" mu_d={float(ray_bundle_t.lengths.mean()):1.2e};" - + f" std_d={float(ray_bundle_t.lengths.std()):1.2e};" - ) - logger.info(msg) - if t == self.num_raymarch_steps: - break - - # run the lstm marcher - state_h, state_c = self._lstm( - raymarch_features.view(-1, raymarch_features.shape[-1]), - states[-1], - ) - if state_h.requires_grad: - state_h.register_hook(lambda x: x.clamp(min=-10, max=10)) - # predict the next step size - signed_distance = self._out_layer(state_h).view(ray_bundle_t.lengths.shape) - # log the lstm states - states.append((state_h, state_c)) - - opacity_logits, features = implicit_function( - raymarch_features=raymarch_features, - ray_bundle=ray_bundle_t, - ) - mask = torch.sigmoid(opacity_logits) - depth = ray_bundle_t.lengths * ray_bundle_t.directions.norm( - dim=-1, keepdim=True - ) - - if self.bg_color is not None: - background = features.new_tensor(self.bg_color) - features = torch.lerp(background, features, mask) - - return RendererOutput( - features=features[..., 0, :], - depths=depth, - masks=mask[..., 0, :], - ) - - -def _init_recurrent_weights(self) -> None: - # copied from SRN codebase - for m in self.modules(): - if type(m) in [torch.nn.GRU, torch.nn.LSTM, torch.nn.RNN]: - for name, param in m.named_parameters(): - if "weight_ih" in name: - torch.nn.init.kaiming_normal_(param.data) - elif "weight_hh" in name: - torch.nn.init.orthogonal_(param.data) - elif "bias" in name: - param.data.fill_(0) - - -def _lstm_forget_gate_init(lstm_layer) -> None: - # copied from SRN codebase - for name, parameter in lstm_layer.named_parameters(): - if "bias" not in name: - continue - n = parameter.size(0) - start, end = n // 4, n // 2 - parameter.data[start:end].fill_(1.0) diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/multipass_ea.py b/pytorch3d/pytorch3d/implicitron/models/renderer/multipass_ea.py deleted file mode 100644 index 92042e131ae2ce9f171cb8cd8174b5ad4c992da8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/multipass_ea.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
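# Editorial sketch (not part of the original module; the values are purely
# illustrative): for a single ray, the emission-absorption weights described in
# the class docstring below reduce to
#
#     import torch
#     densities = torch.tensor([0.0, 0.5, 2.0, 0.1])     # opacities o_i at 4 points
#     deltas = torch.full((4,), 0.25)                     # spacings d_i along the ray
#     alphas = 1.0 - torch.exp(-deltas * densities)       # cap_fn(d_i * o_i)
#     transmittance = torch.cumprod(                      # 1 - ray_opacity_{n-1}
#         torch.cat([torch.ones(1), 1.0 - alphas[:-1]]), dim=0
#     )
#     weights = alphas * transmittance                    # weight_n
#     # rendered features = sum_n weight_n * ray_features_n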
- -from typing import List - -import torch -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import registry, run_auto_creation - -from .base import BaseRenderer, EvaluationMode, ImplicitFunctionWrapper, RendererOutput -from .ray_point_refiner import RayPointRefiner -from .raymarcher import RaymarcherBase - - -@registry.register -class MultiPassEmissionAbsorptionRenderer( # pyre-ignore: 13 - BaseRenderer, torch.nn.Module -): - """ - Implements the multi-pass rendering function, in particular, - with emission-absorption ray marching used in NeRF [1]. First, it evaluates - opacity-based ray-point weights and then optionally (in case more implicit - functions are given) resamples points using importance sampling and evaluates - new weights. - - During each ray marching pass, features, depth map, and masks - are integrated: Let o_i be the opacity estimated by the implicit function, - and d_i be the offset between points `i` and `i+1` along the respective ray. - Ray marching is performed using the following equations:: - - ray_opacity_n = cap_fn(sum_i=1^n cap_fn(d_i * o_i)), - weight_n = weight_fn(cap_fn(d_i * o_i), 1 - ray_opacity_{n-1}), - - and the final rendered quantities are computed by a dot-product of ray values - with the weights, e.g. `features = sum_n(weight_n * ray_features_n)`. - - By default, for the EA raymarcher from [1] ( - activated with `self.raymarcher_class_type="EmissionAbsorptionRaymarcher"` - ):: - - cap_fn(x) = 1 - exp(-x), - weight_fn(x) = w * x. - - Note that the latter can altered by changing `self.raymarcher_class_type`, - e.g. to "CumsumRaymarcher" which implements the cumulative-sum raymarcher - from NeuralVolumes [2]. - - Settings: - n_pts_per_ray_fine_training: The number of points sampled per ray for the - fine rendering pass during training. - n_pts_per_ray_fine_evaluation: The number of points sampled per ray for the - fine rendering pass during evaluation. - stratified_sampling_coarse_training: Enable/disable stratified sampling in the - refiner during training. Only matters if there are multiple implicit - functions (i.e. in GenericModel if num_passes>1). - stratified_sampling_coarse_evaluation: Enable/disable stratified sampling in - the refiner during evaluation. Only matters if there are multiple implicit - functions (i.e. in GenericModel if num_passes>1). - append_coarse_samples_to_fine: Add the fine ray points to the coarse points - after sampling. - density_noise_std_train: Standard deviation of the noise added to the - opacity field. - return_weights: Enables returning the rendering weights of the EA raymarcher. - Setting to `True` can lead to a prohibitivelly large memory consumption. - blurpool_weights: Use blurpool defined in [3], on the input weights of - each implicit_function except the first (implicit_functions[0]). - sample_pdf_eps: Padding applied to the weights (alpha in equation 18 of [3]). - raymarcher_class_type: The type of self.raymarcher corresponding to - a child of `RaymarcherBase` in the registry. - raymarcher: The raymarcher object used to convert per-point features - and opacities to a feature render. - - References: - [1] Mildenhall, Ben, et al. "Nerf: Representing Scenes as Neural Radiance - Fields for View Synthesis." ECCV 2020. - [2] Lombardi, Stephen, et al. "Neural Volumes: Learning Dynamic Renderable - Volumes from Images." SIGGRAPH 2019. - [3] Jonathan T. Barron, et al. "Mip-NeRF: A Multiscale Representation - for Anti-Aliasing Neural Radiance Fields." 
ICCV 2021. - - """ - - raymarcher_class_type: str = "EmissionAbsorptionRaymarcher" - raymarcher: RaymarcherBase - - n_pts_per_ray_fine_training: int = 64 - n_pts_per_ray_fine_evaluation: int = 64 - stratified_sampling_coarse_training: bool = True - stratified_sampling_coarse_evaluation: bool = False - append_coarse_samples_to_fine: bool = True - density_noise_std_train: float = 0.0 - return_weights: bool = False - blurpool_weights: bool = False - sample_pdf_eps: float = 1e-5 - - def __post_init__(self): - self._refiners = { - EvaluationMode.TRAINING: RayPointRefiner( - n_pts_per_ray=self.n_pts_per_ray_fine_training, - random_sampling=self.stratified_sampling_coarse_training, - add_input_samples=self.append_coarse_samples_to_fine, - blurpool_weights=self.blurpool_weights, - sample_pdf_eps=self.sample_pdf_eps, - ), - EvaluationMode.EVALUATION: RayPointRefiner( - n_pts_per_ray=self.n_pts_per_ray_fine_evaluation, - random_sampling=self.stratified_sampling_coarse_evaluation, - add_input_samples=self.append_coarse_samples_to_fine, - blurpool_weights=self.blurpool_weights, - sample_pdf_eps=self.sample_pdf_eps, - ), - } - run_auto_creation(self) - - def forward( - self, - ray_bundle: ImplicitronRayBundle, - implicit_functions: List[ImplicitFunctionWrapper], - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - **kwargs, - ) -> RendererOutput: - """ - Args: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. - implicit_functions: List of ImplicitFunctionWrappers which - define the implicit functions to be used sequentially in - the raymarching step. The output of raymarching with - implicit_functions[n-1] is refined, and then used as - input for raymarching with implicit_functions[n]. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering - - Returns: - instance of RendererOutput - """ - if not implicit_functions: - raise ValueError("EA renderer expects implicit functions") - - return self._run_raymarcher( - ray_bundle, - implicit_functions, - None, - evaluation_mode, - ) - - def _run_raymarcher( - self, ray_bundle, implicit_functions, prev_stage, evaluation_mode - ): - density_noise_std = ( - self.density_noise_std_train - if evaluation_mode == EvaluationMode.TRAINING - else 0.0 - ) - - ray_deltas = ( - None if ray_bundle.bins is None else torch.diff(ray_bundle.bins, dim=-1) - ) - output = self.raymarcher( - *implicit_functions[0](ray_bundle=ray_bundle), - ray_lengths=ray_bundle.lengths, - ray_deltas=ray_deltas, - density_noise_std=density_noise_std, - ) - output.prev_stage = prev_stage - - weights = output.weights - if not self.return_weights: - output.weights = None - - # we may need to make a recursive call - if len(implicit_functions) > 1: - fine_ray_bundle = self._refiners[evaluation_mode](ray_bundle, weights) - output = self._run_raymarcher( - fine_ray_bundle, - implicit_functions[1:], - output, - evaluation_mode, - ) - - return output diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_point_refiner.py b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_point_refiner.py deleted file mode 100644 index b71574d23f6a4d7f3407c3fa7157a47ff63f7bc2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_point_refiner.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import copy - -import torch -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import Configurable, expand_args_fields - -from pytorch3d.renderer.implicit.sample_pdf import sample_pdf - - -@expand_args_fields -# pyre-fixme[13]: Attribute `n_pts_per_ray` is never initialized. -# pyre-fixme[13]: Attribute `random_sampling` is never initialized. -class RayPointRefiner(Configurable, torch.nn.Module): - """ - Implements the importance sampling of points along rays. - The input is a `RayBundle` object with a `ray_weights` tensor - which specifies the probabilities of sampling a point along each ray. - - This raysampler is used for the fine rendering pass of NeRF. - As such, the forward pass accepts the RayBundle output by the - raysampling of the coarse rendering pass. Hence, it does not - take cameras as input. - - Args: - n_pts_per_ray: The number of points to sample along each ray. - random_sampling: If `False`, returns equispaced percentiles of the - distribution defined by the input weights, otherwise performs - sampling from that distribution. - add_input_samples: Concatenates and returns the sampled values - together with the input samples. - blurpool_weights: Use blurpool defined in [1], on the input weights. - sample_pdf_eps: A constant preventing division by zero in case empty bins - are present. - - References: - [1] Jonathan T. Barron, et al. "Mip-NeRF: A Multiscale Representation - for Anti-Aliasing Neural Radiance Fields." ICCV 2021. - """ - - n_pts_per_ray: int - random_sampling: bool - add_input_samples: bool = True - blurpool_weights: bool = False - sample_pdf_eps: float = 1e-5 - - def forward( - self, - input_ray_bundle: ImplicitronRayBundle, - ray_weights: torch.Tensor, - blurpool_weights: bool = False, - sample_pdf_padding: float = 1e-5, - **kwargs, - ) -> ImplicitronRayBundle: - """ - Args: - input_ray_bundle: An instance of `ImplicitronRayBundle` specifying the - source rays for sampling of the probability distribution. - ray_weights: A tensor of shape - `(..., input_ray_bundle.lengths.shape[-1])` with non-negative - elements defining the probability distribution to sample - ray points from. - blurpool_weights: Use blurpool defined in [1], on the input weights. - sample_pdf_padding: A constant preventing division by zero in case empty bins - are present. - - Returns: - ray_bundle: A new `ImplicitronRayBundle` instance containing the input ray - points together with `n_pts_per_ray` additionally sampled - points per ray. For each ray, the lengths are sorted. - - References: - [1] Jonathan T. Barron, et al. "Mip-NeRF: A Multiscale Representation - for Anti-Aliasing Neural Radiance Fields." ICCV 2021. 
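        Note (an editorial summary of the procedure implemented below): the
        weights are optionally blurpooled, treated as a per-ray probability
        distribution over the coarse bins, and `sample_pdf` then draws
        `n_pts_per_ray` new depths by inverse-CDF sampling (equispaced quantiles
        when `random_sampling=False`). If `add_input_samples=True`, the new
        depths are concatenated with the input ones, and the result is sorted
        along each ray.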
- - """ - - with torch.no_grad(): - if self.blurpool_weights: - ray_weights = apply_blurpool_on_weights(ray_weights) - - n_pts_per_ray = self.n_pts_per_ray - ray_weights = ray_weights.view(-1, ray_weights.shape[-1]) - if input_ray_bundle.bins is None: - z_vals: torch.Tensor = input_ray_bundle.lengths - ray_weights = ray_weights[..., 1:-1] - bins = torch.lerp(z_vals[..., 1:], z_vals[..., :-1], 0.5) - else: - z_vals = input_ray_bundle.bins - n_pts_per_ray += 1 - bins = z_vals - z_samples = sample_pdf( - bins.view(-1, bins.shape[-1]), - ray_weights, - n_pts_per_ray, - det=not self.random_sampling, - eps=self.sample_pdf_eps, - ).view(*z_vals.shape[:-1], n_pts_per_ray) - - if self.add_input_samples: - z_vals = torch.cat((z_vals, z_samples), dim=-1) - else: - z_vals = z_samples - # Resort by depth. - z_vals, _ = torch.sort(z_vals, dim=-1) - ray_bundle = copy.copy(input_ray_bundle) - if input_ray_bundle.bins is None: - ray_bundle.lengths = z_vals - else: - ray_bundle.bins = z_vals - - return ray_bundle - - -def apply_blurpool_on_weights(weights) -> torch.Tensor: - """ - Filter weights with a 2-tap max filters followed by a 2-tap blur filter, - which produces a wide and smooth upper envelope on the weights. - - Args: - weights: Tensor of shape `(..., dim)` - - Returns: - blured_weights: Tensor of shape `(..., dim)` - """ - weights_pad = torch.concatenate( - [ - weights[..., :1], - weights, - weights[..., -1:], - ], - dim=-1, - ) - - weights_max = torch.nn.functional.max_pool1d( - weights_pad.flatten(end_dim=-2), 2, stride=1 - ) - return torch.lerp(weights_max[..., :-1], weights_max[..., 1:], 0.5).reshape_as( - weights - ) diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_sampler.py b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_sampler.py deleted file mode 100644 index fe464f67076f501591edd281d8d488207033c582..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_sampler.py +++ /dev/null @@ -1,381 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional, Tuple - -import torch -from pytorch3d.implicitron.tools import camera_utils -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.renderer import NDCMultinomialRaysampler -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle - -from .base import EvaluationMode, ImplicitronRayBundle, RenderSamplingMode - - -class RaySamplerBase(ReplaceableBase): - """ - Base class for ray samplers. - """ - - def forward( - self, - cameras: CamerasBase, - evaluation_mode: EvaluationMode, - mask: Optional[torch.Tensor] = None, - ) -> ImplicitronRayBundle: - """ - Args: - cameras: A batch of `batch_size` cameras from which the rays are emitted. - evaluation_mode: one of `EvaluationMode.TRAINING` or - `EvaluationMode.EVALUATION` which determines the sampling mode - that is used. - mask: Active for the `RenderSamplingMode.MASK_SAMPLE` sampling mode. - Defines a non-negative mask of shape - `(batch_size, image_height, image_width)` where each per-pixel - value is proportional to the probability of sampling the - corresponding pixel's ray. - - Returns: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. 
- """ - raise NotImplementedError() - - -class AbstractMaskRaySampler(RaySamplerBase, torch.nn.Module): - """ - Samples a fixed number of points along rays which are in turn sampled for - each camera in a batch. - - This class utilizes `NDCMultinomialRaysampler` which allows to either - randomly sample rays from an input foreground saliency mask - (`RenderSamplingMode.MASK_SAMPLE`), or on a rectangular image grid - (`RenderSamplingMode.FULL_GRID`). The sampling mode can be set separately - for training and evaluation by setting `self.sampling_mode_training` - and `self.sampling_mode_training` accordingly. - - The class allows to adjust the sampling points along rays by overwriting the - `AbstractMaskRaySampler._get_min_max_depth_bounds` function which returns - the near/far planes (`min_depth`/`max_depth`) `NDCMultinomialRaysampler`. - - Settings: - image_width: The horizontal size of the image grid. - image_height: The vertical size of the image grid. - sampling_mode_training: The ray sampling mode for training. This should be a str - option from the RenderSamplingMode Enum - sampling_mode_evaluation: Same as above but for evaluation. - n_pts_per_ray_training: The number of points sampled along each ray during training. - n_pts_per_ray_evaluation: The number of points sampled along each ray during evaluation. - n_rays_per_image_sampled_from_mask: The amount of rays to be sampled from the image - grid. Given a batch of image grids, this many is sampled from each. - `n_rays_per_image_sampled_from_mask` and `n_rays_total_training` cannot both be - defined. - n_rays_total_training: (optional) How many rays in total to sample from the entire - batch of provided image grid. The result is as if `n_rays_total_training` - cameras/image grids were sampled with replacement from the cameras / image grids - provided and for every camera one ray was sampled. - `n_rays_per_image_sampled_from_mask` and `n_rays_total_training` cannot both be - defined, to use you have to set `n_rays_per_image` to None. - Used only for EvaluationMode.TRAINING. - stratified_point_sampling_training: if set, performs stratified random sampling - along the ray; otherwise takes ray points at deterministic offsets. - stratified_point_sampling_evaluation: Same as above but for evaluation. - cast_ray_bundle_as_cone: If True, the sampling will generate the bins and radii - attribute of ImplicitronRayBundle. The `bins` contain the z-coordinate - (=depth) of each ray in world units and are of shape - `(batch_size, n_rays_per_image, n_pts_per_ray_training/evaluation + 1)` - while `lengths` is equal to the midpoint of the bins: - (0.5 * (bins[..., 1:] + bins[..., :-1]). - If False, `bins` is None, `radii` is None and `lengths` contains - the z-coordinate (=depth) of each ray in world units and are of shape - `(batch_size, n_rays_per_image, n_pts_per_ray_training/evaluation)` - - Raises: - TypeError: if cast_ray_bundle_as_cone is set to True and n_rays_total_training - is not None will result in an error. HeterogeneousRayBundle is - not supported for conical frustum computation yet. 
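    Example (editorial, for illustration only): with `cast_ray_bundle_as_cone=True`
    and 3 points per ray, a single ray may carry `bins = [1.0, 2.0, 3.0, 4.0]`
    and hence `lengths = [1.5, 2.5, 3.5]` (the bin midpoints); with
    `cast_ray_bundle_as_cone=False` the same ray carries only
    `lengths = [1.0, 2.0, 3.0]` and `bins` is None.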
- """ - - image_width: int = 400 - image_height: int = 400 - sampling_mode_training: str = "mask_sample" - sampling_mode_evaluation: str = "full_grid" - n_pts_per_ray_training: int = 64 - n_pts_per_ray_evaluation: int = 64 - n_rays_per_image_sampled_from_mask: Optional[int] = 1024 - n_rays_total_training: Optional[int] = None - # stratified sampling vs taking points at deterministic offsets - stratified_point_sampling_training: bool = True - stratified_point_sampling_evaluation: bool = False - cast_ray_bundle_as_cone: bool = False - - def __post_init__(self): - if (self.n_rays_per_image_sampled_from_mask is not None) and ( - self.n_rays_total_training is not None - ): - raise ValueError( - "Cannot both define n_rays_total_training and " - "n_rays_per_image_sampled_from_mask." - ) - - self._sampling_mode = { - EvaluationMode.TRAINING: RenderSamplingMode(self.sampling_mode_training), - EvaluationMode.EVALUATION: RenderSamplingMode( - self.sampling_mode_evaluation - ), - } - - n_pts_per_ray_training = ( - self.n_pts_per_ray_training + 1 - if self.cast_ray_bundle_as_cone - else self.n_pts_per_ray_training - ) - n_pts_per_ray_evaluation = ( - self.n_pts_per_ray_evaluation + 1 - if self.cast_ray_bundle_as_cone - else self.n_pts_per_ray_evaluation - ) - self._training_raysampler = NDCMultinomialRaysampler( - image_width=self.image_width, - image_height=self.image_height, - n_pts_per_ray=n_pts_per_ray_training, - min_depth=0.0, - max_depth=0.0, - n_rays_per_image=self.n_rays_per_image_sampled_from_mask - if self._sampling_mode[EvaluationMode.TRAINING] - == RenderSamplingMode.MASK_SAMPLE - else None, - n_rays_total=self.n_rays_total_training, - unit_directions=True, - stratified_sampling=self.stratified_point_sampling_training, - ) - - self._evaluation_raysampler = NDCMultinomialRaysampler( - image_width=self.image_width, - image_height=self.image_height, - n_pts_per_ray=n_pts_per_ray_evaluation, - min_depth=0.0, - max_depth=0.0, - n_rays_per_image=self.n_rays_per_image_sampled_from_mask - if self._sampling_mode[EvaluationMode.EVALUATION] - == RenderSamplingMode.MASK_SAMPLE - else None, - unit_directions=True, - stratified_sampling=self.stratified_point_sampling_evaluation, - ) - - max_y, min_y = self._training_raysampler.max_y, self._training_raysampler.min_y - max_x, min_x = self._training_raysampler.max_x, self._training_raysampler.min_x - self.pixel_height: float = (max_y - min_y) / (self.image_height - 1) - self.pixel_width: float = (max_x - min_x) / (self.image_width - 1) - - def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]: - raise NotImplementedError() - - def forward( - self, - cameras: CamerasBase, - evaluation_mode: EvaluationMode, - mask: Optional[torch.Tensor] = None, - ) -> ImplicitronRayBundle: - """ - - Args: - cameras: A batch of `batch_size` cameras from which the rays are emitted. - evaluation_mode: one of `EvaluationMode.TRAINING` or - `EvaluationMode.EVALUATION` which determines the sampling mode - that is used. - mask: Active for the `RenderSamplingMode.MASK_SAMPLE` sampling mode. - Defines a non-negative mask of shape - `(batch_size, image_height, image_width)` where each per-pixel - value is proportional to the probability of sampling the - corresponding pixel's ray. - - Returns: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. 
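        Note (editorial): when mask sampling is active, `mask` does not need to
        match the render resolution; the implementation below resizes it to
        `(image_height, image_width)` with nearest-neighbour interpolation before
        using it as per-pixel sampling probabilities.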
- """ - sample_mask = None - if ( - self._sampling_mode[evaluation_mode] == RenderSamplingMode.MASK_SAMPLE - and mask is not None - ): - sample_mask = torch.nn.functional.interpolate( - mask, - size=[self.image_height, self.image_width], - mode="nearest", - )[:, 0] - - min_depth, max_depth = self._get_min_max_depth_bounds(cameras) - - raysampler = { - EvaluationMode.TRAINING: self._training_raysampler, - EvaluationMode.EVALUATION: self._evaluation_raysampler, - }[evaluation_mode] - - ray_bundle = raysampler( - cameras=cameras, - mask=sample_mask, - min_depth=min_depth, - max_depth=max_depth, - ) - if self.cast_ray_bundle_as_cone and isinstance( - ray_bundle, HeterogeneousRayBundle - ): - # If this error rises it means that raysampler has among - # its arguments `n_ray_totals`. If it is the case - # then you should update the radii computation and lengths - # computation to handle padding and unpadding. - raise TypeError( - "Heterogeneous ray bundle is not supported for conical frustum computation yet" - ) - elif self.cast_ray_bundle_as_cone: - pixel_hw: Tuple[float, float] = (self.pixel_height, self.pixel_width) - pixel_radii_2d = compute_radii(cameras, ray_bundle.xys[..., :2], pixel_hw) - return ImplicitronRayBundle( - directions=ray_bundle.directions, - origins=ray_bundle.origins, - lengths=None, - xys=ray_bundle.xys, - bins=ray_bundle.lengths, - pixel_radii_2d=pixel_radii_2d, - ) - - return ImplicitronRayBundle( - directions=ray_bundle.directions, - origins=ray_bundle.origins, - lengths=ray_bundle.lengths, - xys=ray_bundle.xys, - camera_counts=getattr(ray_bundle, "camera_counts", None), - camera_ids=getattr(ray_bundle, "camera_ids", None), - ) - - -@registry.register -class AdaptiveRaySampler(AbstractMaskRaySampler): - """ - Adaptively samples points on each ray between near and far planes whose - depths are determined based on the distance from the camera center - to a predefined scene center. - - More specifically, - `min_depth = max( - (self.scene_center-camera_center).norm() - self.scene_extent, eps - )` and - `max_depth = (self.scene_center-camera_center).norm() + self.scene_extent`. - - This sampling is ideal for object-centric scenes whose contents are - centered around a known `self.scene_center` and fit into a bounding sphere - with a radius of `self.scene_extent`. - - Args: - scene_center: The xyz coordinates of the center of the scene used - along with `scene_extent` to compute the min and max depth planes - for sampling ray-points. - scene_extent: The radius of the scene bounding box centered at `scene_center`. - """ - - scene_extent: float = 8.0 - scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0) - - def __post_init__(self): - super().__post_init__() - if self.scene_extent <= 0.0: - raise ValueError("Adaptive raysampler requires self.scene_extent > 0.") - self._scene_center = torch.FloatTensor(self.scene_center) - - def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]: - """ - Returns the adaptively calculated near/far planes. - """ - min_depth, max_depth = camera_utils.get_min_max_depth_bounds( - cameras, self._scene_center, self.scene_extent - ) - return float(min_depth[0]), float(max_depth[0]) - - -@registry.register -class NearFarRaySampler(AbstractMaskRaySampler): - """ - Samples a fixed number of points between fixed near and far z-planes. - Specifically, samples points along each ray with approximately uniform spacing - of z-coordinates between the minimum depth `self.min_depth` and the maximum depth - `self.max_depth`. 
This sampling is useful for rendering scenes where the camera is - in a constant distance from the focal point of the scene. - - Args: - min_depth: The minimum depth of a ray-point. - max_depth: The maximum depth of a ray-point. - """ - - min_depth: float = 0.1 - max_depth: float = 8.0 - - def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]: - """ - Returns the stored near/far planes. - """ - return self.min_depth, self.max_depth - - -def compute_radii( - cameras: CamerasBase, - xy_grid: torch.Tensor, - pixel_hw_ndc: Tuple[float, float], -) -> torch.Tensor: - """ - Compute radii of conical frustums in world coordinates. - - Args: - cameras: cameras object representing a batch of cameras. - xy_grid: torch.tensor grid of image xy coords. - pixel_hw_ndc: pixel height and width in NDC - - Returns: - radii: A tensor of shape `(..., 1)` radii of a cone. - """ - batch_size = xy_grid.shape[0] - spatial_size = xy_grid.shape[1:-1] - n_rays_per_image = spatial_size.numel() - - xy = xy_grid.view(batch_size, n_rays_per_image, 2) - - # [batch_size, 3 * n_rays_per_image, 2] - xy = torch.cat( - [ - xy, - # Will allow to find the norm on the x axis - xy + torch.tensor([pixel_hw_ndc[1], 0], device=xy.device), - # Will allow to find the norm on the y axis - xy + torch.tensor([0, pixel_hw_ndc[0]], device=xy.device), - ], - dim=1, - ) - # [batch_size, 3 * n_rays_per_image, 3] - xyz = torch.cat( - ( - xy, - xy.new_ones(batch_size, 3 * n_rays_per_image, 1), - ), - dim=-1, - ) - - # unproject the points - unprojected_xyz = cameras.unproject_points(xyz, from_ndc=True) - - plane_world, plane_world_dx, plane_world_dy = torch.split( - unprojected_xyz, n_rays_per_image, dim=1 - ) - - # Distance from each unit-norm direction vector to its neighbors. - dx_norm = torch.linalg.norm(plane_world_dx - plane_world, dim=-1, keepdims=True) - dy_norm = torch.linalg.norm(plane_world_dy - plane_world, dim=-1, keepdims=True) - # Cut the distance in half to obtain the base radius: (dx_norm + dy_norm) * 0.5 - # Scale it by 2/12**0.5 to match the variance of the pixel’s footprint - radii = (dx_norm + dy_norm) / 12**0.5 - - return radii.view(batch_size, *spatial_size, 1) diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_tracing.py b/pytorch3d/pytorch3d/implicitron/models/renderer/ray_tracing.py deleted file mode 100644 index 5c0dd0a40cd4987e19ddaaa196fd56c23ba35800..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/ray_tracing.py +++ /dev/null @@ -1,590 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from https://github.com/lioryariv/idr -# Copyright (c) 2020 Lior Yariv - -from typing import Any, Callable, Tuple - -import torch -import torch.nn as nn -from pytorch3d.implicitron.tools.config import Configurable - - -class RayTracing(Configurable, nn.Module): - """ - Finds the intersection points of rays with the implicit surface defined - by a signed distance function (SDF). The algorithm follows the pipeline: - 1. Initialise start and end points on rays by the intersections with - the circumscribing sphere. - 2. Run sphere tracing from both ends. - 3. Divide the untraced segments of non-convergent rays into uniform - intervals and find the one with the sign transition. - 4. Run the secant method to estimate the point of the sign transition. - - Args: - object_bounding_sphere: The radius of the initial sphere circumscribing - the object. - sdf_threshold: Absolute SDF value small enough for the sphere tracer - to consider it a surface. 
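The 1/sqrt(12) factor in `compute_radii` above comes from matching the cone radius to the spread of a pixel's footprint: a uniform distribution over a pixel of world-space width w has standard deviation w / sqrt(12), so the average spacing between neighbouring pixel centres, (dx + dy) / 2, is scaled by 2 / sqrt(12), which equals (dx + dy) / sqrt(12). A small numeric sketch with hypothetical footprints:

import math
import torch

# Hypothetical per-ray world-space footprints of one pixel step in x and y.
dx = torch.full((1024, 1), 0.01)
dy = torch.full((1024, 1), 0.02)
# Average spacing scaled by 2 / sqrt(12) is the same as (dx + dy) / sqrt(12).
radii = (dx + dy) * 0.5 * (2.0 / math.sqrt(12.0))
assert torch.allclose(radii, (dx + dy) / math.sqrt(12.0))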
- line_search_step: Length of the backward correction on sphere tracing - iterations. - line_step_iters: Number of backward correction iterations. - sphere_tracing_iters: Maximum number of sphere tracing iterations - (the actual number of iterations may be smaller if all ray - intersections are found). - n_steps: Number of intervals sampled for unconvergent rays. - n_secant_steps: Number of iterations in the secant algorithm. - """ - - object_bounding_sphere: float = 1.0 - sdf_threshold: float = 5.0e-5 - line_search_step: float = 0.5 - line_step_iters: int = 1 - sphere_tracing_iters: int = 10 - n_steps: int = 100 - n_secant_steps: int = 8 - - def forward( - self, - sdf: Callable[[torch.Tensor], torch.Tensor], - cam_loc: torch.Tensor, - object_mask: torch.BoolTensor, - ray_directions: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Args: - sdf: A callable that takes a (N, 3) tensor of points and returns - a tensor of (N,) SDF values. - cam_loc: A tensor of (B, N, 3) ray origins. - object_mask: A (N, 3) tensor of indicators whether a sampled pixel - corresponds to the rendered object or background. - ray_directions: A tensor of (B, N, 3) ray directions. - - Returns: - curr_start_points: A tensor of (B*N, 3) found intersection points - with the implicit surface. - network_object_mask: A tensor of (B*N,) indicators denoting whether - intersections were found. - acc_start_dis: A tensor of (B*N,) distances from the ray origins - to intersrection points. - """ - batch_size, num_pixels, _ = ray_directions.shape - device = cam_loc.device - - sphere_intersections, mask_intersect = _get_sphere_intersection( - cam_loc, ray_directions, r=self.object_bounding_sphere - ) - - ( - curr_start_points, - unfinished_mask_start, - acc_start_dis, - acc_end_dis, - min_dis, - max_dis, - ) = self.sphere_tracing( - batch_size, - num_pixels, - sdf, - cam_loc, - ray_directions, - mask_intersect, - sphere_intersections, - ) - - network_object_mask = acc_start_dis < acc_end_dis - - # The non convergent rays should be handled by the sampler - sampler_mask = unfinished_mask_start - sampler_net_obj_mask = torch.zeros_like( - sampler_mask, dtype=torch.bool, device=device - ) - if sampler_mask.sum() > 0: - sampler_min_max = torch.zeros((batch_size, num_pixels, 2), device=device) - sampler_min_max.reshape(-1, 2)[sampler_mask, 0] = acc_start_dis[ - sampler_mask - ] - sampler_min_max.reshape(-1, 2)[sampler_mask, 1] = acc_end_dis[sampler_mask] - - sampler_pts, sampler_net_obj_mask, sampler_dists = self.ray_sampler( - sdf, cam_loc, object_mask, ray_directions, sampler_min_max, sampler_mask - ) - - curr_start_points[sampler_mask] = sampler_pts[sampler_mask] - acc_start_dis[sampler_mask] = sampler_dists[sampler_mask] - network_object_mask[sampler_mask] = sampler_net_obj_mask[sampler_mask] - - if not self.training: - return curr_start_points, network_object_mask, acc_start_dis - - # in case we are training, we are updating curr_start_points and acc_start_dis for - - ray_directions = ray_directions.reshape(-1, 3) - mask_intersect = mask_intersect.reshape(-1) - # pyre-fixme[9]: object_mask has type `BoolTensor`; used as `Tensor`. 
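The `_get_sphere_intersection` helper called at the start of `forward` (defined later in this file) solves a per-ray quadratic against the circumscribing sphere. A self-contained sketch of that geometry, assuming unit-norm ray directions and a sphere of radius r centred at the origin:

import torch

def ray_sphere_intersection(origins, dirs, r=1.0):
    # origins, dirs: (N, 3); solves ||o + t * d||^2 = r^2 for t.
    od = (origins * dirs).sum(-1)
    under_sqrt = od ** 2 - (origins.norm(dim=-1) ** 2 - r ** 2)
    hit = under_sqrt > 0
    sqrt_term = torch.sqrt(under_sqrt.clamp(min=0.0))
    near = (-od - sqrt_term).clamp(min=0.0)  # entry depth (0 if origin inside)
    far = (-od + sqrt_term).clamp(min=0.0)   # exit depth
    return torch.stack([near, far], dim=-1), hit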
- object_mask = object_mask.reshape(-1) - - in_mask = ~network_object_mask & object_mask & ~sampler_mask - out_mask = ~object_mask & ~sampler_mask - - mask_left_out = (in_mask | out_mask) & ~mask_intersect - if ( - mask_left_out.sum() > 0 - ): # project the origin to the not intersect points on the sphere - cam_left_out = cam_loc.reshape(-1, 3)[mask_left_out] - rays_left_out = ray_directions[mask_left_out] - acc_start_dis[mask_left_out] = -torch.bmm( - rays_left_out.view(-1, 1, 3), cam_left_out.view(-1, 3, 1) - ).squeeze() - curr_start_points[mask_left_out] = ( - cam_left_out + acc_start_dis[mask_left_out].unsqueeze(1) * rays_left_out - ) - - mask = (in_mask | out_mask) & mask_intersect - - if mask.sum() > 0: - min_dis[network_object_mask & out_mask] = acc_start_dis[ - network_object_mask & out_mask - ] - - min_mask_points, min_mask_dist = self.minimal_sdf_points( - sdf, cam_loc, ray_directions, mask, min_dis, max_dis - ) - - curr_start_points[mask] = min_mask_points - acc_start_dis[mask] = min_mask_dist - - return curr_start_points, network_object_mask, acc_start_dis - - def sphere_tracing( - self, - batch_size: int, - num_pixels: int, - sdf: Callable[[torch.Tensor], torch.Tensor], - cam_loc: torch.Tensor, - ray_directions: torch.Tensor, - mask_intersect: torch.Tensor, - sphere_intersections: torch.Tensor, - ) -> Tuple[Any, Any, Any, Any, Any, Any]: - """ - Run sphere tracing algorithm for max iterations - from both sides of unit sphere intersection - - Args: - batch_size: - num_pixels: - sdf: - cam_loc: - ray_directions: - mask_intersect: - sphere_intersections: - - Returns: - curr_start_points: - unfinished_mask_start: - acc_start_dis: - acc_end_dis: - min_dis: - max_dis: - """ - - device = cam_loc.device - sphere_intersections_points = ( - cam_loc[..., None, :] - + sphere_intersections[..., None] * ray_directions[..., None, :] - ) - unfinished_mask_start = mask_intersect.reshape(-1).clone() - unfinished_mask_end = mask_intersect.reshape(-1).clone() - - # Initialize start current points - curr_start_points = torch.zeros(batch_size * num_pixels, 3, device=device) - curr_start_points[unfinished_mask_start] = sphere_intersections_points[ - :, :, 0, : - ].reshape(-1, 3)[unfinished_mask_start] - acc_start_dis = torch.zeros(batch_size * num_pixels, device=device) - acc_start_dis[unfinished_mask_start] = sphere_intersections.reshape(-1, 2)[ - unfinished_mask_start, 0 - ] - - # Initialize end current points - curr_end_points = torch.zeros(batch_size * num_pixels, 3, device=device) - curr_end_points[unfinished_mask_end] = sphere_intersections_points[ - :, :, 1, : - ].reshape(-1, 3)[unfinished_mask_end] - acc_end_dis = torch.zeros(batch_size * num_pixels, device=device) - acc_end_dis[unfinished_mask_end] = sphere_intersections.reshape(-1, 2)[ - unfinished_mask_end, 1 - ] - - # Initialise min and max depth - min_dis = acc_start_dis.clone() - max_dis = acc_end_dis.clone() - - # Iterate on the rays (from both sides) till finding a surface - iters = 0 - - # TODO: sdf should also pass info about batches - - next_sdf_start = torch.zeros_like(acc_start_dis) - next_sdf_start[unfinished_mask_start] = sdf( - curr_start_points[unfinished_mask_start] - ) - - next_sdf_end = torch.zeros_like(acc_end_dis) - next_sdf_end[unfinished_mask_end] = sdf(curr_end_points[unfinished_mask_end]) - - while True: - # Update sdf - curr_sdf_start = torch.zeros_like(acc_start_dis) - curr_sdf_start[unfinished_mask_start] = next_sdf_start[ - unfinished_mask_start - ] - curr_sdf_start[curr_sdf_start <= self.sdf_threshold] = 0 - - 
curr_sdf_end = torch.zeros_like(acc_end_dis) - curr_sdf_end[unfinished_mask_end] = next_sdf_end[unfinished_mask_end] - curr_sdf_end[curr_sdf_end <= self.sdf_threshold] = 0 - - # Update masks - unfinished_mask_start = unfinished_mask_start & ( - curr_sdf_start > self.sdf_threshold - ) - unfinished_mask_end = unfinished_mask_end & ( - curr_sdf_end > self.sdf_threshold - ) - - if ( - unfinished_mask_start.sum() == 0 and unfinished_mask_end.sum() == 0 - ) or iters == self.sphere_tracing_iters: - break - iters += 1 - - # Make step - # Update distance - acc_start_dis = acc_start_dis + curr_sdf_start - acc_end_dis = acc_end_dis - curr_sdf_end - - # Update points - curr_start_points = ( - cam_loc - + acc_start_dis.reshape(batch_size, num_pixels, 1) * ray_directions - ).reshape(-1, 3) - curr_end_points = ( - cam_loc - + acc_end_dis.reshape(batch_size, num_pixels, 1) * ray_directions - ).reshape(-1, 3) - - # Fix points which wrongly crossed the surface - next_sdf_start = torch.zeros_like(acc_start_dis) - next_sdf_start[unfinished_mask_start] = sdf( - curr_start_points[unfinished_mask_start] - ) - - next_sdf_end = torch.zeros_like(acc_end_dis) - next_sdf_end[unfinished_mask_end] = sdf( - curr_end_points[unfinished_mask_end] - ) - - not_projected_start = next_sdf_start < 0 - not_projected_end = next_sdf_end < 0 - not_proj_iters = 0 - while ( - not_projected_start.sum() > 0 or not_projected_end.sum() > 0 - ) and not_proj_iters < self.line_step_iters: - # Step backwards - acc_start_dis[not_projected_start] -= ( - (1 - self.line_search_step) / (2**not_proj_iters) - ) * curr_sdf_start[not_projected_start] - curr_start_points[not_projected_start] = ( - cam_loc - + acc_start_dis.reshape(batch_size, num_pixels, 1) * ray_directions - ).reshape(-1, 3)[not_projected_start] - - acc_end_dis[not_projected_end] += ( - (1 - self.line_search_step) / (2**not_proj_iters) - ) * curr_sdf_end[not_projected_end] - curr_end_points[not_projected_end] = ( - cam_loc - + acc_end_dis.reshape(batch_size, num_pixels, 1) * ray_directions - ).reshape(-1, 3)[not_projected_end] - - # Calc sdf - next_sdf_start[not_projected_start] = sdf( - curr_start_points[not_projected_start] - ) - next_sdf_end[not_projected_end] = sdf( - curr_end_points[not_projected_end] - ) - - # Update mask - not_projected_start = next_sdf_start < 0 - not_projected_end = next_sdf_end < 0 - not_proj_iters += 1 - - unfinished_mask_start = unfinished_mask_start & ( - acc_start_dis < acc_end_dis - ) - unfinished_mask_end = unfinished_mask_end & (acc_start_dis < acc_end_dis) - - return ( - curr_start_points, - unfinished_mask_start, - acc_start_dis, - acc_end_dis, - min_dis, - max_dis, - ) - - def ray_sampler( - self, - sdf: Callable[[torch.Tensor], torch.Tensor], - cam_loc: torch.Tensor, - object_mask: torch.Tensor, - ray_directions: torch.Tensor, - sampler_min_max: torch.Tensor, - sampler_mask: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Sample the ray in a given range and run secant on rays which have sign transition. 
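The loop above advances each unfinished ray by its current SDF value and, when a ray overshoots through the surface, steps back by a fraction controlled by `line_search_step`. A minimal one-sided sketch of the core iteration, without the far-end trace or the backward correction:

import torch

def simple_sphere_trace(sdf, origins, dirs, t_init, n_iters=10, thresh=5.0e-5):
    # sdf: callable mapping (N, 3) points to (N,) signed distances.
    t = t_init.clone()
    for _ in range(n_iters):
        d = sdf(origins + t[..., None] * dirs)
        converged = d.abs() < thresh
        if bool(converged.all()):
            break
        t = torch.where(converged, t, t + d)  # march forward by the SDF value
    return t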
- - Args: - sdf: - cam_loc: - object_mask: - ray_directions: - sampler_min_max: - sampler_mask: - - Returns: - - """ - - batch_size, num_pixels, _ = ray_directions.shape - device = cam_loc.device - n_total_pxl = batch_size * num_pixels - sampler_pts = torch.zeros(n_total_pxl, 3, device=device) - sampler_dists = torch.zeros(n_total_pxl, device=device) - - intervals_dist = torch.linspace(0, 1, steps=self.n_steps, device=device).view( - 1, 1, -1 - ) - - pts_intervals = sampler_min_max[:, :, 0].unsqueeze(-1) + intervals_dist * ( - sampler_min_max[:, :, 1] - sampler_min_max[:, :, 0] - ).unsqueeze(-1) - points = ( - cam_loc[..., None, :] - + pts_intervals[..., None] * ray_directions[..., None, :] - ) - - # Get the non convergent rays - mask_intersect_idx = torch.nonzero(sampler_mask).flatten() - points = points.reshape((-1, self.n_steps, 3))[sampler_mask, :, :] - pts_intervals = pts_intervals.reshape((-1, self.n_steps))[sampler_mask] - - sdf_val_all = [] - for pnts in torch.split(points.reshape(-1, 3), 100000, dim=0): - sdf_val_all.append(sdf(pnts)) - sdf_val = torch.cat(sdf_val_all).reshape(-1, self.n_steps) - - tmp = torch.sign(sdf_val) * torch.arange( - self.n_steps, 0, -1, device=device, dtype=torch.float32 - ).reshape(1, self.n_steps) - # Force argmin to return the first min value - sampler_pts_ind = torch.argmin(tmp, -1) - sampler_pts[mask_intersect_idx] = points[ - torch.arange(points.shape[0]), sampler_pts_ind, : - ] - sampler_dists[mask_intersect_idx] = pts_intervals[ - torch.arange(pts_intervals.shape[0]), sampler_pts_ind - ] - - true_surface_pts = object_mask.reshape(-1)[sampler_mask] - net_surface_pts = sdf_val[torch.arange(sdf_val.shape[0]), sampler_pts_ind] < 0 - - # take points with minimal SDF value for P_out pixels - p_out_mask = ~(true_surface_pts & net_surface_pts) - n_p_out = p_out_mask.sum() - if n_p_out > 0: - out_pts_idx = torch.argmin(sdf_val[p_out_mask, :], -1) - sampler_pts[mask_intersect_idx[p_out_mask]] = points[p_out_mask, :, :][ - # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` - # but got `Tensor`. - torch.arange(n_p_out), - out_pts_idx, - :, - ] - sampler_dists[mask_intersect_idx[p_out_mask]] = pts_intervals[ - p_out_mask, - : - # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` but - # got `Tensor`. - ][torch.arange(n_p_out), out_pts_idx] - - # Get Network object mask - sampler_net_obj_mask = sampler_mask.clone() - sampler_net_obj_mask[mask_intersect_idx[~net_surface_pts]] = False - - # Run Secant method - secant_pts = ( - net_surface_pts & true_surface_pts if self.training else net_surface_pts - ) - n_secant_pts = secant_pts.sum() - if n_secant_pts > 0: - # Get secant z predictions - z_high = pts_intervals[ - torch.arange(pts_intervals.shape[0]), sampler_pts_ind - ][secant_pts] - sdf_high = sdf_val[torch.arange(sdf_val.shape[0]), sampler_pts_ind][ - secant_pts - ] - z_low = pts_intervals[secant_pts][ - # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` - # but got `Tensor`. - torch.arange(n_secant_pts), - sampler_pts_ind[secant_pts] - 1, - ] - sdf_low = sdf_val[secant_pts][ - # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` - # but got `Tensor`. 
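The `torch.sign(sdf_val) * arange(n_steps, 0, -1)` expression above is a vectorised way of locating the first outside-to-inside transition along each ray: negative SDF entries map to negative values whose magnitude decreases with the step index, so `argmin` picks the earliest negative step. A sketch of just that trick:

import torch

def first_sign_transition(sdf_vals: torch.Tensor) -> torch.Tensor:
    # sdf_vals: (n_rays, n_steps) SDF values at increasing depths.
    n_steps = sdf_vals.shape[-1]
    ramp = torch.arange(n_steps, 0, -1, dtype=sdf_vals.dtype)
    return torch.argmin(torch.sign(sdf_vals) * ramp, dim=-1)

sdf_vals = torch.tensor([[0.3, 0.1, -0.2, -0.5, 0.4]])
assert first_sign_transition(sdf_vals).item() == 2  # first negative entry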
- torch.arange(n_secant_pts), - sampler_pts_ind[secant_pts] - 1, - ] - cam_loc_secant = cam_loc.reshape(-1, 3)[mask_intersect_idx[secant_pts]] - ray_directions_secant = ray_directions.reshape((-1, 3))[ - mask_intersect_idx[secant_pts] - ] - z_pred_secant = self.secant( - sdf_low, - sdf_high, - z_low, - z_high, - cam_loc_secant, - ray_directions_secant, - # pyre-fixme[6]: For 7th param expected `Module` but got `(Tensor) - # -> Tensor`. - sdf, - ) - - # Get points - sampler_pts[mask_intersect_idx[secant_pts]] = ( - cam_loc_secant + z_pred_secant.unsqueeze(-1) * ray_directions_secant - ) - sampler_dists[mask_intersect_idx[secant_pts]] = z_pred_secant - - return sampler_pts, sampler_net_obj_mask, sampler_dists - - def secant( - self, - sdf_low: torch.Tensor, - sdf_high: torch.Tensor, - z_low: torch.Tensor, - z_high: torch.Tensor, - cam_loc: torch.Tensor, - ray_directions: torch.Tensor, - sdf: nn.Module, - ) -> torch.Tensor: - """ - Runs the secant method for interval [z_low, z_high] for n_secant_steps - """ - - z_pred = -sdf_low * (z_high - z_low) / (sdf_high - sdf_low) + z_low - for _ in range(self.n_secant_steps): - p_mid = cam_loc + z_pred.unsqueeze(-1) * ray_directions - sdf_mid = sdf(p_mid) - ind_low = sdf_mid > 0 - if ind_low.sum() > 0: - z_low[ind_low] = z_pred[ind_low] - sdf_low[ind_low] = sdf_mid[ind_low] - ind_high = sdf_mid < 0 - if ind_high.sum() > 0: - z_high[ind_high] = z_pred[ind_high] - sdf_high[ind_high] = sdf_mid[ind_high] - - z_pred = -sdf_low * (z_high - z_low) / (sdf_high - sdf_low) + z_low - - return z_pred - - def minimal_sdf_points( - self, - sdf: Callable[[torch.Tensor], torch.Tensor], - cam_loc: torch.Tensor, - ray_directions: torch.Tensor, - mask: torch.Tensor, - min_dis: torch.Tensor, - max_dis: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Find points with minimal SDF value on rays for P_out pixels - """ - - n_mask_points = mask.sum() - - n = self.n_steps - steps = torch.empty(n, device=cam_loc.device).uniform_(0.0, 1.0) - mask_max_dis = max_dis[mask].unsqueeze(-1) - mask_min_dis = min_dis[mask].unsqueeze(-1) - steps = ( - # pyre-fixme[6]: For 1st param expected `int` but got `Tensor`. - steps.unsqueeze(0).repeat(n_mask_points, 1) * (mask_max_dis - mask_min_dis) - + mask_min_dis - ) - - mask_points = cam_loc.reshape(-1, 3)[mask] - mask_rays = ray_directions[mask, :] - - mask_points_all = mask_points.unsqueeze(1).repeat(1, n, 1) + steps.unsqueeze( - -1 - ) * mask_rays.unsqueeze(1).repeat(1, n, 1) - points = mask_points_all.reshape(-1, 3) - - mask_sdf_all = [] - for pnts in torch.split(points, 100000, dim=0): - mask_sdf_all.append(sdf(pnts)) - - mask_sdf_all = torch.cat(mask_sdf_all).reshape(-1, n) - min_vals, min_idx = mask_sdf_all.min(-1) - min_mask_points = mask_points_all.reshape(-1, n, 3)[ - # pyre-fixme[6]: For 2nd param expected `Union[bool, float, int]` but - # got `Tensor`. - torch.arange(0, n_mask_points), - min_idx, - ] - # pyre-fixme[6]: For 2nd param expected `Union[bool, float, int]` but got - # `Tensor`. 
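The `secant` method above refines the crossing depth inside the bracketing interval found by `ray_sampler`. The same update written for a single scalar ray for clarity, assuming the bracket satisfies f(z_low) > 0 and f(z_high) < 0:

def secant_root(f, z_low, z_high, n_steps=8):
    f_low, f_high = f(z_low), f(z_high)
    z = z_low - f_low * (z_high - z_low) / (f_high - f_low)
    for _ in range(n_steps):
        f_mid = f(z)
        if f_mid > 0:
            z_low, f_low = z, f_mid
        else:
            z_high, f_high = z, f_mid
        z = z_low - f_low * (z_high - z_low) / (f_high - f_low)
    return z

# e.g. the zero of f(z) = 1 - z on the bracket [0, 2]:
assert abs(secant_root(lambda z: 1.0 - z, 0.0, 2.0) - 1.0) < 1e-6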
- min_mask_dist = steps.reshape(-1, n)[torch.arange(0, n_mask_points), min_idx] - - return min_mask_points, min_mask_dist - - -# TODO: support variable origins -def _get_sphere_intersection( - cam_loc: torch.Tensor, ray_directions: torch.Tensor, r: float = 1.0 -) -> Tuple[torch.Tensor, torch.Tensor]: - # Input: n_images x 3 ; n_images x n_rays x 3 - # Output: n_images * n_rays x 2 (close and far) ; n_images * n_rays - - n_imgs, n_pix, _ = ray_directions.shape - device = cam_loc.device - - # cam_loc = cam_loc.unsqueeze(-1) - # ray_cam_dot = torch.bmm(ray_directions, cam_loc).squeeze() - ray_cam_dot = (ray_directions * cam_loc).sum(-1) # n_images x n_rays - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - under_sqrt = ray_cam_dot**2 - (cam_loc.norm(2, dim=-1) ** 2 - r**2) - - under_sqrt = under_sqrt.reshape(-1) - mask_intersect = under_sqrt > 0 - - sphere_intersections = torch.zeros(n_imgs * n_pix, 2, device=device) - sphere_intersections[mask_intersect] = torch.sqrt( - under_sqrt[mask_intersect] - ).unsqueeze(-1) * torch.tensor([-1.0, 1.0], device=device) - sphere_intersections[mask_intersect] -= ray_cam_dot.reshape(-1)[ - mask_intersect - ].unsqueeze(-1) - - sphere_intersections = sphere_intersections.reshape(n_imgs, n_pix, 2) - sphere_intersections = sphere_intersections.clamp_min(0.0) - mask_intersect = mask_intersect.reshape(n_imgs, n_pix) - - return sphere_intersections, mask_intersect diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/raymarcher.py b/pytorch3d/pytorch3d/implicitron/models/renderer/raymarcher.py deleted file mode 100644 index 9c6addf1aa78ab6523333ba451f758c8c7fe5415..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/raymarcher.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Any, Callable, Dict, Optional, Tuple - -import torch -from pytorch3d.implicitron.models.renderer.base import RendererOutput -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.renderer.implicit.raymarching import _check_raymarcher_inputs - - -_TTensor = torch.Tensor - - -class RaymarcherBase(ReplaceableBase): - """ - Defines a base class for raymarchers. Specifically, a raymarcher is responsible - for taking a set of features and density descriptors along rendering rays - and marching along them in order to generate a feature render. - """ - - def forward( - self, - rays_densities: torch.Tensor, - rays_features: torch.Tensor, - aux: Dict[str, Any], - ) -> RendererOutput: - """ - Args: - rays_densities: Per-ray density values represented with a tensor - of shape `(..., n_points_per_ray, 1)`. - rays_features: Per-ray feature values represented with a tensor - of shape `(..., n_points_per_ray, feature_dim)`. - aux: a dictionary with extra information. - """ - raise NotImplementedError() - - -class AccumulativeRaymarcherBase(RaymarcherBase, torch.nn.Module): - """ - This generalizes the `pytorch3d.renderer.EmissionAbsorptionRaymarcher` - and NeuralVolumes' cumsum ray marcher. It additionally returns - the rendering weights that can be used in the NVS pipeline to carry out - the importance ray-sampling in the refining pass. - Different from `pytorch3d.renderer.EmissionAbsorptionRaymarcher`, it takes raw - (non-exponentiated) densities. 
- - Args: - surface_thickness: The thickness of the raymarched surface. - bg_color: The background color. A tuple of either 1 element or of D elements, - where D matches the feature dimensionality; it is broadcast when necessary. - replicate_last_interval: If True, the ray length assigned to the last interval - for the opacity delta calculation is copied from the penultimate interval. - background_opacity: The length over which the last raw opacity value - (i.e. before exponentiation) is considered to apply, for the delta - calculation. Ignored if replicate_last_interval=True. - density_relu: If `True`, passes the input density through ReLU before - raymarching. - blend_output: If `True`, alpha-blends the output renders with the - background color using the rendered opacity mask. - - capping_function: The capping function of the raymarcher. - Options: - - "exponential" (`cap_fn(x) = 1 - exp(-x)`) - - "cap1" (`cap_fn(x) = min(x, 1)`) - Set to "exponential" for the standard Emission Absorption raymarching. - weight_function: The weighting function of the raymarcher. - Options: - - "product" (`weight_fn(w, x) = w * x`) - - "minimum" (`weight_fn(w, x) = min(w, x)`) - Set to "product" for the standard Emission Absorption raymarching. - """ - - surface_thickness: int = 1 - bg_color: Tuple[float, ...] = (0.0,) - replicate_last_interval: bool = False - background_opacity: float = 0.0 - density_relu: bool = True - blend_output: bool = False - - @property - def capping_function_type(self) -> str: - raise NotImplementedError() - - @property - def weight_function_type(self) -> str: - raise NotImplementedError() - - def __post_init__(self): - """ - Args: - surface_thickness: Denotes the overlap between the absorption - function and the density function. - """ - bg_color = torch.tensor(self.bg_color) - if bg_color.ndim != 1: - raise ValueError(f"bg_color (shape {bg_color.shape}) should be a 1D tensor") - - self.register_buffer("_bg_color", bg_color, persistent=False) - - self._capping_function: Callable[[_TTensor], _TTensor] = { - "exponential": lambda x: 1.0 - torch.exp(-x), - "cap1": lambda x: x.clamp(max=1.0), - }[self.capping_function_type] - - self._weight_function: Callable[[_TTensor, _TTensor], _TTensor] = { - "product": lambda curr, acc: curr * acc, - "minimum": lambda curr, acc: torch.minimum(curr, acc), - }[self.weight_function_type] - - # pyre-fixme[14]: `forward` overrides method defined in `RaymarcherBase` - # inconsistently. - def forward( - self, - rays_densities: torch.Tensor, - rays_features: torch.Tensor, - aux: Dict[str, Any], - ray_lengths: torch.Tensor, - ray_deltas: Optional[torch.Tensor] = None, - density_noise_std: float = 0.0, - **kwargs, - ) -> RendererOutput: - """ - Args: - rays_densities: Per-ray density values represented with a tensor - of shape `(..., n_points_per_ray, 1)`. - rays_features: Per-ray feature values represented with a tensor - of shape `(..., n_points_per_ray, feature_dim)`. - aux: a dictionary with extra information. - ray_lengths: Per-ray depth values represented with a tensor - of shape `(..., n_points_per_ray, feature_dim)`. - ray_deltas: Optional differences between consecutive elements along the ray bundle - represented with a tensor of shape `(..., n_points_per_ray)`. If None, - these differences are computed from ray_lengths. - density_noise_std: the magnitude of the noise added to densities. - - Returns: - features: A tensor of shape `(..., feature_dim)` containing - the rendered features for each ray. 
- depth: A tensor of shape `(..., 1)` containing estimated depth. - opacities: A tensor of shape `(..., 1)` containing rendered opacities. - weights: A tensor of shape `(..., n_points_per_ray)` containing - the ray-specific non-negative opacity weights. In general, they - don't sum to 1 but do not overcome it, i.e. - `(weights.sum(dim=-1) <= 1.0).all()` holds. - """ - _check_raymarcher_inputs( - rays_densities, - rays_features, - ray_lengths, - z_can_be_none=True, - features_can_be_none=False, - density_1d=True, - ) - - if ray_deltas is None: - ray_lengths_diffs = torch.diff(ray_lengths, dim=-1) - if self.replicate_last_interval: - last_interval = ray_lengths_diffs[..., -1:] - else: - last_interval = torch.full_like( - ray_lengths[..., :1], self.background_opacity - ) - deltas = torch.cat((ray_lengths_diffs, last_interval), dim=-1) - else: - deltas = ray_deltas - - rays_densities = rays_densities[..., 0] - - if density_noise_std > 0.0: - noise: _TTensor = torch.randn_like(rays_densities).mul(density_noise_std) - rays_densities = rays_densities + noise - if self.density_relu: - rays_densities = torch.relu(rays_densities) - - weighted_densities = deltas * rays_densities - capped_densities = self._capping_function(weighted_densities) - - rays_opacities = self._capping_function( - torch.cumsum(weighted_densities, dim=-1) - ) - opacities = rays_opacities[..., -1:] - absorption_shifted = (-rays_opacities + 1.0).roll( - self.surface_thickness, dims=-1 - ) - absorption_shifted[..., : self.surface_thickness] = 1.0 - - weights = self._weight_function(capped_densities, absorption_shifted) - features = (weights[..., None] * rays_features).sum(dim=-2) - depth = (weights * ray_lengths)[..., None].sum(dim=-2) - - alpha = opacities if self.blend_output else 1 - if self._bg_color.shape[-1] not in [1, features.shape[-1]]: - raise ValueError("Wrong number of background color channels.") - features = alpha * features + (1 - opacities) * self._bg_color - - return RendererOutput( - features=features, - depths=depth, - masks=opacities, - weights=weights, - aux=aux, - ) - - -@registry.register -class EmissionAbsorptionRaymarcher(AccumulativeRaymarcherBase): - """ - Implements the EmissionAbsorption raymarcher. - """ - - background_opacity: float = 1e10 - - @property - def capping_function_type(self) -> str: - return "exponential" - - @property - def weight_function_type(self) -> str: - return "product" - - -@registry.register -class CumsumRaymarcher(AccumulativeRaymarcherBase): - """ - Implements the NeuralVolumes' cumulative-sum raymarcher. 
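With the default `capping_function_type="exponential"` and `weight_function_type="product"` (the `EmissionAbsorptionRaymarcher` configuration above), the per-point weights reduce to the standard emission-absorption expression alpha_i * T_i. A sketch of that special case, assuming `surface_thickness=1` and raw densities that have already passed through ReLU:

import torch

def emission_absorption_weights(raw_densities, deltas):
    # raw_densities, deltas: (..., n_pts); alpha_i = 1 - exp(-sigma_i * delta_i),
    # T_i = exp(-sum_{j<i} sigma_j * delta_j), weight_i = alpha_i * T_i.
    weighted = deltas * raw_densities
    alphas = 1.0 - torch.exp(-weighted)
    transmittance = torch.exp(-torch.cumsum(weighted, dim=-1))
    # Shift so T_1 = 1: nothing is absorbed before the first sample.
    transmittance = torch.cat(
        [torch.ones_like(transmittance[..., :1]), transmittance[..., :-1]], dim=-1
    )
    return alphas * transmittance  # sums to at most 1 along the last dimension

Rendered features and depths are then weighted sums over the point dimension, e.g. features = (weights[..., None] * rays_features).sum(dim=-2).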
- """ - - @property - def capping_function_type(self) -> str: - return "cap1" - - @property - def weight_function_type(self) -> str: - return "minimum" diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/rgb_net.py b/pytorch3d/pytorch3d/implicitron/models/renderer/rgb_net.py deleted file mode 100644 index 6d41d2165b2c7769509925708b387c5db17137d2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/rgb_net.py +++ /dev/null @@ -1,138 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from RenderingNetwork from IDR -# https://github.com/lioryariv/idr/ -# Copyright (c) 2020 Lior Yariv - -import logging -from typing import List, Tuple - -import torch -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import enable_get_default_args -from pytorch3d.renderer.implicit import HarmonicEmbedding - -from torch import nn - - -logger = logging.getLogger(__name__) - - -class RayNormalColoringNetwork(torch.nn.Module): - """ - Members: - d_in and feature_vector_size: Sum of these is the input - dimension. These must add up to the sum of - - 3 [for the points] - - 3 unless mode=no_normal [for the normals] - - 3 unless mode=no_view_dir [for view directions] - - the feature size, [number of channels in feature_vectors] - - d_out: dimension of output. - mode: One of "idr", "no_view_dir" or "no_normal" to allow omitting - part of the network input. - dims: list of hidden layer sizes. - weight_norm: whether to apply weight normalization to each layer. - n_harmonic_functions_dir: - If >0, use a harmonic embedding with this number of - harmonic functions for the view direction. Otherwise view directions - are fed without embedding, unless mode is `no_view_dir`. - pooled_feature_dim: If a pooling function is in use (provided as - pooling_fn to forward()) this must be its number of features. - Otherwise this must be set to 0. (If used from GenericModel, - this will be set automatically.) - """ - - def __init__( - self, - feature_vector_size: int = 3, - mode: str = "idr", - d_in: int = 9, - d_out: int = 3, - dims: Tuple[int, ...] 
= (512, 512, 512, 512), - weight_norm: bool = True, - n_harmonic_functions_dir: int = 0, - pooled_feature_dim: int = 0, - ) -> None: - super().__init__() - - self.mode = mode - self.output_dimensions = d_out - dims_full: List[int] = [d_in + feature_vector_size] + list(dims) + [d_out] - - self.embedview_fn = None - if n_harmonic_functions_dir > 0: - self.embedview_fn = HarmonicEmbedding( - n_harmonic_functions_dir, append_input=True - ) - dims_full[0] += self.embedview_fn.get_output_dim() - 3 - - if pooled_feature_dim > 0: - logger.info("Pooled features in rendering network.") - dims_full[0] += pooled_feature_dim - - self.num_layers = len(dims_full) - - layers = [] - for layer_idx in range(self.num_layers - 1): - out_dim = dims_full[layer_idx + 1] - lin = nn.Linear(dims_full[layer_idx], out_dim) - - if weight_norm: - lin = nn.utils.weight_norm(lin) - - layers.append(lin) - self.linear_layers = torch.nn.ModuleList(layers) - - self.relu = nn.ReLU() - self.tanh = nn.Tanh() - - def forward( - self, - feature_vectors: torch.Tensor, - points, - normals, - ray_bundle: ImplicitronRayBundle, - masks=None, - pooling_fn=None, - ): - if masks is not None and not masks.any(): - return torch.zeros_like(normals) - - view_dirs = ray_bundle.directions - if masks is not None: - # in case of IDR, other outputs are passed here after applying the mask - view_dirs = view_dirs.reshape(view_dirs.shape[0], -1, 3)[ - :, masks.reshape(-1) - ] - - if self.embedview_fn is not None: - view_dirs = self.embedview_fn(view_dirs) - - if self.mode == "idr": - rendering_input = torch.cat( - [points, view_dirs, normals, feature_vectors], dim=-1 - ) - elif self.mode == "no_view_dir": - rendering_input = torch.cat([points, normals, feature_vectors], dim=-1) - elif self.mode == "no_normal": - rendering_input = torch.cat([points, view_dirs, feature_vectors], dim=-1) - else: - raise ValueError(f"Unsupported rendering mode: {self.mode}") - - if pooling_fn is not None: - featspool = pooling_fn(points[None])[0] - rendering_input = torch.cat((rendering_input, featspool), dim=-1) - - x = rendering_input - - for layer_idx in range(self.num_layers - 1): - x = self.linear_layers[layer_idx](x) - - if layer_idx < self.num_layers - 2: - x = self.relu(x) - - x = self.tanh(x) - return x - - -enable_get_default_args(RayNormalColoringNetwork) diff --git a/pytorch3d/pytorch3d/implicitron/models/renderer/sdf_renderer.py b/pytorch3d/pytorch3d/implicitron/models/renderer/sdf_renderer.py deleted file mode 100644 index 12e54b9d38f34e6c870abba2c302ca45fba89907..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/renderer/sdf_renderer.py +++ /dev/null @@ -1,274 +0,0 @@ -# @lint-ignore-every LICENSELINT -# Adapted from https://github.com/lioryariv/idr/blob/main/code/model/ -# implicit_differentiable_renderer.py -# Copyright (c) 2020 Lior Yariv -import functools -from typing import List, Optional, Tuple - -import torch -from omegaconf import DictConfig -from pytorch3d.common.compat import prod -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle -from pytorch3d.implicitron.tools.config import ( - get_default_args_field, - registry, - run_auto_creation, -) -from pytorch3d.implicitron.tools.utils import evaluating - -from .base import BaseRenderer, EvaluationMode, ImplicitFunctionWrapper, RendererOutput -from .ray_tracing import RayTracing -from .rgb_net import RayNormalColoringNetwork - - -@registry.register -class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ignore[13] - 
render_features_dimensions: int = 3 - object_bounding_sphere: float = 1.0 - ray_tracer: RayTracing - ray_normal_coloring_network_args: DictConfig = get_default_args_field( - RayNormalColoringNetwork - ) - bg_color: Tuple[float, ...] = (0.0,) - soft_mask_alpha: float = 50.0 - - def __post_init__( - self, - ): - render_features_dimensions = self.render_features_dimensions - if len(self.bg_color) not in [1, render_features_dimensions]: - raise ValueError( - f"Background color should have {render_features_dimensions} entries." - ) - - run_auto_creation(self) - - self.ray_normal_coloring_network_args[ - "feature_vector_size" - ] = render_features_dimensions - self._rgb_network = RayNormalColoringNetwork( - **self.ray_normal_coloring_network_args - ) - - self.register_buffer("_bg_color", torch.tensor(self.bg_color), persistent=False) - - @classmethod - def ray_tracer_tweak_args(cls, type, args: DictConfig) -> None: - del args["object_bounding_sphere"] - - def create_ray_tracer(self) -> None: - self.ray_tracer = RayTracing( - **self.ray_tracer_args, - object_bounding_sphere=self.object_bounding_sphere, - ) - - def requires_object_mask(self) -> bool: - return True - - def forward( - self, - ray_bundle: ImplicitronRayBundle, - implicit_functions: List[ImplicitFunctionWrapper], - evaluation_mode: EvaluationMode = EvaluationMode.EVALUATION, - object_mask: Optional[torch.Tensor] = None, - **kwargs, - ) -> RendererOutput: - """ - Args: - ray_bundle: A `ImplicitronRayBundle` object containing the parametrizations of the - sampled rendering rays. - implicit_functions: single element list of ImplicitFunctionWrappers which - defines the implicit function to be used. - evaluation_mode: one of EvaluationMode.TRAINING or - EvaluationMode.EVALUATION which determines the settings used for - rendering. - kwargs: - object_mask: BoolTensor, denoting the silhouette of the object. - This is a required keyword argument for SignedDistanceFunctionRenderer - - Returns: - instance of RendererOutput - """ - if len(implicit_functions) != 1: - raise ValueError( - "SignedDistanceFunctionRenderer supports only single pass." 
- ) - - if object_mask is None: - raise ValueError("Expected object_mask to be provided in the kwargs") - object_mask = object_mask.bool() - - implicit_function = implicit_functions[0] - implicit_function_gradient = functools.partial(_gradient, implicit_function) - - # object_mask: silhouette of the object - batch_size, *spatial_size, _ = ray_bundle.lengths.shape - num_pixels = prod(spatial_size) - - cam_loc = ray_bundle.origins.reshape(batch_size, -1, 3) - ray_dirs = ray_bundle.directions.reshape(batch_size, -1, 3) - object_mask = object_mask.reshape(batch_size, -1) - - with torch.no_grad(), evaluating(implicit_function): - points, network_object_mask, dists = self.ray_tracer( - sdf=lambda x: implicit_function(rays_points_world=x)[ - :, 0 - ], # TODO: get rid of this wrapper - cam_loc=cam_loc, - object_mask=object_mask, - ray_directions=ray_dirs, - ) - - # TODO: below, cam_loc might as well be different - depth = dists.reshape(batch_size, num_pixels, 1) - points = (cam_loc + depth * ray_dirs).reshape(-1, 3) - - sdf_output = implicit_function(rays_points_world=points)[:, 0:1] - # NOTE most of the intermediate variables are flattened for - # no apparent reason (here and in the ray tracer) - ray_dirs = ray_dirs.reshape(-1, 3) - object_mask = object_mask.reshape(-1) - - # TODO: move it to loss computation - if evaluation_mode == EvaluationMode.TRAINING: - surface_mask = network_object_mask & object_mask - surface_points = points[surface_mask] - surface_dists = dists[surface_mask].unsqueeze(-1) - surface_ray_dirs = ray_dirs[surface_mask] - surface_cam_loc = cam_loc.reshape(-1, 3)[surface_mask] - surface_output = sdf_output[surface_mask] - N = surface_points.shape[0] - - # Sample points for the eikonal loss - eik_bounding_box: float = self.object_bounding_sphere - n_eik_points = batch_size * num_pixels // 2 - eikonal_points = torch.empty( - n_eik_points, - 3, - # but got `Union[device, Tensor, Module]`. - device=self._bg_color.device, - ).uniform_(-eik_bounding_box, eik_bounding_box) - eikonal_pixel_points = points.clone() - eikonal_pixel_points = eikonal_pixel_points.detach() - eikonal_points = torch.cat([eikonal_points, eikonal_pixel_points], 0) - - points_all = torch.cat([surface_points, eikonal_points], dim=0) - - output = implicit_function(rays_points_world=surface_points) - surface_sdf_values = output[ - :N, 0:1 - ].detach() # how is it different from sdf_output? 
- - g = implicit_function_gradient(points_all) - surface_points_grad = g[:N, 0, :].clone().detach() - grad_theta = g[N:, 0, :] - - differentiable_surface_points = _sample_network( - surface_output, - surface_sdf_values, - surface_points_grad, - surface_dists, - surface_cam_loc, - surface_ray_dirs, - ) - - else: - surface_mask = network_object_mask - differentiable_surface_points = points[surface_mask] - grad_theta = None - - empty_render = differentiable_surface_points.shape[0] == 0 - features = implicit_function(rays_points_world=differentiable_surface_points)[ - None, :, 1: - ] - normals_full = features.new_zeros( - batch_size, *spatial_size, 3, requires_grad=empty_render - ) - render_full = ( - features.new_ones( - batch_size, - *spatial_size, - self.render_features_dimensions, - requires_grad=empty_render, - ) - * self._bg_color - ) - mask_full = features.new_ones( - batch_size, *spatial_size, 1, requires_grad=empty_render - ) - if not empty_render: - normals = implicit_function_gradient(differentiable_surface_points)[ - None, :, 0, : - ] - normals_full.view(-1, 3)[surface_mask] = normals - render_full.view(-1, self.render_features_dimensions)[ - surface_mask - ] = self._rgb_network( - features, - differentiable_surface_points[None], - normals, - ray_bundle, - surface_mask[None, :, None], - pooling_fn=None, # TODO - ) - mask_full.view(-1, 1)[~surface_mask] = torch.sigmoid( - # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`. - -self.soft_mask_alpha - * sdf_output[~surface_mask] - ) - - # scatter points with surface_mask - points_full = ray_bundle.origins.detach().clone() - points_full.view(-1, 3)[surface_mask] = differentiable_surface_points - - # TODO: it is sparse here but otherwise dense - return RendererOutput( - features=render_full, - normals=normals_full, - depths=depth.reshape(batch_size, *spatial_size, 1), - masks=mask_full, # this is a differentiable approximation, see (7) in the paper - points=points_full, - aux={"grad_theta": grad_theta}, # TODO: will be moved to eikonal loss - # TODO: do we need sdf_output, grad_theta? 
Only for loss probably - ) - - -def _sample_network( - surface_output, - surface_sdf_values, - surface_points_grad, - surface_dists, - surface_cam_loc, - surface_ray_dirs, - eps: float = 1e-4, -): - # t -> t(theta) - surface_ray_dirs_0 = surface_ray_dirs.detach() - surface_points_dot = torch.bmm( - surface_points_grad.view(-1, 1, 3), surface_ray_dirs_0.view(-1, 3, 1) - ).squeeze(-1) - dot_sign = (surface_points_dot >= 0).to(surface_points_dot) * 2 - 1 - surface_dists_theta = surface_dists - (surface_output - surface_sdf_values) / ( - surface_points_dot.abs().clip(eps) * dot_sign - ) - - # t(theta) -> x(theta,c,v) - surface_points_theta_c_v = surface_cam_loc + surface_dists_theta * surface_ray_dirs - - return surface_points_theta_c_v - - -@torch.enable_grad() -def _gradient(module, rays_points_world): - rays_points_world.requires_grad_(True) - y = module.forward(rays_points_world=rays_points_world)[:, :1] - d_output = torch.ones_like(y, requires_grad=False, device=y.device) - gradients = torch.autograd.grad( - outputs=y, - inputs=rays_points_world, - grad_outputs=d_output, - create_graph=True, - retain_graph=True, - only_inputs=True, - )[0] - return gradients.unsqueeze(1) diff --git a/pytorch3d/pytorch3d/implicitron/models/utils.py b/pytorch3d/pytorch3d/implicitron/models/utils.py deleted file mode 100644 index b2f7dc668c22d6bb37cb08ff023c4cb23418e283..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/utils.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# Note: The #noqa comments below are for unused imports of pluggable implementations -# which are part of implicitron. They ensure that the registry is prepopulated. - -import warnings -from logging import Logger -from typing import Any, Dict, Optional, Tuple - -import torch -import tqdm -from pytorch3d.common.compat import prod - -from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle - -from pytorch3d.implicitron.tools import image_utils - -from pytorch3d.implicitron.tools.utils import cat_dataclass - - -def preprocess_input( - image_rgb: Optional[torch.Tensor], - fg_probability: Optional[torch.Tensor], - depth_map: Optional[torch.Tensor], - mask_images: bool, - mask_depths: bool, - mask_threshold: float, - bg_color: Tuple[float, float, float], -) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: - """ - Helper function to preprocess the input images and optional depth maps - to apply masking if required. - - Args: - image_rgb: A tensor of shape `(B, 3, H, W)` containing a batch of rgb images - corresponding to the source viewpoints from which features will be extracted - fg_probability: A tensor of shape `(B, 1, H, W)` containing a batch - of foreground masks with values in [0, 1]. - depth_map: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps. 
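The `_gradient` helper above obtains surface normals by differentiating the implicit function with respect to its input points via autograd. A stripped-down sketch of that pattern for a generic SDF callable:

import torch

def sdf_gradient(sdf_fn, points: torch.Tensor) -> torch.Tensor:
    # points: (N, 3); returns (N, 3) gradients d sdf / d points.
    points = points.detach().requires_grad_(True)
    y = sdf_fn(points)
    (grad,) = torch.autograd.grad(
        outputs=y,
        inputs=points,
        grad_outputs=torch.ones_like(y),
        create_graph=True,  # keep the graph so the normals stay differentiable
    )
    return grad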
- mask_images: Whether or not to mask the RGB image background given the - foreground mask (the `fg_probability` argument of `GenericModel.forward`) - mask_depths: Whether or not to mask the depth image background given the - foreground mask (the `fg_probability` argument of `GenericModel.forward`) - mask_threshold: If greater than 0.0, the foreground mask is - thresholded by this value before being applied to the RGB/Depth images - bg_color: RGB values for setting the background color of input image - if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own - way to determine the background color of its output, unrelated to this. - - Returns: - Modified image_rgb, fg_mask, depth_map - """ - if image_rgb is not None and image_rgb.ndim == 3: - # The FrameData object is used for both frames and batches of frames, - # and a user might get this error if those were confused. - # Perhaps a user has a FrameData `fd` representing a single frame and - # wrote something like `model(**fd)` instead of - # `model(**fd.collate([fd]))`. - raise ValueError( - "Model received unbatched inputs. " - + "Perhaps they came from a FrameData which had not been collated." - ) - - fg_mask = fg_probability - if fg_mask is not None and mask_threshold > 0.0: - # threshold masks - warnings.warn("Thresholding masks!") - fg_mask = (fg_mask >= mask_threshold).type_as(fg_mask) - - if mask_images and fg_mask is not None and image_rgb is not None: - # mask the image - warnings.warn("Masking images!") - image_rgb = image_utils.mask_background( - image_rgb, fg_mask, dim_color=1, bg_color=torch.tensor(bg_color) - ) - - if mask_depths and fg_mask is not None and depth_map is not None: - # mask the depths - assert ( - mask_threshold > 0.0 - ), "Depths should be masked only with thresholded masks" - warnings.warn("Masking depths!") - depth_map = depth_map * fg_mask - - return image_rgb, fg_mask, depth_map - - -def log_loss_weights(loss_weights: Dict[str, float], logger: Logger) -> None: - """ - Print a table of the loss weights. - """ - loss_weights_message = ( - "-------\nloss_weights:\n" - + "\n".join(f"{k:40s}: {w:1.2e}" for k, w in loss_weights.items()) - + "-------" - ) - logger.info(loss_weights_message) - - -def weighted_sum_losses( - preds: Dict[str, torch.Tensor], loss_weights: Dict[str, float] -) -> Optional[torch.Tensor]: - """ - A helper function to compute the overall loss as the dot product - of individual loss functions with the corresponding weights. - """ - losses_weighted = [ - preds[k] * float(w) - for k, w in loss_weights.items() - if (k in preds and w != 0.0) - ] - if len(losses_weighted) == 0: - warnings.warn("No main objective found.") - return None - loss = sum(losses_weighted) - assert torch.is_tensor(loss) - # pyre-fixme[7]: Expected `Optional[Tensor]` but got `int`. - return loss - - -def apply_chunked(func, chunk_generator, tensor_collator): - """ - Helper function to apply a function on a sequence of - chunked inputs yielded by a generator and collate - the result. - """ - processed_chunks = [ - func(*chunk_args, **chunk_kwargs) - for chunk_args, chunk_kwargs in chunk_generator - ] - - return cat_dataclass(processed_chunks, tensor_collator) - - -def chunk_generator( - chunk_size: int, - ray_bundle: ImplicitronRayBundle, - chunked_inputs: Dict[str, torch.Tensor], - tqdm_trigger_threshold: int, - *args, - **kwargs, -): - """ - Helper function which yields chunks of rays from the - input ray_bundle, to be used when the number of rays is - large and will not fit in memory for rendering. 
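`chunk_generator` below splits the rays into chunks whose total number of point evaluations stays under `chunk_size`; the `-(-a // b)` idiom used there is ceiling division. A sketch of just that arithmetic:

def rays_per_chunk(n_rays: int, n_pts_per_ray: int, chunk_size: int) -> int:
    # Ceil-divide total point evaluations into chunks, then rays into chunks.
    n_chunks = -(-n_rays * max(n_pts_per_ray, 1) // chunk_size)
    return -(-n_rays // n_chunks)

assert rays_per_chunk(n_rays=10000, n_pts_per_ray=64, chunk_size=65536) == 1000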
- """ - ( - batch_size, - *spatial_dim, - n_pts_per_ray, - ) = ray_bundle.lengths.shape # B x ... x n_pts_per_ray - if n_pts_per_ray > 0 and chunk_size % n_pts_per_ray != 0: - raise ValueError( - f"chunk_size_grid ({chunk_size}) should be divisible " - f"by n_pts_per_ray ({n_pts_per_ray})" - ) - - n_rays = prod(spatial_dim) - # special handling for raytracing-based methods - n_chunks = -(-n_rays * max(n_pts_per_ray, 1) // chunk_size) - chunk_size_in_rays = -(-n_rays // n_chunks) - - iter = range(0, n_rays, chunk_size_in_rays) - if len(iter) >= tqdm_trigger_threshold: - iter = tqdm.tqdm(iter) - - def _safe_slice( - tensor: Optional[torch.Tensor], start_idx: int, end_idx: int - ) -> Any: - return tensor[start_idx:end_idx] if tensor is not None else None - - for start_idx in iter: - end_idx = min(start_idx + chunk_size_in_rays, n_rays) - bins = ( - None - if ray_bundle.bins is None - else ray_bundle.bins.reshape(batch_size, n_rays, n_pts_per_ray + 1)[ - :, start_idx:end_idx - ] - ) - pixel_radii_2d = ( - None - if ray_bundle.pixel_radii_2d is None - else ray_bundle.pixel_radii_2d.reshape(batch_size, -1, 1)[ - :, start_idx:end_idx - ] - ) - ray_bundle_chunk = ImplicitronRayBundle( - origins=ray_bundle.origins.reshape(batch_size, -1, 3)[:, start_idx:end_idx], - directions=ray_bundle.directions.reshape(batch_size, -1, 3)[ - :, start_idx:end_idx - ], - lengths=ray_bundle.lengths.reshape(batch_size, n_rays, n_pts_per_ray)[ - :, start_idx:end_idx - ], - xys=ray_bundle.xys.reshape(batch_size, -1, 2)[:, start_idx:end_idx], - bins=bins, - pixel_radii_2d=pixel_radii_2d, - camera_ids=_safe_slice(ray_bundle.camera_ids, start_idx, end_idx), - camera_counts=_safe_slice(ray_bundle.camera_counts, start_idx, end_idx), - ) - extra_args = kwargs.copy() - for k, v in chunked_inputs.items(): - extra_args[k] = v.flatten(2)[:, :, start_idx:end_idx] - yield [ray_bundle_chunk, *args], extra_args diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/__init__.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/view_pooler/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/feature_aggregator.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/feature_aggregator.py deleted file mode 100644 index bd9817393f0509ecc560c46f694f8c37804c1d3f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/view_pooler/feature_aggregator.py +++ /dev/null @@ -1,687 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from abc import ABC, abstractmethod -from enum import Enum -from typing import Dict, Optional, Sequence, Tuple, Union - -import torch -import torch.nn.functional as F -from pytorch3d.implicitron.models.view_pooler.view_sampler import ( - cameras_points_cartesian_product, -) -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.ops import wmean -from pytorch3d.renderer.cameras import CamerasBase - - -class ReductionFunction(Enum): - AVG = "avg" # simple average - MAX = "max" # maximum - STD = "std" # standard deviation - STD_AVG = "std_avg" # average of per-dimension standard deviations - - -class FeatureAggregatorBase(ABC, ReplaceableBase): - """ - Base class for aggregating features. - - Typically, the aggregated features and their masks are output by `ViewSampler` - which samples feature tensors extracted from a set of source images. - - Settings: - exclude_target_view: If `True`/`False`, enables/disables pooling - from target view to itself. - exclude_target_view_mask_features: If `True`, - mask the features from the target view before aggregation - concatenate_output: If `True`, - concatenate the aggregated features into a single tensor, - otherwise return a dictionary mapping feature names to tensors. - """ - - exclude_target_view: bool = True - exclude_target_view_mask_features: bool = True - concatenate_output: bool = True - - @abstractmethod - def forward( - self, - feats_sampled: Dict[str, torch.Tensor], - masks_sampled: torch.Tensor, - camera: Optional[CamerasBase] = None, - pts: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: - """ - Args: - feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`, - where each `t_i` is a tensor of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - masks_sampled: A binary mask represented as a tensor of shape - `(minibatch, n_source_views, n_samples, 1)` denoting valid - sampled features. - camera: A batch of `n_source_views` `CamerasBase` objects corresponding - to the source view cameras. - pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the - 3D points whose 2D projections to source views were sampled in - order to generate `feats_sampled` and `masks_sampled`. - - Returns: - feats_aggregated: If `concatenate_output==True`, a tensor - of shape `(minibatch, reduce_dim, n_samples, sum(dim_1, ... dim_N))` - containing the concatenation of the aggregated features `feats_sampled`. - `reduce_dim` depends on the specific feature aggregator - implementation and typically equals 1 or `n_source_views`. - If `concatenate_output==False`, the aggregator does not concatenate - the aggregated features and returns a dictionary of per-feature - aggregations `{f_i: t_i_aggregated}` instead. Each `t_i_aggregated` - is of shape `(minibatch, reduce_dim, n_samples, aggr_dim_i)`. - """ - raise NotImplementedError() - - @abstractmethod - def get_aggregated_feature_dim( - self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int] - ): - """ - Returns the final dimensionality of the output aggregated features. - - Args: - feats_or_feats_dim: Either a `dict` of sampled features `{f_i: t_i}` corresponding - to the `feats_sampled` argument of `forward`, - or an `int` representing the sum of dimensionalities of each `t_i`. - - Returns: - aggregated_feature_dim: The final dimensionality of the output - aggregated features. 
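The reduction aggregators registered below collapse the source-view dimension with weighted statistics (the defaults are `AVG` and `STD`). The actual computation lives in the `_avgmaxstd_reduction_function` helper used later in this file; the following is only a sketch of a weighted mean and standard deviation over the view dimension, with `eps` an assumed stabiliser:

import torch

def weighted_avg_std(feats, weights, dim=1, eps=1e-4):
    # feats: (B, n_views, n_samples, D); weights: (B, n_views, n_samples).
    w = weights[..., None]
    denom = w.sum(dim=dim, keepdim=True).clamp(min=eps)
    mean = (w * feats).sum(dim=dim, keepdim=True) / denom
    var = (w * (feats - mean) ** 2).sum(dim=dim, keepdim=True) / denom
    return mean, var.clamp(min=0.0).sqrt()  # each of shape (B, 1, n_samples, D)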
- """ - raise NotImplementedError() - - def has_aggregation(self) -> bool: - """ - Specifies whether the aggregator reduces the output `reduce_dim` dimension to 1. - - Returns: - has_aggregation: `True` if `reduce_dim==1`, else `False`. - """ - return hasattr(self, "reduction_functions") - - -@registry.register -class IdentityFeatureAggregator(torch.nn.Module, FeatureAggregatorBase): - """ - This aggregator does not perform any feature aggregation. Depending on the - settings the aggregator allows to mask target view features and concatenate - the outputs. - """ - - def get_aggregated_feature_dim( - self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int] - ): - return _get_reduction_aggregator_feature_dim(feats_or_feats_dim, []) - - def forward( - self, - feats_sampled: Dict[str, torch.Tensor], - masks_sampled: torch.Tensor, - camera: Optional[CamerasBase] = None, - pts: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: - """ - Args: - feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`, - where each `t_i` is a tensor of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - masks_sampled: A binary mask represented as a tensor of shape - `(minibatch, n_source_views, n_samples, 1)` denoting valid - sampled features. - camera: A batch of `n_source_views` `CamerasBase` objects - corresponding to the source view cameras. - pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the - 3D points whose 2D projections to source views were sampled in - order to generate `feats_sampled` and `masks_sampled`. - - Returns: - feats_aggregated: If `concatenate_output==True`, a tensor - of shape `(minibatch, 1, n_samples, sum(dim_1, ... dim_N))`. - If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}` - with each `t_i_aggregated` of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - """ - if self.exclude_target_view_mask_features: - feats_sampled = _mask_target_view_features(feats_sampled) - feats_aggregated = feats_sampled - if self.concatenate_output: - feats_aggregated = torch.cat(tuple(feats_aggregated.values()), dim=-1) - return feats_aggregated - - -@registry.register -class ReductionFeatureAggregator(torch.nn.Module, FeatureAggregatorBase): - """ - Aggregates using a set of predefined `reduction_functions` and concatenates - the results of each aggregation function along the - channel dimension. The reduction functions singularize the second dimension - of the sampled features which stacks the source views. - - Settings: - reduction_functions: A list of `ReductionFunction`s` that reduce the - the stack of source-view-specific features to a single feature. - """ - - reduction_functions: Tuple[ReductionFunction, ...] = ( - ReductionFunction.AVG, - ReductionFunction.STD, - ) - - def get_aggregated_feature_dim( - self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int] - ): - return _get_reduction_aggregator_feature_dim( - feats_or_feats_dim, self.reduction_functions - ) - - def forward( - self, - feats_sampled: Dict[str, torch.Tensor], - masks_sampled: torch.Tensor, - camera: Optional[CamerasBase] = None, - pts: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: - """ - Args: - feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`, - where each `t_i` is a tensor of shape - `(minibatch, n_source_views, n_samples, dim_i)`. 
- masks_sampled: A binary mask represented as a tensor of shape - `(minibatch, n_source_views, n_samples, 1)` denoting valid - sampled features. - camera: A batch of `n_source_views` `CamerasBase` objects corresponding - to the source view cameras. - pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the - 3D points whose 2D projections to source views were sampled in - order to generate `feats_sampled` and `masks_sampled`. - - Returns: - feats_aggregated: If `concatenate_output==True`, a tensor - of shape `(minibatch, 1, n_samples, sum(dim_1, ... dim_N))`. - If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}` - with each `t_i_aggregated` of shape `(minibatch, 1, n_samples, aggr_dim_i)`. - """ - - pts_batch, n_cameras = masks_sampled.shape[:2] - if self.exclude_target_view_mask_features: - feats_sampled = _mask_target_view_features(feats_sampled) - sampling_mask = _get_view_sampling_mask( - n_cameras, - pts_batch, - masks_sampled.device, - self.exclude_target_view, - ) - aggr_weigths = masks_sampled[..., 0] * sampling_mask[..., None] - feats_aggregated = { - k: _avgmaxstd_reduction_function( - f, - aggr_weigths, - dim=1, - reduction_functions=self.reduction_functions, - ) - for k, f in feats_sampled.items() - } - if self.concatenate_output: - feats_aggregated = torch.cat(tuple(feats_aggregated.values()), dim=-1) - return feats_aggregated - - -@registry.register -class AngleWeightedReductionFeatureAggregator(torch.nn.Module, FeatureAggregatorBase): - """ - Performs a weighted aggregation using a set of predefined `reduction_functions` - and concatenates the results of each aggregation function along the - channel dimension. The weights are proportional to the cosine of the - angle between the target ray and the source ray:: - - weight = ( - dot(target_ray, source_ray) * 0.5 + 0.5 + self.min_ray_angle_weight - )**self.weight_by_ray_angle_gamma - - The reduction functions singularize the second dimension - of the sampled features which stacks the source views. - - Settings: - reduction_functions: A list of `ReductionFunction`s that reduce the - the stack of source-view-specific features to a single feature. - min_ray_angle_weight: The minimum possible aggregation weight - before rasising to the power of `self.weight_by_ray_angle_gamma`. - weight_by_ray_angle_gamma: The exponent of the cosine of the ray angles - used when calculating the angle-based aggregation weights. - """ - - reduction_functions: Tuple[ReductionFunction, ...] = ( - ReductionFunction.AVG, - ReductionFunction.STD, - ) - weight_by_ray_angle_gamma: float = 1.0 - min_ray_angle_weight: float = 0.1 - - def get_aggregated_feature_dim( - self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int] - ): - return _get_reduction_aggregator_feature_dim( - feats_or_feats_dim, self.reduction_functions - ) - - def forward( - self, - feats_sampled: Dict[str, torch.Tensor], - masks_sampled: torch.Tensor, - camera: Optional[CamerasBase] = None, - pts: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: - """ - Args: - feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`, - where each `t_i` is a tensor of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - masks_sampled: A binary mask represented as a tensor of shape - `(minibatch, n_source_views, n_samples, 1)` denoting valid - sampled features. - camera: A batch of `n_source_views` `CamerasBase` objects - corresponding to the source view cameras. 
- pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the - 3D points whose 2D projections to source views were sampled in - order to generate `feats_sampled` and `masks_sampled`. - - Returns: - feats_aggregated: If `concatenate_output==True`, a tensor - of shape `(minibatch, 1, n_samples, sum(dim_1, ... dim_N))`. - If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}` - with each `t_i_aggregated` of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - """ - - if camera is None: - raise ValueError("camera cannot be None for angle weighted aggregation") - - if pts is None: - raise ValueError("Points cannot be None for angle weighted aggregation") - - pts_batch, n_cameras = masks_sampled.shape[:2] - if self.exclude_target_view_mask_features: - feats_sampled = _mask_target_view_features(feats_sampled) - view_sampling_mask = _get_view_sampling_mask( - n_cameras, - pts_batch, - masks_sampled.device, - self.exclude_target_view, - ) - aggr_weights = _get_angular_reduction_weights( - view_sampling_mask, - masks_sampled, - camera, - pts, - self.min_ray_angle_weight, - self.weight_by_ray_angle_gamma, - ) - assert torch.isfinite(aggr_weights).all() - feats_aggregated = { - k: _avgmaxstd_reduction_function( - f, - aggr_weights, - dim=1, - reduction_functions=self.reduction_functions, - ) - for k, f in feats_sampled.items() - } - if self.concatenate_output: - feats_aggregated = torch.cat(tuple(feats_aggregated.values()), dim=-1) - return feats_aggregated - - -@registry.register -class AngleWeightedIdentityFeatureAggregator(torch.nn.Module, FeatureAggregatorBase): - """ - This aggregator does not perform any feature aggregation. It only weights - the features by the weights proportional to the cosine of the - angle between the target ray and the source ray:: - - weight = ( - dot(target_ray, source_ray) * 0.5 + 0.5 + self.min_ray_angle_weight - )**self.weight_by_ray_angle_gamma - - Settings: - min_ray_angle_weight: The minimum possible aggregation weight - before rasising to the power of `self.weight_by_ray_angle_gamma`. - weight_by_ray_angle_gamma: The exponent of the cosine of the ray angles - used when calculating the angle-based aggregation weights. - - Additionally the aggregator allows to mask target view features and to concatenate - the outputs. - """ - - weight_by_ray_angle_gamma: float = 1.0 - min_ray_angle_weight: float = 0.1 - - def get_aggregated_feature_dim( - self, feats_or_feats_dim: Union[Dict[str, torch.Tensor], int] - ): - return _get_reduction_aggregator_feature_dim(feats_or_feats_dim, []) - - def forward( - self, - feats_sampled: Dict[str, torch.Tensor], - masks_sampled: torch.Tensor, - camera: Optional[CamerasBase] = None, - pts: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: - """ - Args: - feats_sampled: A `dict` of sampled feature tensors `{f_i: t_i}`, - where each `t_i` is a tensor of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - masks_sampled: A binary mask represented as a tensor of shape - `(minibatch, n_source_views, n_samples, 1)` denoting valid - sampled features. - camera: A batch of `n_source_views` `CamerasBase` objects corresponding - to the source view cameras. - pts: A tensor of shape `(minibatch, n_samples, 3)` denoting the - 3D points whose 2D projections to source views were sampled in - order to generate `feats_sampled` and `masks_sampled`. 
- - Returns: - feats_aggregated: If `concatenate_output==True`, a tensor - of shape `(minibatch, n_source_views, n_samples, sum(dim_1, ... dim_N))`. - If `concatenate_output==False`, a dictionary `{f_i: t_i_aggregated}` - with each `t_i_aggregated` of shape - `(minibatch, n_source_views, n_samples, dim_i)`. - """ - - if camera is None: - raise ValueError("camera cannot be None for angle weighted aggregation") - - if pts is None: - raise ValueError("Points cannot be None for angle weighted aggregation") - - pts_batch, n_cameras = masks_sampled.shape[:2] - if self.exclude_target_view_mask_features: - feats_sampled = _mask_target_view_features(feats_sampled) - view_sampling_mask = _get_view_sampling_mask( - n_cameras, - pts_batch, - masks_sampled.device, - self.exclude_target_view, - ) - aggr_weights = _get_angular_reduction_weights( - view_sampling_mask, - masks_sampled, - camera, - pts, - self.min_ray_angle_weight, - self.weight_by_ray_angle_gamma, - ) - feats_aggregated = { - k: f * aggr_weights[..., None] for k, f in feats_sampled.items() - } - if self.concatenate_output: - feats_aggregated = torch.cat(tuple(feats_aggregated.values()), dim=-1) - return feats_aggregated - - -def _get_reduction_aggregator_feature_dim( - feats_or_feats_dim: Union[Dict[str, torch.Tensor], int], - reduction_functions: Sequence[ReductionFunction], -) -> int: - if isinstance(feats_or_feats_dim, int): - feat_dim = feats_or_feats_dim - else: - feat_dim = int(sum(f.shape[1] for f in feats_or_feats_dim.values())) - if len(reduction_functions) == 0: - return feat_dim - return sum( - _get_reduction_function_output_dim( - reduction_function, - feat_dim, - ) - for reduction_function in reduction_functions - ) - - -def _get_reduction_function_output_dim( - reduction_function: ReductionFunction, - feat_dim: int, -) -> int: - if reduction_function == ReductionFunction.STD_AVG: - return 1 - else: - return feat_dim - - -def _get_view_sampling_mask( - n_cameras: int, - pts_batch: int, - device: Union[str, torch.device], - exclude_target_view: bool, -): - return ( - -torch.eye(n_cameras, device=device, dtype=torch.float32) - * float(exclude_target_view) - + 1.0 - )[:pts_batch] - - -def _mask_target_view_features( - feats_sampled: Dict[str, torch.Tensor], -): - # mask out the sampled features to be sure we dont use them - # anywhere later - one_feature_sampled = next(iter(feats_sampled.values())) - pts_batch, n_cameras = one_feature_sampled.shape[:2] - view_sampling_mask = _get_view_sampling_mask( - n_cameras, - pts_batch, - one_feature_sampled.device, - True, - ) - view_sampling_mask = view_sampling_mask.view( - pts_batch, n_cameras, *([1] * (one_feature_sampled.ndim - 2)) - ) - return {k: f * view_sampling_mask for k, f in feats_sampled.items()} - - -def _get_angular_reduction_weights( - view_sampling_mask: torch.Tensor, - masks_sampled: torch.Tensor, - camera: CamerasBase, - pts: torch.Tensor, - min_ray_angle_weight: float, - weight_by_ray_angle_gamma: float, -): - aggr_weights = masks_sampled.clone()[..., 0] - assert not any(v is None for v in [camera, pts]) - angle_weight = _get_ray_angle_weights( - camera, - pts, - min_ray_angle_weight, - weight_by_ray_angle_gamma, - ) - assert torch.isfinite(angle_weight).all() - # multiply the final aggr weights with ray angles - view_sampling_mask = view_sampling_mask.view( - *view_sampling_mask.shape[:2], *([1] * (aggr_weights.ndim - 2)) - ) - aggr_weights = ( - aggr_weights * angle_weight.reshape_as(aggr_weights) * view_sampling_mask - ) - return aggr_weights - - -def 
_get_ray_dir_dot_prods(camera: CamerasBase, pts: torch.Tensor): - n_cameras = camera.R.shape[0] - pts_batch = pts.shape[0] - - camera_rep, pts_rep = cameras_points_cartesian_product(camera, pts) - - # does not produce nans randomly unlike get_camera_center() below - cam_centers_rep = -torch.bmm( - camera_rep.T[:, None], - camera_rep.R.permute(0, 2, 1), - ).reshape(-1, *([1] * (pts.ndim - 2)), 3) - # cam_centers_rep = camera_rep.get_camera_center().reshape( - # -1, *([1]*(pts.ndim - 2)), 3 - # ) - - ray_dirs = F.normalize(pts_rep - cam_centers_rep, dim=-1) - # camera_rep = [ pts_rep = [ - # camera[0] pts[0], - # camera[0] pts[1], - # camera[0] ..., - # ... pts[batch_pts-1], - # camera[1] pts[0], - # camera[1] pts[1], - # camera[1] ..., - # ... pts[batch_pts-1], - # ... ..., - # camera[n_cameras-1] pts[0], - # camera[n_cameras-1] pts[1], - # camera[n_cameras-1] ..., - # ... pts[batch_pts-1], - # ] ] - - ray_dirs_reshape = ray_dirs.view(n_cameras, pts_batch, -1, 3) - # [ - # [pts_0 in cam_0, pts_1 in cam_0, ..., pts_m in cam_0], - # [pts_0 in cam_1, pts_1 in cam_1, ..., pts_m in cam_1], - # ... - # [pts_0 in cam_n, pts_1 in cam_n, ..., pts_m in cam_n], - # ] - - ray_dirs_pts = torch.stack([ray_dirs_reshape[i, i] for i in range(pts_batch)]) - ray_dir_dot_prods = (ray_dirs_pts[None] * ray_dirs_reshape).sum( - dim=-1 - ) # pts_batch x n_cameras x n_pts - - return ray_dir_dot_prods.transpose(0, 1) - - -def _get_ray_angle_weights( - camera: CamerasBase, - pts: torch.Tensor, - min_ray_angle_weight: float, - weight_by_ray_angle_gamma: float, -): - ray_dir_dot_prods = _get_ray_dir_dot_prods( - camera, pts - ) # pts_batch x n_cameras x ... x 3 - angle_weight_01 = ray_dir_dot_prods * 0.5 + 0.5 # [-1, 1] to [0, 1] - angle_weight = (angle_weight_01 + min_ray_angle_weight) ** weight_by_ray_angle_gamma - return angle_weight - - -def _avgmaxstd_reduction_function( - x: torch.Tensor, - w: torch.Tensor, - reduction_functions: Sequence[ReductionFunction], - dim: int = 1, -): - """ - Args: - x: Features to aggreagate. Tensor of shape `(batch, n_views, ..., dim)`. - w: Aggregation weights. Tensor of shape `(batch, n_views, ...,)`. - dim: the dimension along which to aggregate. - reduction_functions: The set of reduction functions. - - Returns: - x_aggr: Aggregation of `x` to a tensor of shape `(batch, 1, ..., dim_aggregate)`. - """ - - pooled_features = [] - - mu = None - std = None - - if ReductionFunction.AVG in reduction_functions: - # average pool - mu = _avg_reduction_function(x, w, dim=dim) - pooled_features.append(mu) - - if ReductionFunction.STD in reduction_functions: - # standard-dev pool - std = _std_reduction_function(x, w, dim=dim, mu=mu) - pooled_features.append(std) - - if ReductionFunction.STD_AVG in reduction_functions: - # average-of-standard-dev pool - stdavg = _std_avg_reduction_function(x, w, dim=dim, mu=mu, std=std) - pooled_features.append(stdavg) - - if ReductionFunction.MAX in reduction_functions: - max_ = _max_reduction_function(x, w, dim=dim) - pooled_features.append(max_) - - # cat all results along the feature dimension (the last dim) - x_aggr = torch.cat(pooled_features, dim=-1) - - # zero out features that were all masked out - # pyre-fixme[16]: `bool` has no attribute `type_as`. 
- any_active = (w.max(dim=dim, keepdim=True).values > 1e-4).type_as(x_aggr) - x_aggr = x_aggr * any_active[..., None] - - # some asserts to check that everything was done right - assert torch.isfinite(x_aggr).all() - assert x_aggr.shape[1] == 1 - - return x_aggr - - -def _avg_reduction_function( - x: torch.Tensor, - w: torch.Tensor, - dim: int = 1, -): - mu = wmean(x, w, dim=dim, eps=1e-2) - return mu - - -def _std_reduction_function( - x: torch.Tensor, - w: torch.Tensor, - dim: int = 1, - mu: Optional[torch.Tensor] = None, # pre-computed mean -): - if mu is None: - mu = _avg_reduction_function(x, w, dim=dim) - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - std = wmean((x - mu) ** 2, w, dim=dim, eps=1e-2).clamp(1e-4).sqrt() - # FIXME: somehow this is extremely heavy in mem? - return std - - -def _std_avg_reduction_function( - x: torch.Tensor, - w: torch.Tensor, - dim: int = 1, - mu: Optional[torch.Tensor] = None, # pre-computed mean - std: Optional[torch.Tensor] = None, # pre-computed std -): - if std is None: - std = _std_reduction_function(x, w, dim=dim, mu=mu) - stdmean = std.mean(dim=-1, keepdim=True) - return stdmean - - -def _max_reduction_function( - x: torch.Tensor, - w: torch.Tensor, - dim: int = 1, - big_M_factor: float = 10.0, -): - big_M = x.max(dim=dim, keepdim=True).values.abs() * big_M_factor - max_ = (x * w - ((1 - w) * big_M)).max(dim=dim, keepdim=True).values - return max_ diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_pooler.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_pooler.py deleted file mode 100644 index a47ef72de7a2ac0192f10a1c53b3cb4a9c246346..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_pooler.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Dict, List, Optional, Union - -import torch -from pytorch3d.implicitron.tools.config import Configurable, run_auto_creation -from pytorch3d.renderer.cameras import CamerasBase - -from .feature_aggregator import FeatureAggregatorBase -from .view_sampler import ViewSampler - - -# pyre-ignore: 13 -class ViewPooler(Configurable, torch.nn.Module): - """ - Implements sampling of image-based features at the 2d projections of a set - of 3D points, and a subsequent aggregation of the resulting set of features - per-point. - - Args: - view_sampler: An instance of ViewSampler which is used for sampling of - image-based features at the 2D projections of a set - of 3D points. - feature_aggregator_class_type: The name of the feature aggregator class which - is available in the global registry. - feature_aggregator: A feature aggregator class which inherits from - FeatureAggregatorBase. Typically, the aggregated features and their - masks are output by a `ViewSampler` which samples feature tensors extracted - from a set of source images. FeatureAggregator executes step (4) above. - """ - - view_sampler: ViewSampler - feature_aggregator_class_type: str = "AngleWeightedReductionFeatureAggregator" - feature_aggregator: FeatureAggregatorBase - - def __post_init__(self): - run_auto_creation(self) - - def get_aggregated_feature_dim(self, feats: Union[Dict[str, torch.Tensor], int]): - """ - Returns the final dimensionality of the output aggregated features. 
- - Args: - feats: Either a `dict` of sampled features `{f_i: t_i}` corresponding - to the `feats_sampled` argument of `feature_aggregator,forward`, - or an `int` representing the sum of dimensionalities of each `t_i`. - - Returns: - aggregated_feature_dim: The final dimensionality of the output - aggregated features. - """ - return self.feature_aggregator.get_aggregated_feature_dim(feats) - - def has_aggregation(self): - """ - Specifies whether the `feature_aggregator` reduces the output `reduce_dim` - dimension to 1. - - Returns: - has_aggregation: `True` if `reduce_dim==1`, else `False`. - """ - return self.feature_aggregator.has_aggregation() - - def forward( - self, - *, # force kw args - pts: torch.Tensor, - seq_id_pts: Union[List[int], List[str], torch.LongTensor], - camera: CamerasBase, - seq_id_camera: Union[List[int], List[str], torch.LongTensor], - feats: Dict[str, torch.Tensor], - masks: Optional[torch.Tensor], - **kwargs, - ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: - """ - Project each point cloud from a batch of point clouds to corresponding - input cameras, sample features at the 2D projection locations in a batch - of source images, and aggregate the pointwise sampled features. - - Args: - pts: A tensor of shape `[pts_batch x n_pts x 3]` in world coords. - seq_id_pts: LongTensor of shape `[pts_batch]` denoting the ids of the scenes - from which `pts` were extracted, or a list of string names. - camera: 'n_cameras' cameras, each coresponding to a batch element of `feats`. - seq_id_camera: LongTensor of shape `[n_cameras]` denoting the ids of the scenes - corresponding to cameras in `camera`, or a list of string names. - feats: a dict of tensors of per-image features `{feat_i: T_i}`. - Each tensor `T_i` is of shape `[n_cameras x dim_i x H_i x W_i]`. - masks: `[n_cameras x 1 x H x W]`, define valid image regions - for sampling `feats`. - Returns: - feats_aggregated: If `feature_aggregator.concatenate_output==True`, a tensor - of shape `(pts_batch, reduce_dim, n_pts, sum(dim_1, ... dim_N))` - containing the aggregated features. `reduce_dim` depends on - the specific feature aggregator implementation and typically - equals 1 or `n_cameras`. - If `feature_aggregator.concatenate_output==False`, the aggregator - does not concatenate the aggregated features and returns a dictionary - of per-feature aggregations `{f_i: t_i_aggregated}` instead. - Each `t_i_aggregated` is of shape - `(pts_batch, reduce_dim, n_pts, aggr_dim_i)`. - """ - - # (1) Sample features and masks at the ray points - sampled_feats, sampled_masks = self.view_sampler( - pts=pts, - seq_id_pts=seq_id_pts, - camera=camera, - seq_id_camera=seq_id_camera, - feats=feats, - masks=masks, - ) - - # (2) Aggregate features from multiple views - # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function. - feats_aggregated = self.feature_aggregator( # noqa: E731 - sampled_feats, - sampled_masks, - pts=pts, - camera=camera, - ) # TODO: do we need to pass a callback rather than compute here? - - return feats_aggregated diff --git a/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_sampler.py b/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_sampler.py deleted file mode 100644 index 56f91ed2f2e74ab10d3d0f4db6801c3136f419b4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/view_pooler/view_sampler.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Dict, List, Optional, Tuple, Union - -import torch -from pytorch3d.implicitron.tools.config import Configurable -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.utils import ndc_grid_sample - - -class ViewSampler(Configurable, torch.nn.Module): - """ - Implements sampling of image-based features at the 2d projections of a set - of 3D points. - - Args: - masked_sampling: If `True`, the `sampled_masks` output of `self.forward` - contains the input `masks` sampled at the 2d projections. Otherwise, - all entries of `sampled_masks` are set to 1. - sampling_mode: Controls the mode of the `torch.nn.functional.grid_sample` - function used to interpolate the sampled feature tensors at the - locations of the 2d projections. - """ - - masked_sampling: bool = False - sampling_mode: str = "bilinear" - - def forward( - self, - *, # force kw args - pts: torch.Tensor, - seq_id_pts: Union[List[int], List[str], torch.LongTensor], - camera: CamerasBase, - seq_id_camera: Union[List[int], List[str], torch.LongTensor], - feats: Dict[str, torch.Tensor], - masks: Optional[torch.Tensor], - **kwargs, - ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: - """ - Project each point cloud from a batch of point clouds to corresponding - input cameras and sample features at the 2D projection locations. - - Args: - pts: A tensor of shape `[pts_batch x n_pts x 3]` in world coords. - seq_id_pts: LongTensor of shape `[pts_batch]` denoting the ids of the scenes - from which `pts` were extracted, or a list of string names. - camera: 'n_cameras' cameras, each coresponding to a batch element of `feats`. - seq_id_camera: LongTensor of shape `[n_cameras]` denoting the ids of the scenes - corresponding to cameras in `camera`, or a list of string names. - feats: a dict of tensors of per-image features `{feat_i: T_i}`. - Each tensor `T_i` is of shape `[n_cameras x dim_i x H_i x W_i]`. - masks: `[n_cameras x 1 x H x W]`, define valid image regions - for sampling `feats`. - Returns: - sampled_feats: Dict of sampled features `{feat_i: sampled_T_i}`. - Each `sampled_T_i` of shape `[pts_batch, n_cameras, n_pts, dim_i]`. - sampled_masks: A tensor with mask of the sampled features - of shape `(pts_batch, n_cameras, n_pts, 1)`. 
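To make the scene-matching step in the function body below concrete, a tiny sketch (toy ids, not from the source) of the mask that zeroes out features sampled from cameras belonging to a different scene than the points:

```python
import torch

seq_id_pts = torch.tensor([0, 1])        # pts_batch = 2
seq_id_camera = torch.tensor([0, 0, 1])  # n_cameras = 3

# 1 where the camera and the point cloud come from the same scene, else 0;
# the real code additionally appends two singleton dims via [..., None, None].
camera_pts_mask = (seq_id_camera[None] == seq_id_pts[:, None]).float()
# tensor([[1., 1., 0.],    points of scene 0 may only use cameras 0 and 1
#         [0., 0., 1.]])   points of scene 1 may only use camera 2
```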
- """ - - # convert sequence ids to long tensors - seq_id_pts, seq_id_camera = [ - handle_seq_id(seq_id, pts.device) for seq_id in [seq_id_pts, seq_id_camera] - ] - - if self.masked_sampling and masks is None: - raise ValueError( - "Masks have to be provided for `self.masked_sampling==True`" - ) - - # project pts to all cameras and sample feats from the locations of - # the 2D projections - sampled_feats_all_cams, sampled_masks_all_cams = project_points_and_sample( - pts, - feats, - camera, - masks if self.masked_sampling else None, - sampling_mode=self.sampling_mode, - ) - - # generate the mask that invalidates features sampled from - # non-corresponding cameras - camera_pts_mask = (seq_id_camera[None] == seq_id_pts[:, None])[ - ..., None, None - ].to(pts) - - # mask the sampled features and masks - sampled_feats = { - k: f * camera_pts_mask for k, f in sampled_feats_all_cams.items() - } - sampled_masks = sampled_masks_all_cams * camera_pts_mask - - return sampled_feats, sampled_masks - - -def project_points_and_sample( - pts: torch.Tensor, - feats: Dict[str, torch.Tensor], - camera: CamerasBase, - masks: Optional[torch.Tensor], - eps: float = 1e-2, - sampling_mode: str = "bilinear", -) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: - """ - Project each point cloud from a batch of point clouds to all input cameras - and sample features at the 2D projection locations. - - Args: - pts: `(pts_batch, n_pts, 3)` tensor containing a batch of 3D point clouds. - feats: A dict `{feat_i: feat_T_i}` of features to sample, - where each `feat_T_i` is a tensor of shape - `(n_cameras, feat_i_dim, feat_i_H, feat_i_W)` - of `feat_i_dim`-dimensional features extracted from `n_cameras` - source views. - camera: A batch of `n_cameras` cameras corresponding to their feature - tensors `feat_T_i` from `feats`. - masks: A tensor of shape `(n_cameras, 1, mask_H, mask_W)` denoting - valid locations for sampling. - eps: A small constant controlling the minimum depth of projections - of `pts` to avoid divisons by zero in the projection operation. - sampling_mode: Sampling mode of the grid sampler. - - Returns: - sampled_feats: Dict of sampled features `{feat_i: sampled_T_i}`. - Each `sampled_T_i` is of shape - `(pts_batch, n_cameras, n_pts, feat_i_dim)`. - sampled_masks: A tensor with the mask of the sampled features - of shape `(pts_batch, n_cameras, n_pts, 1)`. - If `masks` is `None`, the returned `sampled_masks` will be - filled with 1s. - """ - - n_cameras = camera.R.shape[0] - pts_batch = pts.shape[0] - n_pts = pts.shape[1:-1] - - camera_rep, pts_rep = cameras_points_cartesian_product(camera, pts) - - # The eps here is super-important to avoid NaNs in backprop! 
- proj_rep = camera_rep.transform_points( - pts_rep.reshape(n_cameras * pts_batch, -1, 3), eps=eps - )[..., :2] - # [ pts1 in cam1, pts2 in cam1, pts3 in cam1, - # pts1 in cam2, pts2 in cam2, pts3 in cam2, - # pts1 in cam3, pts2 in cam3, pts3 in cam3 ] - - # reshape for the grid sampler - sampling_grid_ndc = proj_rep.view(n_cameras, pts_batch, -1, 2) - # [ [pts1 in cam1, pts2 in cam1, pts3 in cam1], - # [pts1 in cam2, pts2 in cam2, pts3 in cam2], - # [pts1 in cam3, pts2 in cam3, pts3 in cam3] ] - # n_cameras x pts_batch x n_pts x 2 - - # sample both feats - feats_sampled = { - k: ndc_grid_sample( - f, - sampling_grid_ndc, - mode=sampling_mode, - align_corners=False, - ) - .permute(2, 0, 3, 1) - .reshape(pts_batch, n_cameras, *n_pts, -1) - for k, f in feats.items() - } # {k: pts_batch x n_cameras x *n_pts x dim} for each feat type "k" - - if masks is not None: - # sample masks - masks_sampled = ( - ndc_grid_sample( - masks, - sampling_grid_ndc, - mode=sampling_mode, - align_corners=False, - ) - .permute(2, 0, 3, 1) - .reshape(pts_batch, n_cameras, *n_pts, 1) - ) - else: - masks_sampled = sampling_grid_ndc.new_ones(pts_batch, n_cameras, *n_pts, 1) - - return feats_sampled, masks_sampled - - -def handle_seq_id( - seq_id: Union[torch.LongTensor, List[str], List[int]], - device, -) -> torch.LongTensor: - """ - Converts the input sequence id to a LongTensor. - - Args: - seq_id: A sequence of sequence ids. - device: The target device of the output. - Returns - long_seq_id: `seq_id` converted to a `LongTensor` and moved to `device`. - """ - if not torch.is_tensor(seq_id): - if isinstance(seq_id[0], str): - seq_id = [hash(s) for s in seq_id] - # pyre-fixme[9]: seq_id has type `Union[List[int], List[str], LongTensor]`; - # used as `Tensor`. - seq_id = torch.tensor(seq_id, dtype=torch.long, device=device) - # pyre-fixme[16]: Item `List` of `Union[List[int], List[str], LongTensor]` has - # no attribute `to`. - return seq_id.to(device) - - -def cameras_points_cartesian_product( - camera: CamerasBase, pts: torch.Tensor -) -> Tuple[CamerasBase, torch.Tensor]: - """ - Generates all pairs of pairs of elements from 'camera' and 'pts' and returns - `camera_rep` and `pts_rep` such that:: - - camera_rep = [ pts_rep = [ - camera[0] pts[0], - camera[0] pts[1], - camera[0] ..., - ... pts[batch_pts-1], - camera[1] pts[0], - camera[1] pts[1], - camera[1] ..., - ... pts[batch_pts-1], - ... ..., - camera[n_cameras-1] pts[0], - camera[n_cameras-1] pts[1], - camera[n_cameras-1] ..., - ... pts[batch_pts-1], - ] ] - - Args: - camera: A batch of `n_cameras` cameras. - pts: A batch of `batch_pts` points of shape `(batch_pts, ..., dim)` - - Returns: - camera_rep: A batch of batch_pts*n_cameras cameras such that:: - - camera_rep = [ - camera[0] - camera[0] - camera[0] - ... - camera[1] - camera[1] - camera[1] - ... - ... - camera[n_cameras-1] - camera[n_cameras-1] - camera[n_cameras-1] - ] - - - pts_rep: Repeated `pts` of shape `(batch_pts*n_cameras, ..., dim)`, - such that:: - - pts_rep = [ - pts[0], - pts[1], - ..., - pts[batch_pts-1], - pts[0], - pts[1], - ..., - pts[batch_pts-1], - ..., - pts[0], - pts[1], - ..., - pts[batch_pts-1], - ] - - """ - n_cameras = camera.R.shape[0] - batch_pts = pts.shape[0] - pts_rep = pts.repeat(n_cameras, *[1 for _ in pts.shape[1:]]) - idx_cams = ( - torch.arange(n_cameras)[:, None] - .expand( - n_cameras, - batch_pts, - ) - .reshape(batch_pts * n_cameras) - ) - # pyre-fixme[6]: For 1st param expected `Union[List[int], int, LongTensor]` but - # got `Tensor`. 
- camera_rep = camera[idx_cams] - return camera_rep, pts_rep diff --git a/pytorch3d/pytorch3d/implicitron/models/visualization/__init__.py b/pytorch3d/pytorch3d/implicitron/models/visualization/__init__.py deleted file mode 100644 index a9fdb3b996b73ba9ae811fa42fb7615768a928fc..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/visualization/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/models/visualization/render_flyaround.py b/pytorch3d/pytorch3d/implicitron/models/visualization/render_flyaround.py deleted file mode 100644 index 2a3afadbb86e0307bea4b9ab5e7a54ef0c7183fb..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/models/visualization/render_flyaround.py +++ /dev/null @@ -1,391 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import logging -import math -import os -import random -from typing import ( - Any, - Dict, - Iterable, - List, - Optional, - Sequence, - Tuple, - TYPE_CHECKING, - Union, -) - -import numpy as np -import torch -import torch.nn.functional as Fu -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData -from pytorch3d.implicitron.dataset.utils import is_train_frame -from pytorch3d.implicitron.models.base_model import EvaluationMode -from pytorch3d.implicitron.tools.eval_video_trajectory import ( - generate_eval_video_cameras, -) -from pytorch3d.implicitron.tools.video_writer import VideoWriter -from pytorch3d.implicitron.tools.vis_utils import ( - get_visdom_connection, - make_depth_image, -) -from tqdm import tqdm - -if TYPE_CHECKING: - from visdom import Visdom - -logger = logging.getLogger(__name__) - - -def render_flyaround( - dataset: DatasetBase, - sequence_name: str, - model: torch.nn.Module, - output_video_path: str, - n_flyaround_poses: int = 40, - fps: int = 20, - trajectory_type: str = "circular_lsq_fit", - max_angle: float = 2 * math.pi, - trajectory_scale: float = 1.1, - scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0), - up: Tuple[float, float, float] = (0.0, -1.0, 0.0), - traj_offset: float = 0.0, - n_source_views: int = 9, - visdom_show_preds: bool = False, - visdom_environment: str = "render_flyaround", - visdom_server: str = "http://127.0.0.1", - visdom_port: int = 8097, - num_workers: int = 10, - device: Union[str, torch.device] = "cuda", - seed: Optional[int] = None, - video_resize: Optional[Tuple[int, int]] = None, - output_video_frames_dir: Optional[str] = None, - visualize_preds_keys: Sequence[str] = ( - "images_render", - "masks_render", - "depths_render", - "_all_source_images", - ), -) -> None: - """ - Uses `model` to generate a video consisting of renders of a scene imaged from - a camera flying around the scene. The scene is specified with the `dataset` object and - `sequence_name` which denotes the name of the scene whose frames are in `dataset`. - - Args: - dataset: The dataset object containing frames from a sequence in `sequence_name`. - sequence_name: Name of a sequence from `dataset`. - model: The model whose predictions are going to be visualized. 
- output_video_path: The path to the video output by this script.
- n_flyaround_poses: The number of camera poses of the flyaround trajectory.
- fps: Framerate of the output video.
- trajectory_type: The type of the camera trajectory. Can be one of:
- circular_lsq_fit: Camera centers follow a trajectory obtained
- by fitting a 3D circle to train_cameras centers.
- All cameras are looking towards scene_center.
- figure_eight: Figure-of-8 trajectory around the center of the
- central camera of the training dataset.
- trefoil_knot: Same as 'figure_eight', but the trajectory has a shape
- of a trefoil knot (https://en.wikipedia.org/wiki/Trefoil_knot).
- figure_eight_knot: Same as 'figure_eight', but the trajectory has a shape
- of a figure-eight knot
- (https://en.wikipedia.org/wiki/Figure-eight_knot_(mathematics)).
- max_angle: Defines the total length of the generated camera trajectory.
- All possible trajectories (set with the `trajectory_type` argument) are
- periodic with the period of `time==2pi`.
- E.g. setting `trajectory_type=circular_lsq_fit` and `time=4pi` will generate
- a trajectory of camera poses rotating the total of 720 deg around the object.
- trajectory_scale: The extent of the trajectory.
- scene_center: The center of the scene in world coordinates which all
- the cameras from the generated trajectory look at.
- up: The "up" vector of the scene (=the normal of the scene floor).
- Active for the `trajectory_type="circular"`.
- traj_offset: 3D offset vector added to each point of the trajectory.
- n_source_views: The number of source views sampled from the known views of the
- training sequence added to each evaluation batch.
- visdom_show_preds: If `True`, exports the visualizations to visdom.
- visdom_environment: The name of the visdom environment.
- visdom_server: The address of the visdom server.
- visdom_port: The visdom port.
- num_workers: The number of workers used to load the training data.
- seed: The random seed used for reproducible sampling of the source views.
- video_resize: Optionally, defines the size of the output video.
- output_video_frames_dir: If specified, the frames of the output video are going
- to be permanently stored in this directory.
- visualize_preds_keys: The names of the model predictions to visualize.
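The role of `max_angle` is easiest to see from the time parametrization used in the function body below; a small sketch (toy values):

```python
import math
import torch

# All trajectory types are 2*pi-periodic: max_angle = 2*pi traces one full loop
# around the scene, 4*pi would trace it twice.  The endpoint is dropped so a
# looped video does not repeat its first pose.
n_flyaround_poses, max_angle = 8, 2 * math.pi
time = torch.linspace(0, max_angle, n_flyaround_poses + 1)[:n_flyaround_poses]
assert len(time) == n_flyaround_poses and float(time[0]) == 0.0
```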
- """ - - if seed is None: - seed = hash(sequence_name) - - if visdom_show_preds: - viz = get_visdom_connection(server=visdom_server, port=visdom_port) - else: - viz = None - - logger.info(f"Loading all data of sequence '{sequence_name}'.") - seq_idx = list(dataset.sequence_indices_in_order(sequence_name)) - train_data = _load_whole_dataset(dataset, seq_idx, num_workers=num_workers) - assert all(train_data.sequence_name[0] == sn for sn in train_data.sequence_name) - # pyre-ignore[6] - sequence_set_name = "train" if is_train_frame(train_data.frame_type)[0] else "test" - logger.info(f"Sequence set = {sequence_set_name}.") - train_cameras = train_data.camera - time = torch.linspace(0, max_angle, n_flyaround_poses + 1)[:n_flyaround_poses] - test_cameras = generate_eval_video_cameras( - train_cameras, - time=time, - n_eval_cams=n_flyaround_poses, - trajectory_type=trajectory_type, - trajectory_scale=trajectory_scale, - scene_center=scene_center, - up=up, - focal_length=None, - principal_point=torch.zeros(n_flyaround_poses, 2), - traj_offset_canonical=(0.0, 0.0, traj_offset), - ) - - # sample the source views reproducibly - with torch.random.fork_rng(): - torch.manual_seed(seed) - source_views_i = torch.randperm(len(seq_idx))[:n_source_views] - - # add the first dummy view that will get replaced with the target camera - source_views_i = Fu.pad(source_views_i, [1, 0]) - source_views = [seq_idx[i] for i in source_views_i.tolist()] - batch = _load_whole_dataset(dataset, source_views, num_workers=num_workers) - assert all(batch.sequence_name[0] == sn for sn in batch.sequence_name) - - preds_total = [] - for n in tqdm(range(n_flyaround_poses), total=n_flyaround_poses): - # set the first batch camera to the target camera - for k in ("R", "T", "focal_length", "principal_point"): - getattr(batch.camera, k)[0] = getattr(test_cameras[n], k) - - # Move to cuda - net_input = batch.to(device) - with torch.no_grad(): - preds = model(**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}) - - # make sure we dont overwrite something - assert all(k not in preds for k in net_input.keys()) - preds.update(net_input) # merge everything into one big dict - - # Render the predictions to images - rendered_pred = _images_from_preds(preds, extract_keys=visualize_preds_keys) - preds_total.append(rendered_pred) - - # show the preds every 5% of the export iterations - if visdom_show_preds and ( - n % max(n_flyaround_poses // 20, 1) == 0 or n == n_flyaround_poses - 1 - ): - assert viz is not None - _show_predictions( - preds_total, - sequence_name=batch.sequence_name[0], - viz=viz, - viz_env=visdom_environment, - predicted_keys=visualize_preds_keys, - ) - - logger.info(f"Exporting videos for sequence {sequence_name} ...") - _generate_prediction_videos( - preds_total, - sequence_name=batch.sequence_name[0], - viz=viz, - viz_env=visdom_environment, - fps=fps, - video_path=output_video_path, - resize=video_resize, - video_frames_dir=output_video_frames_dir, - predicted_keys=visualize_preds_keys, - ) - - -def _load_whole_dataset( - dataset: torch.utils.data.Dataset, idx: Sequence[int], num_workers: int = 10 -) -> FrameData: - load_all_dataloader = torch.utils.data.DataLoader( - torch.utils.data.Subset(dataset, idx), - batch_size=len(idx), - num_workers=num_workers, - shuffle=False, - collate_fn=FrameData.collate, - ) - return next(iter(load_all_dataloader)) - - -def _images_from_preds( - preds: Dict[str, Any], - extract_keys: Iterable[str] = ( - "image_rgb", - "images_render", - "fg_probability", - "masks_render", - 
"depths_render", - "depth_map", - "_all_source_images", - ), -) -> Dict[str, torch.Tensor]: - imout = {} - for k in extract_keys: - if k == "_all_source_images" and "image_rgb" in preds: - src_ims = preds["image_rgb"][1:].cpu().detach().clone() - v = _stack_images(src_ims, None)[None] - else: - if k not in preds or preds[k] is None: - print(f"cant show {k}") - continue - v = preds[k].cpu().detach().clone() - if k.startswith("depth"): - mask_resize = Fu.interpolate( - preds["masks_render"], - size=preds[k].shape[2:], - mode="nearest", - ) - v = make_depth_image(preds[k], mask_resize) - if v.shape[1] == 1: - v = v.repeat(1, 3, 1, 1) - imout[k] = v.detach().cpu() - - return imout - - -def _stack_images(ims: torch.Tensor, size: Optional[Tuple[int, int]]) -> torch.Tensor: - ba = ims.shape[0] - H = int(np.ceil(np.sqrt(ba))) - W = H - n_add = H * W - ba - if n_add > 0: - ims = torch.cat((ims, torch.zeros_like(ims[:1]).repeat(n_add, 1, 1, 1))) - - ims = ims.view(H, W, *ims.shape[1:]) - cated = torch.cat([torch.cat(list(row), dim=2) for row in ims], dim=1) - if size is not None: - cated = Fu.interpolate(cated[None], size=size, mode="bilinear")[0] - return cated.clamp(0.0, 1.0) - - -def _show_predictions( - preds: List[Dict[str, Any]], - sequence_name: str, - viz: "Visdom", - viz_env: str = "visualizer", - predicted_keys: Sequence[str] = ( - "images_render", - "masks_render", - "depths_render", - "_all_source_images", - ), - n_samples=10, - one_image_width=200, -) -> None: - """Given a list of predictions visualize them into a single image using visdom.""" - assert isinstance(preds, list) - - pred_all = [] - # Randomly choose a subset of the rendered images, sort by ordr in the sequence - n_samples = min(n_samples, len(preds)) - pred_idx = sorted(random.sample(list(range(len(preds))), n_samples)) - for predi in pred_idx: - # Make the concatentation for the same camera vertically - pred_all.append( - torch.cat( - [ - torch.nn.functional.interpolate( - preds[predi][k].cpu(), - scale_factor=one_image_width / preds[predi][k].shape[3], - mode="bilinear", - ).clamp(0.0, 1.0) - for k in predicted_keys - ], - dim=2, - ) - ) - # Concatenate the images horizontally - pred_all_cat = torch.cat(pred_all, dim=3)[0] - viz.image( - pred_all_cat, - win="show_predictions", - env=viz_env, - opts={"title": f"pred_{sequence_name}"}, - ) - - -def _generate_prediction_videos( - preds: List[Dict[str, Any]], - sequence_name: str, - viz: Optional["Visdom"] = None, - viz_env: str = "visualizer", - predicted_keys: Sequence[str] = ( - "images_render", - "masks_render", - "depths_render", - "_all_source_images", - ), - fps: int = 20, - video_path: str = "/tmp/video", - video_frames_dir: Optional[str] = None, - resize: Optional[Tuple[int, int]] = None, -) -> None: - """Given a list of predictions create and visualize rotating videos of the - objects using visdom. 
- """ - - # make sure the target video directory exists - os.makedirs(os.path.dirname(video_path), exist_ok=True) - - # init a video writer for each predicted key - vws = {} - for k in predicted_keys: - if k not in preds[0]: - logger.warn(f"Cannot generate video for prediction key '{k}'") - continue - cache_dir = ( - None - if video_frames_dir is None - else os.path.join(video_frames_dir, f"{sequence_name}_{k}") - ) - vws[k] = VideoWriter( - fps=fps, - out_path=f"{video_path}_{sequence_name}_{k}.mp4", - cache_dir=cache_dir, - ) - - for rendered_pred in tqdm(preds): - for k in vws: - vws[k].write_frame( - rendered_pred[k][0].clip(0.0, 1.0).detach().cpu().numpy(), - resize=resize, - ) - - for k in predicted_keys: - if k not in vws: - continue - vws[k].get_video() - logger.info(f"Generated {vws[k].out_path}.") - if viz is not None: - viz.video( - videofile=vws[k].out_path, - env=viz_env, - win=k, # we reuse the same window otherwise visdom dies - opts={"title": sequence_name + " " + k}, - ) diff --git a/pytorch3d/pytorch3d/implicitron/third_party/__init__.py b/pytorch3d/pytorch3d/implicitron/third_party/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/third_party/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/third_party/hyperlayers.py b/pytorch3d/pytorch3d/implicitron/third_party/hyperlayers.py deleted file mode 100644 index e56235130166b75f50a9aa9700a8de25b3f472c8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/third_party/hyperlayers.py +++ /dev/null @@ -1,253 +0,0 @@ -# a copy-paste from https://github.com/vsitzmann/scene-representation-networks/blob/master/hyperlayers.py -# fmt: off -# flake8: noqa -'''Pytorch implementations of hyper-network modules. -''' -import functools - -import torch -import torch.nn as nn - -from . 
import pytorch_prototyping - - -def partialclass(cls, *args, **kwds): - class NewCls(cls): - __init__ = functools.partialmethod(cls.__init__, *args, **kwds) - - return NewCls - - -class LookupLayer(nn.Module): - def __init__(self, in_ch, out_ch, num_objects): - super().__init__() - - self.out_ch = out_ch - self.lookup_lin = LookupLinear(in_ch, out_ch, num_objects=num_objects) - self.norm_nl = nn.Sequential( - nn.LayerNorm([self.out_ch], elementwise_affine=False), nn.ReLU(inplace=True) - ) - - def forward(self, obj_idx): - net = nn.Sequential(self.lookup_lin(obj_idx), self.norm_nl) - return net - - -class LookupFC(nn.Module): - def __init__( - self, - hidden_ch, - num_hidden_layers, - num_objects, - in_ch, - out_ch, - outermost_linear=False, - ): - super().__init__() - self.layers = nn.ModuleList() - self.layers.append( - LookupLayer(in_ch=in_ch, out_ch=hidden_ch, num_objects=num_objects) - ) - - for i in range(num_hidden_layers): - self.layers.append( - LookupLayer(in_ch=hidden_ch, out_ch=hidden_ch, num_objects=num_objects) - ) - - if outermost_linear: - self.layers.append( - LookupLinear(in_ch=hidden_ch, out_ch=out_ch, num_objects=num_objects) - ) - else: - self.layers.append( - LookupLayer(in_ch=hidden_ch, out_ch=out_ch, num_objects=num_objects) - ) - - def forward(self, obj_idx): - net = [] - for i in range(len(self.layers)): - net.append(self.layers[i](obj_idx)) - - return nn.Sequential(*net) - - -class LookupLinear(nn.Module): - def __init__(self, in_ch, out_ch, num_objects): - super().__init__() - self.in_ch = in_ch - self.out_ch = out_ch - - self.hypo_params = nn.Embedding(num_objects, in_ch * out_ch + out_ch) - - for i in range(num_objects): - nn.init.kaiming_normal_( - self.hypo_params.weight.data[i, : self.in_ch * self.out_ch].view( - self.out_ch, self.in_ch - ), - a=0.0, - nonlinearity="relu", - mode="fan_in", - ) - self.hypo_params.weight.data[i, self.in_ch * self.out_ch :].fill_(0.0) - - def forward(self, obj_idx): - hypo_params = self.hypo_params(obj_idx) - - # Indices explicit to catch erros in shape of output layer - weights = hypo_params[..., : self.in_ch * self.out_ch] - biases = hypo_params[ - ..., self.in_ch * self.out_ch : (self.in_ch * self.out_ch) + self.out_ch - ] - - biases = biases.view(*(biases.size()[:-1]), 1, self.out_ch) - weights = weights.view(*(weights.size()[:-1]), self.out_ch, self.in_ch) - - return BatchLinear(weights=weights, biases=biases) - - -class HyperLayer(nn.Module): - """A hypernetwork that predicts a single Dense Layer, including LayerNorm and a ReLU.""" - - def __init__( - self, in_ch, out_ch, hyper_in_ch, hyper_num_hidden_layers, hyper_hidden_ch - ): - super().__init__() - - self.hyper_linear = HyperLinear( - in_ch=in_ch, - out_ch=out_ch, - hyper_in_ch=hyper_in_ch, - hyper_num_hidden_layers=hyper_num_hidden_layers, - hyper_hidden_ch=hyper_hidden_ch, - ) - self.norm_nl = nn.Sequential( - nn.LayerNorm([out_ch], elementwise_affine=False), nn.ReLU(inplace=True) - ) - - def forward(self, hyper_input): - """ - :param hyper_input: input to hypernetwork. - :return: nn.Module; predicted fully connected network. 
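The hypernetwork modules above all share one trick: a flat parameter vector is split and reshaped into the weights and biases of a batched linear layer. A toy-sized sketch of that unpacking (mirroring `LookupLinear`/`HyperLinear` and `BatchLinear`):

```python
import torch

in_ch, out_ch, batch = 3, 4, 2
# In the real modules this vector comes from an nn.Embedding or a small MLP.
hypo_params = torch.randn(batch, in_ch * out_ch + out_ch)

weights = hypo_params[..., : in_ch * out_ch].view(batch, out_ch, in_ch)
biases = hypo_params[..., in_ch * out_ch :].view(batch, 1, out_ch)

# BatchLinear then applies y = x @ W^T + b independently per batch element.
x = torch.randn(batch, 5, in_ch)  # (batch, n_points, in_ch)
y = x.matmul(weights.transpose(-1, -2)) + biases
assert y.shape == (batch, 5, out_ch)
```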
- """ - return nn.Sequential(self.hyper_linear(hyper_input), self.norm_nl) - - -class HyperFC(nn.Module): - """Builds a hypernetwork that predicts a fully connected neural network.""" - - def __init__( - self, - hyper_in_ch, - hyper_num_hidden_layers, - hyper_hidden_ch, - hidden_ch, - num_hidden_layers, - in_ch, - out_ch, - outermost_linear=False, - ): - super().__init__() - - PreconfHyperLinear = partialclass( - HyperLinear, - hyper_in_ch=hyper_in_ch, - hyper_num_hidden_layers=hyper_num_hidden_layers, - hyper_hidden_ch=hyper_hidden_ch, - ) - PreconfHyperLayer = partialclass( - HyperLayer, - hyper_in_ch=hyper_in_ch, - hyper_num_hidden_layers=hyper_num_hidden_layers, - hyper_hidden_ch=hyper_hidden_ch, - ) - - self.layers = nn.ModuleList() - self.layers.append(PreconfHyperLayer(in_ch=in_ch, out_ch=hidden_ch)) - - for i in range(num_hidden_layers): - self.layers.append(PreconfHyperLayer(in_ch=hidden_ch, out_ch=hidden_ch)) - - if outermost_linear: - self.layers.append(PreconfHyperLinear(in_ch=hidden_ch, out_ch=out_ch)) - else: - self.layers.append(PreconfHyperLayer(in_ch=hidden_ch, out_ch=out_ch)) - - def forward(self, hyper_input): - """ - :param hyper_input: Input to hypernetwork. - :return: nn.Module; Predicted fully connected neural network. - """ - net = [] - for i in range(len(self.layers)): - net.append(self.layers[i](hyper_input)) - - return nn.Sequential(*net) - - -class BatchLinear(nn.Module): - def __init__(self, weights, biases): - """Implements a batch linear layer. - - :param weights: Shape: (batch, out_ch, in_ch) - :param biases: Shape: (batch, 1, out_ch) - """ - super().__init__() - - self.weights = weights - self.biases = biases - - def __repr__(self): - return "BatchLinear(in_ch=%d, out_ch=%d)" % ( - self.weights.shape[-1], - self.weights.shape[-2], - ) - - def forward(self, input): - output = input.matmul( - self.weights.permute( - *[i for i in range(len(self.weights.shape) - 2)], -1, -2 - ) - ) - output += self.biases - return output - - -def last_hyper_layer_init(m) -> None: - if type(m) == nn.Linear: - nn.init.kaiming_normal_(m.weight, a=0.0, nonlinearity="relu", mode="fan_in") - m.weight.data *= 1e-1 - - -class HyperLinear(nn.Module): - """A hypernetwork that predicts a single linear layer (weights & biases).""" - - def __init__( - self, in_ch, out_ch, hyper_in_ch, hyper_num_hidden_layers, hyper_hidden_ch - ): - - super().__init__() - self.in_ch = in_ch - self.out_ch = out_ch - - self.hypo_params = pytorch_prototyping.FCBlock( - in_features=hyper_in_ch, - hidden_ch=hyper_hidden_ch, - num_hidden_layers=hyper_num_hidden_layers, - out_features=(in_ch * out_ch) + out_ch, - outermost_linear=True, - ) - self.hypo_params[-1].apply(last_hyper_layer_init) - - def forward(self, hyper_input): - hypo_params = self.hypo_params(hyper_input) - - # Indices explicit to catch erros in shape of output layer - weights = hypo_params[..., : self.in_ch * self.out_ch] - biases = hypo_params[ - ..., self.in_ch * self.out_ch : (self.in_ch * self.out_ch) + self.out_ch - ] - - biases = biases.view(*(biases.size()[:-1]), 1, self.out_ch) - weights = weights.view(*(weights.size()[:-1]), self.out_ch, self.in_ch) - - return BatchLinear(weights=weights, biases=biases) diff --git a/pytorch3d/pytorch3d/implicitron/third_party/pytorch_prototyping.py b/pytorch3d/pytorch3d/implicitron/third_party/pytorch_prototyping.py deleted file mode 100644 index 7dd973fc4053eaa6d38ba82c872a38ff83ba7741..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/third_party/pytorch_prototyping.py +++ 
/dev/null @@ -1,771 +0,0 @@ -# a copy-paste from https://raw.githubusercontent.com/vsitzmann/pytorch_prototyping/10f49b1e7df38a58fd78451eac91d7ac1a21df64/pytorch_prototyping.py -# fmt: off -# flake8: noqa -'''A number of custom pytorch modules with sane defaults that I find useful for model prototyping. -''' -import torch -import torch.nn as nn -import torchvision.utils -from torch.nn import functional as F - - -class FCLayer(nn.Module): - def __init__(self, in_features, out_features): - super().__init__() - self.net = nn.Sequential( - nn.Linear(in_features, out_features), - nn.LayerNorm([out_features]), - nn.ReLU(inplace=True), - ) - - def forward(self, input): - return self.net(input) - - -# From https://gist.github.com/wassname/ecd2dac6fc8f9918149853d17e3abf02 -class LayerNormConv2d(nn.Module): - def __init__(self, num_features, eps=1e-5, affine=True): - super().__init__() - self.num_features = num_features - self.affine = affine - self.eps = eps - - if self.affine: - self.gamma = nn.Parameter(torch.Tensor(num_features).uniform_()) - self.beta = nn.Parameter(torch.zeros(num_features)) - - def forward(self, x): - shape = [-1] + [1] * (x.dim() - 1) - mean = x.view(x.size(0), -1).mean(1).view(*shape) - std = x.view(x.size(0), -1).std(1).view(*shape) - - y = (x - mean) / (std + self.eps) - if self.affine: - shape = [1, -1] + [1] * (x.dim() - 2) - y = self.gamma.view(*shape) * y + self.beta.view(*shape) - return y - - -class FCBlock(nn.Module): - def __init__( - self, - hidden_ch, - num_hidden_layers, - in_features, - out_features, - outermost_linear=False, - ): - super().__init__() - - self.net = [] - self.net.append(FCLayer(in_features=in_features, out_features=hidden_ch)) - - for i in range(num_hidden_layers): - self.net.append(FCLayer(in_features=hidden_ch, out_features=hidden_ch)) - - if outermost_linear: - self.net.append(nn.Linear(in_features=hidden_ch, out_features=out_features)) - else: - self.net.append(FCLayer(in_features=hidden_ch, out_features=out_features)) - - self.net = nn.Sequential(*self.net) - self.net.apply(self.init_weights) - - def __getitem__(self, item): - return self.net[item] - - def init_weights(self, m): - if type(m) == nn.Linear: - nn.init.kaiming_normal_(m.weight, a=0.0, nonlinearity="relu", mode="fan_in") - - def forward(self, input): - return self.net(input) - - -class DownBlock3D(nn.Module): - """A 3D convolutional downsampling block.""" - - def __init__(self, in_channels, out_channels, norm=nn.BatchNorm3d): - super().__init__() - - self.net = [ - nn.ReplicationPad3d(1), - nn.Conv3d( - in_channels, - out_channels, - kernel_size=4, - padding=0, - stride=2, - bias=False if norm is not None else True, - ), - ] - - if norm is not None: - self.net += [norm(out_channels, affine=True)] - - self.net += [nn.LeakyReLU(0.2, True)] - self.net = nn.Sequential(*self.net) - - def forward(self, x): - return self.net(x) - - -class UpBlock3D(nn.Module): - """A 3D convolutional upsampling block.""" - - def __init__(self, in_channels, out_channels, norm=nn.BatchNorm3d): - super().__init__() - - self.net = [ - nn.ConvTranspose3d( - in_channels, - out_channels, - kernel_size=4, - stride=2, - padding=1, - bias=False if norm is not None else True, - ), - ] - - if norm is not None: - self.net += [norm(out_channels, affine=True)] - - self.net += [nn.ReLU(True)] - self.net = nn.Sequential(*self.net) - - def forward(self, x, skipped=None): - if skipped is not None: - input = torch.cat([skipped, x], dim=1) - else: - input = x - return self.net(input) - - -class 
Conv3dSame(torch.nn.Module): - """3D convolution that pads to keep spatial dimensions equal. - Cannot deal with stride. Only quadratic kernels (=scalar kernel_size). - """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - bias=True, - padding_layer=nn.ReplicationPad3d, - ): - """ - :param in_channels: Number of input channels - :param out_channels: Number of output channels - :param kernel_size: Scalar. Spatial dimensions of kernel (only quadratic kernels supported). - :param bias: Whether or not to use bias. - :param padding_layer: Which padding to use. Default is reflection padding. - """ - super().__init__() - ka = kernel_size // 2 - kb = ka - 1 if kernel_size % 2 == 0 else ka - self.net = nn.Sequential( - padding_layer((ka, kb, ka, kb, ka, kb)), - nn.Conv3d(in_channels, out_channels, kernel_size, bias=bias, stride=1), - ) - - def forward(self, x): - return self.net(x) - - -class Conv2dSame(torch.nn.Module): - """2D convolution that pads to keep spatial dimensions equal. - Cannot deal with stride. Only quadratic kernels (=scalar kernel_size). - """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - bias=True, - padding_layer=nn.ReflectionPad2d, - ): - """ - :param in_channels: Number of input channels - :param out_channels: Number of output channels - :param kernel_size: Scalar. Spatial dimensions of kernel (only quadratic kernels supported). - :param bias: Whether or not to use bias. - :param padding_layer: Which padding to use. Default is reflection padding. - """ - super().__init__() - ka = kernel_size // 2 - kb = ka - 1 if kernel_size % 2 == 0 else ka - self.net = nn.Sequential( - padding_layer((ka, kb, ka, kb)), - nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias, stride=1), - ) - - self.weight = self.net[1].weight - self.bias = self.net[1].bias - - def forward(self, x): - return self.net(x) - - -class UpBlock(nn.Module): - """A 2d-conv upsampling block with a variety of options for upsampling, and following best practices / with - reasonable defaults. (LeakyReLU, kernel size multiple of stride) - """ - - def __init__( - self, - in_channels, - out_channels, - post_conv=True, - use_dropout=False, - dropout_prob=0.1, - norm=nn.BatchNorm2d, - upsampling_mode="transpose", - ): - """ - :param in_channels: Number of input channels - :param out_channels: Number of output channels - :param post_conv: Whether to have another convolutional layer after the upsampling layer. - :param use_dropout: bool. Whether to use dropout or not. - :param dropout_prob: Float. The dropout probability (if use_dropout is True) - :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity. - :param upsampling_mode: Which upsampling mode: - transpose: Upsampling with stride-2, kernel size 4 transpose convolutions. - bilinear: Feature map is upsampled with bilinear upsampling, then a conv layer. - nearest: Feature map is upsampled with nearest neighbor upsampling, then a conv layer. - shuffle: Feature map is upsampled with pixel shuffling, then a conv layer. 
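For intuition about the upsampling modes listed above, a quick shape check (assumed toy sizes) of the "shuffle" and "transpose" branches; it also shows why the shuffle branch convolves from `in_channels // 4`:

```python
import torch
import torch.nn as nn

x = torch.randn(1, 64, 16, 16)

# PixelShuffle(2) moves channels into space: channels / 4, spatial size x 2.
assert nn.PixelShuffle(upscale_factor=2)(x).shape == (1, 16, 32, 32)

# The "transpose" mode uses a stride-2, kernel-4 transposed convolution instead.
tconv = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)
assert tconv(x).shape == (1, 32, 32, 32)
```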
- """ - super().__init__() - - net = list() - - if upsampling_mode == "transpose": - net += [ - nn.ConvTranspose2d( - in_channels, - out_channels, - kernel_size=4, - stride=2, - padding=1, - bias=True if norm is None else False, - ) - ] - elif upsampling_mode == "bilinear": - net += [nn.UpsamplingBilinear2d(scale_factor=2)] - net += [ - Conv2dSame( - in_channels, - out_channels, - kernel_size=3, - bias=True if norm is None else False, - ) - ] - elif upsampling_mode == "nearest": - net += [nn.UpsamplingNearest2d(scale_factor=2)] - net += [ - Conv2dSame( - in_channels, - out_channels, - kernel_size=3, - bias=True if norm is None else False, - ) - ] - elif upsampling_mode == "shuffle": - net += [nn.PixelShuffle(upscale_factor=2)] - net += [ - Conv2dSame( - in_channels // 4, - out_channels, - kernel_size=3, - bias=True if norm is None else False, - ) - ] - else: - raise ValueError("Unknown upsampling mode!") - - if norm is not None: - net += [norm(out_channels, affine=True)] - - net += [nn.ReLU(True)] - - if use_dropout: - net += [nn.Dropout2d(dropout_prob, False)] - - if post_conv: - net += [ - Conv2dSame( - out_channels, - out_channels, - kernel_size=3, - bias=True if norm is None else False, - ) - ] - - if norm is not None: - net += [norm(out_channels, affine=True)] - - net += [nn.ReLU(True)] - - if use_dropout: - net += [nn.Dropout2d(0.1, False)] - - self.net = nn.Sequential(*net) - - def forward(self, x, skipped=None): - if skipped is not None: - input = torch.cat([skipped, x], dim=1) - else: - input = x - return self.net(input) - - -class DownBlock(nn.Module): - """A 2D-conv downsampling block following best practices / with reasonable defaults - (LeakyReLU, kernel size multiple of stride) - """ - - def __init__( - self, - in_channels, - out_channels, - prep_conv=True, - middle_channels=None, - use_dropout=False, - dropout_prob=0.1, - norm=nn.BatchNorm2d, - ): - """ - :param in_channels: Number of input channels - :param out_channels: Number of output channels - :param prep_conv: Whether to have another convolutional layer before the downsampling layer. - :param middle_channels: If prep_conv is true, this sets the number of channels between the prep and downsampling - convs. - :param use_dropout: bool. Whether to use dropout or not. - :param dropout_prob: Float. The dropout probability (if use_dropout is True) - :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity. 
- """ - super().__init__() - - if middle_channels is None: - middle_channels = in_channels - - net = list() - - if prep_conv: - net += [ - nn.ReflectionPad2d(1), - nn.Conv2d( - in_channels, - middle_channels, - kernel_size=3, - padding=0, - stride=1, - bias=True if norm is None else False, - ), - ] - - if norm is not None: - net += [norm(middle_channels, affine=True)] - - net += [nn.LeakyReLU(0.2, True)] - - if use_dropout: - net += [nn.Dropout2d(dropout_prob, False)] - - net += [ - nn.ReflectionPad2d(1), - nn.Conv2d( - middle_channels, - out_channels, - kernel_size=4, - padding=0, - stride=2, - bias=True if norm is None else False, - ), - ] - - if norm is not None: - net += [norm(out_channels, affine=True)] - - net += [nn.LeakyReLU(0.2, True)] - - if use_dropout: - net += [nn.Dropout2d(dropout_prob, False)] - - self.net = nn.Sequential(*net) - - def forward(self, x): - return self.net(x) - - -class Unet3d(nn.Module): - """A 3d-Unet implementation with sane defaults.""" - - def __init__( - self, - in_channels, - out_channels, - nf0, - num_down, - max_channels, - norm=nn.BatchNorm3d, - outermost_linear=False, - ): - """ - :param in_channels: Number of input channels - :param out_channels: Number of output channels - :param nf0: Number of features at highest level of U-Net - :param num_down: Number of downsampling stages. - :param max_channels: Maximum number of channels (channels multiply by 2 with every downsampling stage) - :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity. - :param outermost_linear: Whether the output layer should be a linear layer or a nonlinear one. - """ - super().__init__() - - assert num_down > 0, "Need at least one downsampling layer in UNet3d." - - # Define the in block - self.in_layer = [Conv3dSame(in_channels, nf0, kernel_size=3, bias=False)] - - if norm is not None: - self.in_layer += [norm(nf0, affine=True)] - - self.in_layer += [nn.LeakyReLU(0.2, True)] - self.in_layer = nn.Sequential(*self.in_layer) - - # Define the center UNet block. The feature map has height and width 1 --> no batchnorm. - self.unet_block = UnetSkipConnectionBlock3d( - int(min(2 ** (num_down - 1) * nf0, max_channels)), - int(min(2 ** (num_down - 1) * nf0, max_channels)), - norm=None, - ) - for i in list(range(0, num_down - 1))[::-1]: - self.unet_block = UnetSkipConnectionBlock3d( - int(min(2 ** i * nf0, max_channels)), - int(min(2 ** (i + 1) * nf0, max_channels)), - submodule=self.unet_block, - norm=norm, - ) - - # Define the out layer. Each unet block concatenates its inputs with its outputs - so the output layer - # automatically receives the output of the in_layer and the output of the last unet layer. 
- self.out_layer = [ - Conv3dSame(2 * nf0, out_channels, kernel_size=3, bias=outermost_linear) - ] - - if not outermost_linear: - if norm is not None: - self.out_layer += [norm(out_channels, affine=True)] - self.out_layer += [nn.ReLU(True)] - self.out_layer = nn.Sequential(*self.out_layer) - - def forward(self, x): - in_layer = self.in_layer(x) - unet = self.unet_block(in_layer) - out_layer = self.out_layer(unet) - return out_layer - - -class UnetSkipConnectionBlock3d(nn.Module): - """Helper class for building a 3D unet.""" - - def __init__(self, outer_nc, inner_nc, norm=nn.BatchNorm3d, submodule=None): - super().__init__() - - if submodule is None: - model = [ - DownBlock3D(outer_nc, inner_nc, norm=norm), - UpBlock3D(inner_nc, outer_nc, norm=norm), - ] - else: - model = [ - DownBlock3D(outer_nc, inner_nc, norm=norm), - submodule, - UpBlock3D(2 * inner_nc, outer_nc, norm=norm), - ] - - self.model = nn.Sequential(*model) - - def forward(self, x): - forward_passed = self.model(x) - return torch.cat([x, forward_passed], 1) - - -class UnetSkipConnectionBlock(nn.Module): - """Helper class for building a 2D unet.""" - - def __init__( - self, - outer_nc, - inner_nc, - upsampling_mode, - norm=nn.BatchNorm2d, - submodule=None, - use_dropout=False, - dropout_prob=0.1, - ): - super().__init__() - - if submodule is None: - model = [ - DownBlock( - outer_nc, - inner_nc, - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=norm, - ), - UpBlock( - inner_nc, - outer_nc, - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=norm, - upsampling_mode=upsampling_mode, - ), - ] - else: - model = [ - DownBlock( - outer_nc, - inner_nc, - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=norm, - ), - submodule, - UpBlock( - 2 * inner_nc, - outer_nc, - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=norm, - upsampling_mode=upsampling_mode, - ), - ] - - self.model = nn.Sequential(*model) - - def forward(self, x): - forward_passed = self.model(x) - return torch.cat([x, forward_passed], 1) - - -class Unet(nn.Module): - """A 2d-Unet implementation with sane defaults.""" - - def __init__( - self, - in_channels, - out_channels, - nf0, - num_down, - max_channels, - use_dropout, - upsampling_mode="transpose", - dropout_prob=0.1, - norm=nn.BatchNorm2d, - outermost_linear=False, - ): - """ - :param in_channels: Number of input channels - :param out_channels: Number of output channels - :param nf0: Number of features at highest level of U-Net - :param num_down: Number of downsampling stages. - :param max_channels: Maximum number of channels (channels multiply by 2 with every downsampling stage) - :param use_dropout: Whether to use dropout or no. - :param dropout_prob: Dropout probability if use_dropout=True. - :param upsampling_mode: Which type of upsampling should be used. See "UpBlock" for documentation. - :param norm: Which norm to use. If None, no norm is used. Default is Batchnorm with affinity. - :param outermost_linear: Whether the output layer should be a linear layer or a nonlinear one. - """ - super().__init__() - - assert num_down > 0, "Need at least one downsampling layer in UNet." 
- - # Define the in block - self.in_layer = [ - Conv2dSame( - in_channels, nf0, kernel_size=3, bias=True if norm is None else False - ) - ] - if norm is not None: - self.in_layer += [norm(nf0, affine=True)] - self.in_layer += [nn.LeakyReLU(0.2, True)] - - if use_dropout: - self.in_layer += [nn.Dropout2d(dropout_prob)] - self.in_layer = nn.Sequential(*self.in_layer) - - # Define the center UNet block - self.unet_block = UnetSkipConnectionBlock( - min(2 ** (num_down - 1) * nf0, max_channels), - min(2 ** (num_down - 1) * nf0, max_channels), - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=None, # Innermost has no norm (spatial dimension 1) - upsampling_mode=upsampling_mode, - ) - - for i in list(range(0, num_down - 1))[::-1]: - self.unet_block = UnetSkipConnectionBlock( - min(2 ** i * nf0, max_channels), - min(2 ** (i + 1) * nf0, max_channels), - use_dropout=use_dropout, - dropout_prob=dropout_prob, - submodule=self.unet_block, - norm=norm, - upsampling_mode=upsampling_mode, - ) - - # Define the out layer. Each unet block concatenates its inputs with its outputs - so the output layer - # automatically receives the output of the in_layer and the output of the last unet layer. - self.out_layer = [ - Conv2dSame( - 2 * nf0, - out_channels, - kernel_size=3, - bias=outermost_linear or (norm is None), - ) - ] - - if not outermost_linear: - if norm is not None: - self.out_layer += [norm(out_channels, affine=True)] - self.out_layer += [nn.ReLU(True)] - - if use_dropout: - self.out_layer += [nn.Dropout2d(dropout_prob)] - self.out_layer = nn.Sequential(*self.out_layer) - - self.out_layer_weight = self.out_layer[0].weight - - def forward(self, x): - in_layer = self.in_layer(x) - unet = self.unet_block(in_layer) - out_layer = self.out_layer(unet) - return out_layer - - -class Identity(nn.Module): - """Helper module to allow Downsampling and Upsampling nets to default to identity if they receive an empty list.""" - - def __init__(self): - super().__init__() - - def forward(self, input): - return input - - -class DownsamplingNet(nn.Module): - """A subnetwork that downsamples a 2D feature map with strided convolutions.""" - - def __init__( - self, - per_layer_out_ch, - in_channels, - use_dropout, - dropout_prob=0.1, - last_layer_one=False, - norm=nn.BatchNorm2d, - ): - """ - :param per_layer_out_ch: python list of integers. Defines the number of output channels per layer. Length of - list defines number of downsampling steps (each step dowsamples by factor of 2.) - :param in_channels: Number of input channels. - :param use_dropout: Whether or not to use dropout. - :param dropout_prob: Dropout probability. - :param last_layer_one: Whether the output of the last layer will have a spatial size of 1. In that case, - the last layer will not have batchnorm, else, it will. - :param norm: Which norm to use. Defaults to BatchNorm. 
- """ - super().__init__() - - if not len(per_layer_out_ch): - self.downs = Identity() - else: - self.downs = list() - self.downs.append( - DownBlock( - in_channels, - per_layer_out_ch[0], - use_dropout=use_dropout, - dropout_prob=dropout_prob, - middle_channels=per_layer_out_ch[0], - norm=norm, - ) - ) - for i in range(0, len(per_layer_out_ch) - 1): - if last_layer_one and (i == len(per_layer_out_ch) - 2): - norm = None - self.downs.append( - DownBlock( - per_layer_out_ch[i], - per_layer_out_ch[i + 1], - dropout_prob=dropout_prob, - use_dropout=use_dropout, - norm=norm, - ) - ) - self.downs = nn.Sequential(*self.downs) - - def forward(self, input): - return self.downs(input) - - -class UpsamplingNet(nn.Module): - """A subnetwork that upsamples a 2D feature map with a variety of upsampling options.""" - - def __init__( - self, - per_layer_out_ch, - in_channels, - upsampling_mode, - use_dropout, - dropout_prob=0.1, - first_layer_one=False, - norm=nn.BatchNorm2d, - ): - """ - :param per_layer_out_ch: python list of integers. Defines the number of output channels per layer. Length of - list defines number of upsampling steps (each step upsamples by factor of 2.) - :param in_channels: Number of input channels. - :param upsampling_mode: Mode of upsampling. For documentation, see class "UpBlock" - :param use_dropout: Whether or not to use dropout. - :param dropout_prob: Dropout probability. - :param first_layer_one: Whether the input to the last layer will have a spatial size of 1. In that case, - the first layer will not have a norm, else, it will. - :param norm: Which norm to use. Defaults to BatchNorm. - """ - super().__init__() - - if not len(per_layer_out_ch): - self.ups = Identity() - else: - self.ups = list() - self.ups.append( - UpBlock( - in_channels, - per_layer_out_ch[0], - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=None if first_layer_one else norm, - upsampling_mode=upsampling_mode, - ) - ) - for i in range(0, len(per_layer_out_ch) - 1): - self.ups.append( - UpBlock( - per_layer_out_ch[i], - per_layer_out_ch[i + 1], - use_dropout=use_dropout, - dropout_prob=dropout_prob, - norm=norm, - upsampling_mode=upsampling_mode, - ) - ) - self.ups = nn.Sequential(*self.ups) - - def forward(self, input): - return self.ups(input) diff --git a/pytorch3d/pytorch3d/implicitron/tools/__init__.py b/pytorch3d/pytorch3d/implicitron/tools/__init__.py deleted file mode 100644 index 2e41cd717f6a439a9c08d76a9d0e4a54e190fc5a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. diff --git a/pytorch3d/pytorch3d/implicitron/tools/camera_utils.py b/pytorch3d/pytorch3d/implicitron/tools/camera_utils.py deleted file mode 100644 index ecf6e9fadbc559574de801af45f81ccf1788742b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/camera_utils.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
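For orientation, here is a minimal usage sketch of the 2D Unet class from the vendored pytorch_prototyping copy deleted just above. The import name `pytorch_prototyping` and the parameter values are illustrative assumptions, not part of the original code; the sketch only exercises the constructor signature and the shape behaviour (spatial size is preserved, channel count becomes out_channels).

import torch
# Hypothetical import path; assumes the vendored copy above were importable under this name.
from pytorch_prototyping import Unet

net = Unet(
    in_channels=3,      # e.g. an RGB feature map
    out_channels=1,
    nf0=64,             # features at the highest resolution
    num_down=4,         # downsampling stages; input size should be divisible by 2**4
    max_channels=512,   # per-stage channel doubling is capped at this value
    use_dropout=False,
)
x = torch.randn(2, 3, 128, 128)
y = net(x)              # shape (2, 1, 128, 128): same spatial size, out_channels channels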
- - -# TODO: all this potentially goes to PyTorch3D - -import math -from typing import Tuple - -import pytorch3d as pt3d -import torch -from pytorch3d.renderer.cameras import CamerasBase - - -def jitter_extrinsics( - R: torch.Tensor, - T: torch.Tensor, - max_angle: float = (math.pi * 2.0), - translation_std: float = 1.0, - scale_std: float = 0.3, -): - """ - Jitter the extrinsic camera parameters `R` and `T` with a random similarity - transformation. The transformation rotates by a random angle between [0, max_angle]; - scales by a random factor exp(N(0, scale_std)), where N(0, scale_std) is - a random sample from a normal distrubtion with zero mean and variance scale_std; - and translates by a 3D offset sampled from N(0, translation_std). - """ - assert all(x >= 0.0 for x in (max_angle, translation_std, scale_std)) - N = R.shape[0] - R_jit = pt3d.transforms.random_rotations(1, device=R.device) - R_jit = pt3d.transforms.so3_exponential_map( - pt3d.transforms.so3_log_map(R_jit) * max_angle - ) - T_jit = torch.randn_like(R_jit[:1, :, 0]) * translation_std - rigid_transform = pt3d.ops.eyes(dim=4, N=N, device=R.device) - rigid_transform[:, :3, :3] = R_jit.expand(N, 3, 3) - rigid_transform[:, 3, :3] = T_jit.expand(N, 3) - scale_jit = torch.exp(torch.randn_like(T_jit[:, 0]) * scale_std).expand(N) - return apply_camera_alignment(R, T, rigid_transform, scale_jit) - - -def apply_camera_alignment( - R: torch.Tensor, - T: torch.Tensor, - rigid_transform: torch.Tensor, - scale: torch.Tensor, -): - """ - Args: - R: Camera rotation matrix of shape (N, 3, 3). - T: Camera translation of shape (N, 3). - rigid_transform: A tensor of shape (N, 4, 4) representing a batch of - N 4x4 tensors that map the scene pointcloud from misaligned coords - to the aligned space. - scale: A list of N scaling factors. A tensor of shape (N,) - - Returns: - R_aligned: The aligned rotations R. - T_aligned: The aligned translations T. - """ - R_rigid = rigid_transform[:, :3, :3] - T_rigid = rigid_transform[:, 3:, :3] - R_aligned = R_rigid.permute(0, 2, 1).bmm(R) - T_aligned = scale[:, None] * (T - (T_rigid @ R_aligned)[:, 0]) - return R_aligned, T_aligned - - -def get_min_max_depth_bounds(cameras, scene_center, scene_extent): - """ - Estimate near/far depth plane as: - near = dist(cam_center, self.scene_center) - self.scene_extent - far = dist(cam_center, self.scene_center) + self.scene_extent - """ - cam_center = cameras.get_camera_center() - center_dist = ( - ((cam_center - scene_center.to(cameras.R)[None]) ** 2) - .sum(dim=-1) - .clamp(0.001) - .sqrt() - ) - center_dist = center_dist.clamp(scene_extent + 1e-3) - min_depth = center_dist - scene_extent - max_depth = center_dist + scene_extent - return min_depth, max_depth - - -def volumetric_camera_overlaps( - cameras: CamerasBase, - scene_extent: float = 8.0, - scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0), - resol: int = 16, - weigh_by_ray_angle: bool = True, -): - """ - Compute the overlaps between viewing frustrums of all pairs of cameras - in `cameras`. 
- """ - device = cameras.device - ba = cameras.R.shape[0] - n_vox = int(resol**3) - grid = pt3d.structures.Volumes( - densities=torch.zeros([1, 1, resol, resol, resol], device=device), - volume_translation=-torch.FloatTensor(scene_center)[None].to(device), - voxel_size=2.0 * scene_extent / resol, - ).get_coord_grid(world_coordinates=True) - - grid = grid.view(1, n_vox, 3).expand(ba, n_vox, 3) - gridp = cameras.transform_points(grid, eps=1e-2) - proj_in_camera = ( - torch.prod((gridp[..., :2].abs() <= 1.0), dim=-1) - * (gridp[..., 2] > 0.0).float() - ) # ba x n_vox - - if weigh_by_ray_angle: - rays = torch.nn.functional.normalize( - grid - cameras.get_camera_center()[:, None], dim=-1 - ) - rays_masked = rays * proj_in_camera[..., None] - - # - slow and readable: - # inter = torch.zeros(ba, ba) - # for i1 in range(ba): - # for i2 in range(ba): - # inter[i1, i2] = ( - # 1 + (rays_masked[i1] * rays_masked[i2] - # ).sum(dim=-1)).sum() - - # - fast: - rays_masked = rays_masked.view(ba, n_vox * 3) - inter = n_vox + (rays_masked @ rays_masked.t()) - - else: - inter = proj_in_camera @ proj_in_camera.t() - - mass = torch.diag(inter) - iou = inter / (mass[:, None] + mass[None, :] - inter).clamp(0.1) - - return iou diff --git a/pytorch3d/pytorch3d/implicitron/tools/circle_fitting.py b/pytorch3d/pytorch3d/implicitron/tools/circle_fitting.py deleted file mode 100644 index 2f50278933a690e0e1f4bdcff1e33408b70bc4d1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/circle_fitting.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from dataclasses import dataclass -from math import pi -from typing import Optional - -import torch - - -def get_rotation_to_best_fit_xy( - points: torch.Tensor, centroid: Optional[torch.Tensor] = None -) -> torch.Tensor: - """ - Returns a rotation R such that `points @ R` has a best fit plane - parallel to the xy plane - - Args: - points: (*, N, 3) tensor of points in 3D - centroid: (*, 1, 3), (3,) or scalar: their centroid - - Returns: - (*, 3, 3) tensor rotation matrix - """ - if centroid is None: - centroid = points.mean(dim=-2, keepdim=True) - - points_centered = points - centroid - _, evec = torch.linalg.eigh(points_centered.transpose(-1, -2) @ points_centered) - # in general, evec can form either right- or left-handed basis, - # but we need the former to have a proper rotation (not reflection) - return torch.cat( - (evec[..., 1:], torch.cross(evec[..., 1], evec[..., 2])[..., None]), dim=-1 - ) - - -def _signed_area(path: torch.Tensor) -> torch.Tensor: - """ - Calculates the signed area / LΓ©vy area of a 2D path. If the path is closed, - i.e. ends where it starts, this is the integral of the winding number over - the whole plane. If not, consider a closed path made by adding a straight - line from the end to the start; the signed area is the integral of the - winding number (also over the plane) with respect to that closed path. - - If this number is positive, it indicates in some sense that the path - turns anticlockwise more than clockwise, and vice versa. - - Args: - path: N x 2 tensor of points. - - Returns: - signed area, shape () - """ - # This calculation is a sum of areas of triangles of the form - # (path[0], path[i], path[i+1]), where each triangle is half a - # parallelogram. 
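# Concretely: writing v_i = path[i] - path[0], the z-component of the 2D cross product
# v_i x v_{i+1}, i.e. x_i * y_{i+1} - y_i * x_{i+1}, is twice the signed area of the
# triangle (path[0], path[i], path[i+1]); the return statement below sums these terms
# and multiplies by 0.5.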
- x, y = (path[1:] - path[:1]).unbind(1) - return (y[1:] * x[:-1] - x[1:] * y[:-1]).sum() * 0.5 - - -@dataclass(frozen=True) -class Circle2D: - """ - Contains details of a circle in a plane. - Members - center: tensor shape (2,) - radius: tensor shape () - generated_points: points around the circle, shape (n_points, 2) - """ - - center: torch.Tensor - radius: torch.Tensor - generated_points: torch.Tensor - - -def fit_circle_in_2d( - points2d, *, n_points: int = 0, angles: Optional[torch.Tensor] = None -) -> Circle2D: - """ - Simple best fitting of a circle to 2D points. In particular, the circle which - minimizes the sum of the squares of the squared-distances to the circle. - - Finds (a,b) and r to minimize the sum of squares (over the x,y pairs) of - r**2 - [(x-a)**2+(y-b)**2] - i.e. - (2*a)*x + (2*b)*y + (r**2 - a**2 - b**2)*1 - (x**2 + y**2) - - In addition, generates points along the circle. If angles is None (default) - then n_points around the circle equally spaced are given. These begin at the - point closest to the first input point. They continue in the direction which - seems to match the movement of points in points2d, as judged by its - signed area. If `angles` are provided, then n_points is ignored, and points - along the circle at the given angles are returned, with the starting point - and direction as before. - - (Note that `generated_points` is affected by the order of the points in - points2d, but the other outputs are not.) - - Args: - points2d: N x 2 tensor of 2D points - n_points: number of points to generate on the circle, if angles not given - angles: optional angles in radians of points to generate. - - Returns: - Circle2D object - """ - design = torch.cat([points2d, torch.ones_like(points2d[:, :1])], dim=1) - rhs = (points2d**2).sum(1) - n_provided = points2d.shape[0] - if n_provided < 3: - raise ValueError(f"{n_provided} points are not enough to determine a circle") - solution = torch.linalg.lstsq(design, rhs[:, None]).solution - center = solution[:2, 0] / 2 - radius = torch.sqrt(solution[2, 0] + (center**2).sum()) - if n_points > 0: - if angles is not None: - warnings.warn("n_points ignored because angles provided") - else: - angles = torch.linspace(0, 2 * pi, n_points, device=points2d.device) - - if angles is not None: - initial_direction_xy = (points2d[0] - center).unbind() - initial_angle = torch.atan2(initial_direction_xy[1], initial_direction_xy[0]) - with torch.no_grad(): - anticlockwise = _signed_area(points2d) > 0 - if anticlockwise: - use_angles = initial_angle + angles - else: - use_angles = initial_angle - angles - generated_points = center[None] + radius * torch.stack( - [torch.cos(use_angles), torch.sin(use_angles)], dim=-1 - ) - else: - generated_points = points2d.new_zeros(0, 2) - return Circle2D(center=center, radius=radius, generated_points=generated_points) - - -@dataclass(frozen=True) -class Circle3D: - """ - Contains details of a circle in 3D. - Members - center: tensor shape (3,) - radius: tensor shape () - normal: tensor shape (3,) - generated_points: points around the circle, shape (n_points, 3) - """ - - center: torch.Tensor - radius: torch.Tensor - normal: torch.Tensor - generated_points: torch.Tensor - - -def fit_circle_in_3d( - points, - *, - n_points: int = 0, - angles: Optional[torch.Tensor] = None, - offset: Optional[torch.Tensor] = None, - up: Optional[torch.Tensor] = None, -) -> Circle3D: - """ - Simple best fit circle to 3D points. Uses circle_2d in the - least-squares best fit plane. 
- - In addition, generates points along the circle. If angles is None (default) - then n_points around the circle equally spaced are given. These begin at the - point closest to the first input point. They continue in the direction which - seems to match the movement of points. If angles is provided, then n_points - is ignored, and points along the circle at the given angles are returned, - with the starting point and direction as before. - - Further, an offset can be given to add to the generated points; this is - interpreted in a rotated coordinate system where (0, 0, 1) is normal to the - circle, specifically the normal which is approximately in the direction of a - given `up` vector. The remaining rotation is disambiguated in an unspecified - but deterministic way. - - (Note that `generated_points` is affected by the order of the points in - points, but the other outputs are not.) - - Args: - points: N x 3 tensor of 3D points - n_points: number of points to generate on the circle - angles: optional angles in radians of points to generate. - offset: optional tensor (3,), a displacement expressed in a "canonical" - coordinate system to add to the generated points. - up: optional tensor (3,), a vector which helps define the - "canonical" coordinate system for interpreting `offset`. - Required if offset is used. - - - Returns: - Circle3D object - """ - centroid = points.mean(0) - r = get_rotation_to_best_fit_xy(points, centroid) - normal = r[:, 2] - rotated_points = (points - centroid) @ r - result_2d = fit_circle_in_2d( - rotated_points[:, :2], n_points=n_points, angles=angles - ) - center_3d = result_2d.center @ r[:, :2].t() + centroid - n_generated_points = result_2d.generated_points.shape[0] - if n_generated_points > 0: - generated_points_in_plane = torch.cat( - [ - result_2d.generated_points, - torch.zeros_like(result_2d.generated_points[:, :1]), - ], - dim=1, - ) - if offset is not None: - if up is None: - raise ValueError("Missing `up` input for interpreting offset") - with torch.no_grad(): - swap = torch.dot(up, normal) < 0 - if swap: - # We need some rotation which takes +z to -z. Here's one. - generated_points_in_plane += offset * offset.new_tensor([1, -1, -1]) - else: - generated_points_in_plane += offset - - generated_points = generated_points_in_plane @ r.t() + centroid - else: - generated_points = points.new_zeros(0, 3) - - return Circle3D( - radius=result_2d.radius, - center=center_3d, - normal=normal, - generated_points=generated_points, - ) diff --git a/pytorch3d/pytorch3d/implicitron/tools/config.py b/pytorch3d/pytorch3d/implicitron/tools/config.py deleted file mode 100644 index 0fb4012e6546242b7dced42a48c57fe52c0fb495..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/config.py +++ /dev/null @@ -1,1208 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import dataclasses -import inspect -import itertools -import sys -import warnings -from collections import Counter, defaultdict -from enum import Enum -from functools import partial -from typing import ( - Any, - Callable, - Dict, - get_args, - get_origin, - List, - Optional, - Tuple, - Type, - TypeVar, - Union, -) - -from omegaconf import DictConfig, OmegaConf, open_dict - - -""" -This functionality allows a configurable system to be determined in a dataclass-type -way.
It is a generalization of omegaconf's "structured", in the dataclass case. -Core functionality: - -- Configurable -- A base class used to label a class as being one which uses this - system. Uses class members and __post_init__ like a dataclass. - -- expand_args_fields -- Expands a class like `dataclasses.dataclass`. Runs automatically. - -- get_default_args -- gets an omegaconf.DictConfig for initializing a given class. - -- run_auto_creation -- Initialises nested members. To be called in __post_init__. - - -In addition, a Configurable may contain members whose type is decided at runtime. - -- ReplaceableBase -- As a base instead of Configurable, labels a class to say that - any child class can be used instead. - -- registry -- A global store of named child classes of ReplaceableBase classes. - Used as `@registry.register` decorator on class definition. - - -Additional utility functions: - -- remove_unused_components -- used for simplifying a DictConfig instance. -- get_default_args_field -- default for DictConfig member of another configurable. -- enable_get_default_args -- Allows get_default_args on a function or plain class. - - -1. The simplest usage of this functionality is as follows. First a schema is defined -in dataclass style. - - class A(Configurable): - n: int = 9 - - class B(Configurable): - a: A - - def __post_init__(self): - run_auto_creation(self) - -Then it can be used like - - b_args = get_default_args(B) - b = B(**b_args) - -In this case, get_default_args(B) returns an omegaconf.DictConfig with the right -members {"a_args": {"n": 9}}. It also modifies the definitions of the classes to -something like the following. (The modification itself is done by the function -`expand_args_fields`, which is called inside `get_default_args`.) - - @dataclasses.dataclass - class A: - n: int = 9 - - @dataclasses.dataclass - class B: - a_args: DictConfig = dataclasses.field(default_factory=lambda: DictConfig({"n": 9})) - - def __post_init__(self): - self.a = A(**self.a_args) - -2. Pluggability. Instead of a dataclass-style member being given a concrete class, -it can be given a base class and the implementation will be looked up by name in the -global `registry` in this module. E.g. - - class A(ReplaceableBase): - k: int = 1 - - @registry.register - class A1(A): - m: int = 3 - - @registry.register - class A2(A): - n: str = "2" - - class B(Configurable): - a: A - a_class_type: str = "A2" - b: Optional[A] - b_class_type: Optional[str] = "A2" - - def __post_init__(self): - run_auto_creation(self) - -will expand to - - @dataclasses.dataclass - class A: - k: int = 1 - - @dataclasses.dataclass - class A1(A): - m: int = 3 - - @dataclasses.dataclass - class A2(A): - n: str = "2" - - @dataclasses.dataclass - class B: - a_class_type: str = "A2" - a_A1_args: DictConfig = dataclasses.field( - default_factory=lambda: DictConfig({"k": 1, "m": 3} - ) - a_A2_args: DictConfig = dataclasses.field( - default_factory=lambda: DictConfig({"k": 1, "n": 2} - ) - b_class_type: Optional[str] = "A2" - b_A1_args: DictConfig = dataclasses.field( - default_factory=lambda: DictConfig({"k": 1, "m": 3} - ) - b_A2_args: DictConfig = dataclasses.field( - default_factory=lambda: DictConfig({"k": 1, "n": 2} - ) - - def __post_init__(self): - if self.a_class_type == "A1": - self.a = A1(**self.a_A1_args) - elif self.a_class_type == "A2": - self.a = A2(**self.a_A2_args) - else: - raise ValueError(...) 
- - if self.b_class_type is None: - self.b = None - elif self.b_class_type == "A1": - self.b = A1(**self.b_A1_args) - elif self.b_class_type == "A2": - self.b = A2(**self.b_A2_args) - else: - raise ValueError(...) - -3. Aside from these classes, the members of these classes should be things -which DictConfig is happy with: e.g. (bool, int, str, None, float) and what -can be built from them with `DictConfig`s and lists of them. - -In addition, you can call `get_default_args` on a function or class to get -the `DictConfig` of its defaulted arguments, assuming those are all things -which `DictConfig` is happy with, so long as you add a call to -`enable_get_default_args` after its definition. If you want to use such a -thing as the default for a member of another configured class, -`get_default_args_field` is a helper. -""" - - -TYPE_SUFFIX: str = "_class_type" -ARGS_SUFFIX: str = "_args" -ENABLED_SUFFIX: str = "_enabled" -CREATE_PREFIX: str = "create_" -IMPL_SUFFIX: str = "_impl" -TWEAK_SUFFIX: str = "_tweak_args" -_DATACLASS_INIT: str = "__dataclass_own_init__" -PRE_EXPAND_NAME: str = "pre_expand" - - -class ReplaceableBase: - """ - Base class for a class (a "replaceable") which is a base class for - dataclass-style implementations. The implementations can be stored - in the registry. They get expanded into dataclasses with expand_args_fields. - This expansion is delayed. - """ - - def __new__(cls, *args, **kwargs): - """ - These classes should be expanded only when needed (because processing - fixes the list of replaceable subclasses of members of the class). It - is safer if users expand the classes explicitly. But if the class gets - instantiated when it hasn't been processed, we expand it here. - """ - obj = super().__new__(cls) - if cls is not ReplaceableBase and not _is_actually_dataclass(cls): - expand_args_fields(cls) - return obj - - -class Configurable: - """ - Base class for dataclass-style classes which are not replaceable. These get - expanded into a dataclass with expand_args_fields. - This expansion is delayed. - """ - - def __new__(cls, *args, **kwargs): - """ - These classes should be expanded only when needed (because processing - fixes the list of replaceable subclasses of members of the class). It - is safer if users expand the classes explicitly. But if the class gets - instantiated when it hasn't been processed, we expand it here. - """ - obj = super().__new__(cls) - if cls is not Configurable and not _is_actually_dataclass(cls): - expand_args_fields(cls) - return obj - - -_X = TypeVar("X", bound=ReplaceableBase) -_Y = TypeVar("Y", bound=Union[ReplaceableBase, Configurable]) - - -class _Registry: - """ - Register from names to classes. In particular, we say that direct subclasses of - ReplaceableBase are "base classes" and we register subclasses of each base class - in a separate namespace. - """ - - def __init__(self) -> None: - self._mapping: Dict[ - Type[ReplaceableBase], Dict[str, Type[ReplaceableBase]] - ] = defaultdict(dict) - - def register(self, some_class: Type[_X]) -> Type[_X]: - """ - A class decorator, to register a class in self. - """ - name = some_class.__name__ - self._register(some_class, name=name) - return some_class - - def _register( - self, - some_class: Type[ReplaceableBase], - *, - base_class: Optional[Type[ReplaceableBase]] = None, - name: str, - ) -> None: - """ - Register a new member. 
- - Args: - cls: the new member - base_class: (optional) what the new member is a type for - name: name for the new member - """ - if base_class is None: - base_class = self._base_class_from_class(some_class) - if base_class is None: - raise ValueError( - f"Cannot register {some_class}. Cannot tell what it is." - ) - self._mapping[base_class][name] = some_class - - def get(self, base_class_wanted: Type[_X], name: str) -> Type[_X]: - """ - Retrieve a class from the registry by name - - Args: - base_class_wanted: parent type of type we are looking for. - It determines the namespace. - This will typically be a direct subclass of ReplaceableBase. - name: what to look for - - Returns: - class type - """ - if self._is_base_class(base_class_wanted): - base_class = base_class_wanted - else: - base_class = self._base_class_from_class(base_class_wanted) - if base_class is None: - raise ValueError( - f"Cannot look up {base_class_wanted}. Cannot tell what it is." - ) - if not isinstance(name, str): - raise ValueError( - f"Cannot look up a {type(name)} in the registry. Got {name}." - ) - result = self._mapping[base_class].get(name) - if result is None: - raise ValueError(f"{name} has not been registered.") - if not issubclass(result, base_class_wanted): - raise ValueError( - f"{name} resolves to {result} which does not subclass {base_class_wanted}" - ) - # pyre-ignore[7] - return result - - def get_all( - self, base_class_wanted: Type[ReplaceableBase] - ) -> List[Type[ReplaceableBase]]: - """ - Retrieve all registered implementations from the registry - - Args: - base_class_wanted: parent type of type we are looking for. - It determines the namespace. - This will typically be a direct subclass of ReplaceableBase. - Returns: - list of class types in alphabetical order of registered name. - """ - if self._is_base_class(base_class_wanted): - source = self._mapping[base_class_wanted] - return [source[key] for key in sorted(source)] - - base_class = self._base_class_from_class(base_class_wanted) - if base_class is None: - raise ValueError( - f"Cannot look up {base_class_wanted}. Cannot tell what it is." - ) - source = self._mapping[base_class] - return [ - source[key] - for key in sorted(source) - if issubclass(source[key], base_class_wanted) - and source[key] is not base_class_wanted - ] - - @staticmethod - def _is_base_class(some_class: Type[ReplaceableBase]) -> bool: - """ - Return whether the given type is a direct subclass of ReplaceableBase - and so gets used as a namespace. - """ - return ReplaceableBase in some_class.__bases__ - - @staticmethod - def _base_class_from_class( - some_class: Type[ReplaceableBase], - ) -> Optional[Type[ReplaceableBase]]: - """ - Find the parent class of some_class which inherits ReplaceableBase, or None - """ - for base in some_class.mro()[-3::-1]: - if base is not ReplaceableBase and issubclass(base, ReplaceableBase): - return base - return None - - -# Global instance of the registry -registry = _Registry() - - -class _ProcessType(Enum): - """ - Type of member which gets rewritten by expand_args_fields. - """ - - CONFIGURABLE = 1 - REPLACEABLE = 2 - OPTIONAL_CONFIGURABLE = 3 - OPTIONAL_REPLACEABLE = 4 - - -def _default_create( - name: str, type_: Type, process_type: _ProcessType -) -> Callable[[Any], None]: - """ - Return the default creation function for a member. This is a function which - could be called in __post_init__ to initialise the member, and will be called - from run_auto_creation. 
- - Args: - name: name of the member - type_: type of the member (with any Optional removed) - process_type: Shows whether member's declared type inherits ReplaceableBase, - in which case the actual type to be created is decided at - runtime. - - Returns: - Function taking one argument, the object whose member should be - initialized, i.e. self. - """ - impl_name = f"{CREATE_PREFIX}{name}{IMPL_SUFFIX}" - - def inner(self): - expand_args_fields(type_) - impl = getattr(self, impl_name) - args = getattr(self, name + ARGS_SUFFIX) - impl(True, args) - - def inner_optional(self): - expand_args_fields(type_) - impl = getattr(self, impl_name) - enabled = getattr(self, name + ENABLED_SUFFIX) - args = getattr(self, name + ARGS_SUFFIX) - impl(enabled, args) - - def inner_pluggable(self): - type_name = getattr(self, name + TYPE_SUFFIX) - impl = getattr(self, impl_name) - if type_name is None: - args = None - else: - args = getattr(self, f"{name}_{type_name}{ARGS_SUFFIX}", None) - impl(type_name, args) - - if process_type == _ProcessType.OPTIONAL_CONFIGURABLE: - return inner_optional - return inner if process_type == _ProcessType.CONFIGURABLE else inner_pluggable - - -def _default_create_impl( - name: str, type_: Type, process_type: _ProcessType -) -> Callable[[Any, Any, DictConfig], None]: - """ - Return the default internal function for initialising a member. This is a function - which could be called in the create_ function to initialise the member. - - Args: - name: name of the member - type_: type of the member (with any Optional removed) - process_type: Shows whether member's declared type inherits ReplaceableBase, - in which case the actual type to be created is decided at - runtime. - - Returns: - Function taking - - self, the object whose member should be initialized. - - option for what to do. This is - - for pluggables, the type to initialise or None to do nothing - - for non pluggables, a bool indicating whether to initialise. - - the args for initializing the member. - """ - - def create_configurable(self, enabled, args): - if enabled: - expand_args_fields(type_) - setattr(self, name, type_(**args)) - else: - setattr(self, name, None) - - def create_pluggable(self, type_name, args): - if type_name is None: - setattr(self, name, None) - return - - if not isinstance(type_name, str): - raise ValueError( - f"A {type(type_name)} was received as the type of {name}." - + f" Perhaps this is from {name}{TYPE_SUFFIX}?" - ) - chosen_class = registry.get(type_, type_name) - if self._known_implementations.get(type_name, chosen_class) is not chosen_class: - # If this warning is raised, it means that a new definition of - # the chosen class has been registered since our class was processed - # (i.e. expanded). A DictConfig which comes from our get_default_args - # (which might have triggered the processing) will contain the old default - # values for the members of the chosen class. Changes to those defaults which - # were made in the redefinition will not be reflected here. - warnings.warn(f"New implementation of {type_name} is being chosen.") - expand_args_fields(chosen_class) - setattr(self, name, chosen_class(**args)) - - if process_type in (_ProcessType.CONFIGURABLE, _ProcessType.OPTIONAL_CONFIGURABLE): - return create_configurable - return create_pluggable - - -def run_auto_creation(self: Any) -> None: - """ - Run all the functions named in self._creation_functions. 
- """ - for create_function in self._creation_functions: - getattr(self, create_function)() - - -def _is_configurable_class(C) -> bool: - return isinstance(C, type) and issubclass(C, (Configurable, ReplaceableBase)) - - -def get_default_args(C, *, _do_not_process: Tuple[type, ...] = ()) -> DictConfig: - """ - Get the DictConfig corresponding to the defaults in a dataclass or - configurable. Normal use is to provide a dataclass can be provided as C. - If enable_get_default_args has been called on a function or plain class, - then that function or class can be provided as C. - - If C is a subclass of Configurable or ReplaceableBase, we make sure - it has been processed with expand_args_fields. - - Args: - C: the class or function to be processed - _do_not_process: (internal use) When this function is called from - expand_args_fields, we specify any class currently being - processed, to make sure we don't try to process a class - while it is already being processed. - - Returns: - new DictConfig object, which is typed. - """ - if C is None: - return DictConfig({}) - - if _is_configurable_class(C): - if C in _do_not_process: - raise ValueError( - f"Internal recursion error. Need processed {C}," - f" but cannot get it. _do_not_process={_do_not_process}" - ) - # This is safe to run multiple times. It will return - # straight away if C has already been processed. - expand_args_fields(C, _do_not_process=_do_not_process) - - if dataclasses.is_dataclass(C): - # Note that if get_default_args_field is used somewhere in C, - # this call is recursive. No special care is needed, - # because in practice get_default_args_field is used for - # separate types than the outer type. - - try: - out: DictConfig = OmegaConf.structured(C) - except Exception: - print(f"### OmegaConf.structured({C}) failed ###") - # We don't use `raise From` here, because that gets the original - # exception hidden by the OC_CAUSE logic in the case where we are - # called by hydra. - raise - exclude = getattr(C, "_processed_members", ()) - with open_dict(out): - for field in exclude: - out.pop(field, None) - return out - - if _is_configurable_class(C): - raise ValueError(f"Failed to process {C}") - - if not inspect.isfunction(C) and not inspect.isclass(C): - raise ValueError(f"Unexpected {C}") - - dataclass_name = _dataclass_name_for_function(C) - dataclass = getattr(sys.modules[C.__module__], dataclass_name, None) - if dataclass is None: - raise ValueError( - f"Cannot get args for {C}. Was enable_get_default_args forgotten?" - ) - - try: - out: DictConfig = OmegaConf.structured(dataclass) - except Exception: - print(f"### OmegaConf.structured failed for {C.__name__} ###") - raise - return out - - -def _dataclass_name_for_function(C: Any) -> str: - """ - Returns the name of the dataclass which enable_get_default_args(C) - creates. - """ - name = f"_{C.__name__}_default_args_" - return name - - -def _field_annotations_for_default_args( - C: Any, -) -> List[Tuple[str, Any, dataclasses.Field]]: - """ - If C is a function or a plain class with an __init__ function, - return the fields which `enable_get_default_args(C)` will need - to make a dataclass with. - - Args: - C: a function, or a class with an __init__ function. Must - have types for all its defaulted args. - - Returns: - a list of fields for a dataclass. 
- """ - - field_annotations = [] - for pname, defval in _params_iter(C): - default = defval.default - if default == inspect.Parameter.empty: - # we do not have a default value for the parameter - continue - - if defval.annotation == inspect._empty: - raise ValueError( - "All arguments of the input to enable_get_default_args have to" - f" be typed. Argument '{pname}' does not have a type annotation." - ) - - _, annotation = _resolve_optional(defval.annotation) - - if isinstance(default, set): # force OmegaConf to convert it to ListConfig - default = tuple(default) - - if isinstance(default, (list, dict)): - # OmegaConf will convert to [Dict|List]Config, so it is safe to reuse the value - field_ = dataclasses.field(default_factory=lambda default=default: default) - elif not _is_immutable_type(annotation, default): - continue - else: - # we can use a simple default argument for dataclass.field - field_ = dataclasses.field(default=default) - field_annotations.append((pname, defval.annotation, field_)) - - return field_annotations - - -def enable_get_default_args(C: Any, *, overwrite: bool = True) -> None: - """ - If C is a function or a plain class with an __init__ function, - and you want get_default_args(C) to work, then add - `enable_get_default_args(C)` straight after the definition of C. - This makes a dataclass corresponding to the default arguments of C - and stores it in the same module as C. - - Args: - C: a function, or a class with an __init__ function. Must - have types for all its defaulted args. - overwrite: whether to allow calling this a second time on - the same function. - """ - if not inspect.isfunction(C) and not inspect.isclass(C): - raise ValueError(f"Unexpected {C}") - - field_annotations = _field_annotations_for_default_args(C) - - name = _dataclass_name_for_function(C) - module = sys.modules[C.__module__] - if hasattr(module, name): - if overwrite: - warnings.warn(f"Overwriting {name} in {C.__module__}.") - else: - raise ValueError(f"Cannot overwrite {name} in {C.__module__}.") - dc = dataclasses.make_dataclass(name, field_annotations) - dc.__module__ = C.__module__ - setattr(module, name, dc) - - -def _params_iter(C): - """Returns dict of keyword args of a class or function C.""" - if inspect.isclass(C): - return itertools.islice( # exclude `self` - inspect.signature(C.__init__).parameters.items(), 1, None - ) - - return inspect.signature(C).parameters.items() - - -def _is_immutable_type(type_: Type, val: Any) -> bool: - if val is None: - return True - - PRIMITIVE_TYPES = (int, float, bool, str, bytes, tuple) - # sometimes type can be too relaxed (e.g. Any), so we also check values - if isinstance(val, PRIMITIVE_TYPES): - return True - - return type_ in PRIMITIVE_TYPES or ( - inspect.isclass(type_) and issubclass(type_, Enum) - ) - - -# copied from OmegaConf -def _resolve_optional(type_: Any) -> Tuple[bool, Any]: - """Check whether `type_` is equivalent to `typing.Optional[T]` for some T.""" - if get_origin(type_) is Union: - args = get_args(type_) - if len(args) == 2 and args[1] == type(None): # noqa E721 - return True, args[0] - if type_ is Any: - return True, Any - - return False, type_ - - -def _is_actually_dataclass(some_class) -> bool: - # Return whether the class some_class has been processed with - # the dataclass annotation. This is more specific than - # dataclasses.is_dataclass which returns True on anything - # deriving from a dataclass. - - # Checking for __init__ would also work for our purpose. 
- return "__dataclass_fields__" in some_class.__dict__ - - -def expand_args_fields( - some_class: Type[_Y], *, _do_not_process: Tuple[type, ...] = () -) -> Type[_Y]: - """ - This expands a class which inherits Configurable or ReplaceableBase classes, - including dataclass processing. some_class is modified in place by this function. - If expand_args_fields(some_class) has already been called, subsequent calls do - nothing and return some_class unmodified. - For classes of type ReplaceableBase, you can add some_class to the registry before - or after calling this function. But potential inner classes need to be registered - before this function is run on the outer class. - - The transformations this function makes, before the concluding - dataclasses.dataclass, are as follows. If X is a base class with registered - subclasses Y and Z, replace a class member - - x: X - - and optionally - - x_class_type: str = "Y" - def create_x(self):... - - with - - x_Y_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Y)) - x_Z_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Z)) - def create_x(self): - args = self.getattr(f"x_{self.x_class_type}_args") - self.create_x_impl(self.x_class_type, args) - def create_x_impl(self, x_type, args): - x_type = registry.get(X, x_type) - expand_args_fields(x_type) - self.x = x_type(**args) - x_class_type: str = "UNDEFAULTED" - - without adding the optional attributes if they are already there. - - Similarly, replace - - x: Optional[X] - - and optionally - - x_class_type: Optional[str] = "Y" - def create_x(self):... - - with - - x_Y_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Y)) - x_Z_args: dict = dataclasses.field(default_factory=lambda: get_default_args(Z)) - def create_x(self): - if self.x_class_type is None: - args = None - else: - args = self.getattr(f"x_{self.x_class_type}_args", None) - self.create_x_impl(self.x_class_type, args) - def create_x_impl(self, x_class_type, args): - if x_class_type is None: - self.x = None - return - - x_type = registry.get(X, x_class_type) - expand_args_fields(x_type) - assert args is not None - self.x = x_type(**args) - x_class_type: Optional[str] = "UNDEFAULTED" - - without adding the optional attributes if they are already there. - - Similarly, if X is a subclass of Configurable, - - x: X - - and optionally - - def create_x(self):... - - will be replaced with - - x_args: dict = dataclasses.field(default_factory=lambda: get_default_args(X)) - def create_x(self): - self.create_x_impl(True, self.x_args) - - def create_x_impl(self, enabled, args): - if enabled: - expand_args_fields(X) - self.x = X(**args) - else: - self.x = None - - Similarly, replace, - - x: Optional[X] - x_enabled: bool = ... - - and optionally - - def create_x(self):... - - with - - x_args: dict = dataclasses.field(default_factory=lambda: get_default_args(X)) - x_enabled: bool = ... - def create_x(self): - self.create_x_impl(self.x_enabled, self.x_args) - - def create_x_impl(self, enabled, args): - if enabled: - expand_args_fields(X) - self.x = X(**args) - else: - self.x = None - - - Also adds the following class members, unannotated so that dataclass - ignores them. - - _creation_functions: Tuple[str, ...] of all the create_ functions, - including those from base classes (not the create_x_impl ones). - - _known_implementations: Dict[str, Type] containing the classes which - have been found from the registry. 
- (used only to raise a warning if it one has been overwritten) - - _processed_members: a Dict[str, Any] of all the members which have been - transformed, with values giving the types they were declared to have. - (E.g. {"x": X} or {"x": Optional[X]} in the cases above.) - - In addition, if the class has a member function - - @classmethod - def x_tweak_args(cls, member_type: Type, args: DictConfig) -> None - - then the default_factory of x_args will also have a call to x_tweak_args(X, x_args) and - the default_factory of x_Y_args will also have a call to x_tweak_args(Y, x_Y_args). - - In addition, if the class inherits torch.nn.Module, the generated __init__ will - call torch.nn.Module's __init__ before doing anything else. - - Before any transformation of the class, if the class has a classmethod called - `pre_expand`, it will be called with no arguments. - - Note that although the *_args members are intended to have type DictConfig, they - are actually internally annotated as dicts. OmegaConf is happy to see a DictConfig - in place of a dict, but not vice-versa. Allowing dict lets a class user specify - x_args as an explicit dict without getting an incomprehensible error. - - Args: - some_class: the class to be processed - _do_not_process: Internal use for get_default_args: Because get_default_args calls - and is called by this function, we let it specify any class currently - being processed, to make sure we don't try to process a class while - it is already being processed. - - - Returns: - some_class itself, which has been modified in place. This - allows this function to be used as a class decorator. - """ - if _is_actually_dataclass(some_class): - return some_class - - if hasattr(some_class, PRE_EXPAND_NAME): - getattr(some_class, PRE_EXPAND_NAME)() - - # The functions this class's run_auto_creation will run. - creation_functions: List[str] = [] - # The classes which this type knows about from the registry - # We could use a weakref.WeakValueDictionary here which would mean - # that we don't warn if the class we should have expected is elsewhere - # unused. - known_implementations: Dict[str, Type] = {} - # Names of members which have been processed. - processed_members: Dict[str, Any] = {} - - # For all bases except ReplaceableBase and Configurable and object, - # we need to process them before our own processing. This is - # because dataclasses expect to inherit dataclasses and not unprocessed - # dataclasses. 
- for base in some_class.mro()[-3:0:-1]: - if base is ReplaceableBase: - continue - if base is Configurable: - continue - if not issubclass(base, (Configurable, ReplaceableBase)): - continue - expand_args_fields(base, _do_not_process=_do_not_process) - if "_creation_functions" in base.__dict__: - creation_functions.extend(base._creation_functions) - if "_known_implementations" in base.__dict__: - known_implementations.update(base._known_implementations) - if "_processed_members" in base.__dict__: - processed_members.update(base._processed_members) - - to_process: List[Tuple[str, Type, _ProcessType]] = [] - if "__annotations__" in some_class.__dict__: - for name, type_ in some_class.__annotations__.items(): - underlying_and_process_type = _get_type_to_process(type_) - if underlying_and_process_type is None: - continue - underlying_type, process_type = underlying_and_process_type - to_process.append((name, underlying_type, process_type)) - - for name, underlying_type, process_type in to_process: - processed_members[name] = some_class.__annotations__[name] - _process_member( - name=name, - type_=underlying_type, - process_type=process_type, - some_class=some_class, - creation_functions=creation_functions, - _do_not_process=_do_not_process, - known_implementations=known_implementations, - ) - - for key, count in Counter(creation_functions).items(): - if count > 1: - warnings.warn(f"Clash with {key} in a base class.") - some_class._creation_functions = tuple(creation_functions) - some_class._processed_members = processed_members - some_class._known_implementations = known_implementations - - dataclasses.dataclass(eq=False)(some_class) - _fixup_class_init(some_class) - return some_class - - -def _fixup_class_init(some_class) -> None: - """ - In-place modification of the some_class class which happens - after dataclass processing. - - If the dataclass some_class inherits torch.nn.Module, then - makes torch.nn.Module's __init__ be called before anything else - on instantiation of some_class. - This is a bit like attr's __pre_init__. - """ - - assert _is_actually_dataclass(some_class) - try: - import torch - except ModuleNotFoundError: - return - - if not issubclass(some_class, torch.nn.Module): - return - - def init(self, *args, **kwargs) -> None: - torch.nn.Module.__init__(self) - getattr(self, _DATACLASS_INIT)(*args, **kwargs) - - assert _DATACLASS_INIT not in some_class.__dict__ - - setattr(some_class, _DATACLASS_INIT, some_class.__init__) - some_class.__init__ = init - - -def get_default_args_field( - C, - *, - _do_not_process: Tuple[type, ...] = (), - _hook: Optional[Callable[[DictConfig], None]] = None, -): - """ - Get a dataclass field which defaults to get_default_args(...) - - Args: - C: As for get_default_args. - _do_not_process: As for get_default_args - _hook: Function called on the result before returning. - - Returns: - function to return new DictConfig object - """ - - def create(): - args = get_default_args(C, _do_not_process=_do_not_process) - if _hook is not None: - with open_dict(args): - _hook(args) - return args - - return dataclasses.field(default_factory=create) - - -def _get_default_args_field_from_registry( - *, - base_class_wanted: Type[_X], - name: str, - _do_not_process: Tuple[type, ...] = (), - _hook: Optional[Callable[[DictConfig], None]] = None, -): - """ - Get a dataclass field which defaults to - get_default_args(registry.get(base_class_wanted, name)). 
- - This is used internally in place of get_default_args_field in - order that default values are updated if a class is redefined. - - Args: - base_class_wanted: As for registry.get. - name: As for registry.get. - _do_not_process: As for get_default_args - _hook: Function called on the result before returning. - - Returns: - function to return new DictConfig object - """ - - def create(): - C = registry.get(base_class_wanted=base_class_wanted, name=name) - args = get_default_args(C, _do_not_process=_do_not_process) - if _hook is not None: - with open_dict(args): - _hook(args) - return args - - return dataclasses.field(default_factory=create) - - -def _get_type_to_process(type_) -> Optional[Tuple[Type, _ProcessType]]: - """ - If a member is annotated as `type_`, and that should expanded in - expand_args_fields, return how it should be expanded. - """ - if get_origin(type_) == Union: - # We look for Optional[X] which is a Union of X with None. - args = get_args(type_) - if len(args) != 2 or all(a is not type(None) for a in args): # noqa: E721 - return - underlying = args[0] if args[1] is type(None) else args[1] # noqa: E721 - if ( - isinstance(underlying, type) - and issubclass(underlying, ReplaceableBase) - and ReplaceableBase in underlying.__bases__ - ): - return underlying, _ProcessType.OPTIONAL_REPLACEABLE - - if isinstance(underlying, type) and issubclass(underlying, Configurable): - return underlying, _ProcessType.OPTIONAL_CONFIGURABLE - - if not isinstance(type_, type): - # e.g. any other Union or Tuple. Or ClassVar. - return - - if issubclass(type_, ReplaceableBase) and ReplaceableBase in type_.__bases__: - return type_, _ProcessType.REPLACEABLE - - if issubclass(type_, Configurable): - return type_, _ProcessType.CONFIGURABLE - - -def _process_member( - *, - name: str, - type_: Type, - process_type: _ProcessType, - some_class: Type, - creation_functions: List[str], - _do_not_process: Tuple[type, ...], - known_implementations: Dict[str, Type], -) -> None: - """ - Make the modification (of expand_args_fields) to some_class for a single member. - - Args: - name: member name - type_: member type (with Optional removed if needed) - process_type: whether member has dynamic type - some_class: (MODIFIED IN PLACE) the class being processed - creation_functions: (MODIFIED IN PLACE) the names of the create functions - _do_not_process: as for expand_args_fields. - known_implementations: (MODIFIED IN PLACE) known types from the registry - """ - # Because we are adding defaultable members, make - # sure they go at the end of __annotations__ in case - # there are non-defaulted standard class members. - del some_class.__annotations__[name] - hook = getattr(some_class, name + TWEAK_SUFFIX, None) - - if process_type in (_ProcessType.REPLACEABLE, _ProcessType.OPTIONAL_REPLACEABLE): - type_name = name + TYPE_SUFFIX - if type_name not in some_class.__annotations__: - if process_type == _ProcessType.OPTIONAL_REPLACEABLE: - some_class.__annotations__[type_name] = Optional[str] - else: - some_class.__annotations__[type_name] = str - setattr(some_class, type_name, "UNDEFAULTED") - - for derived_type in registry.get_all(type_): - if derived_type in _do_not_process: - continue - if issubclass(derived_type, some_class): - # When derived_type is some_class we have a simple - # recursion to avoid. When it's a strict subclass the - # situation is even worse. 
- continue - known_implementations[derived_type.__name__] = derived_type - args_name = f"{name}_{derived_type.__name__}{ARGS_SUFFIX}" - if args_name in some_class.__annotations__: - raise ValueError( - f"Cannot generate {args_name} because it is already present." - ) - some_class.__annotations__[args_name] = dict - if hook is not None: - hook_closed = partial(hook, derived_type) - else: - hook_closed = None - setattr( - some_class, - args_name, - _get_default_args_field_from_registry( - base_class_wanted=type_, - name=derived_type.__name__, - _do_not_process=_do_not_process + (some_class,), - _hook=hook_closed, - ), - ) - else: - args_name = name + ARGS_SUFFIX - if args_name in some_class.__annotations__: - raise ValueError( - f"Cannot generate {args_name} because it is already present." - ) - if issubclass(type_, some_class) or type_ in _do_not_process: - raise ValueError(f"Cannot process {type_} inside {some_class}") - - some_class.__annotations__[args_name] = dict - if hook is not None: - hook_closed = partial(hook, type_) - else: - hook_closed = None - setattr( - some_class, - args_name, - get_default_args_field( - type_, - _do_not_process=_do_not_process + (some_class,), - _hook=hook_closed, - ), - ) - if process_type == _ProcessType.OPTIONAL_CONFIGURABLE: - enabled_name = name + ENABLED_SUFFIX - if enabled_name not in some_class.__annotations__: - raise ValueError( - f"{name} is an Optional[{type_.__name__}] member " - f"but there is no corresponding member {enabled_name}." - ) - - creation_function_name = f"{CREATE_PREFIX}{name}" - if not hasattr(some_class, creation_function_name): - setattr( - some_class, - creation_function_name, - _default_create(name, type_, process_type), - ) - creation_functions.append(creation_function_name) - - creation_function_impl_name = f"{CREATE_PREFIX}{name}{IMPL_SUFFIX}" - if not hasattr(some_class, creation_function_impl_name): - setattr( - some_class, - creation_function_impl_name, - _default_create_impl(name, type_, process_type), - ) - - -def remove_unused_components(dict_: DictConfig) -> None: - """ - Assuming dict_ represents the state of a configurable, - modify it to remove all the portions corresponding to - pluggable parts which are not in use. - For example, if renderer_class_type is SignedDistanceFunctionRenderer, - the renderer_MultiPassEmissionAbsorptionRenderer_args will be - removed. Also, if chocolate_enabled is False, then chocolate_args will - be removed. 
- - Args: - dict_: (MODIFIED IN PLACE) a DictConfig instance - """ - keys = [key for key in dict_ if isinstance(key, str)] - suffix_length = len(TYPE_SUFFIX) - replaceables = [key[:-suffix_length] for key in keys if key.endswith(TYPE_SUFFIX)] - args_keys = [key for key in keys if key.endswith(ARGS_SUFFIX)] - for replaceable in replaceables: - selected_type = dict_[replaceable + TYPE_SUFFIX] - if selected_type is None: - expect = "" - else: - expect = replaceable + "_" + selected_type + ARGS_SUFFIX - with open_dict(dict_): - for key in args_keys: - if key.startswith(replaceable + "_") and key != expect: - del dict_[key] - - suffix_length = len(ENABLED_SUFFIX) - enableables = [key[:-suffix_length] for key in keys if key.endswith(ENABLED_SUFFIX)] - for enableable in enableables: - enabled = dict_[enableable + ENABLED_SUFFIX] - if not enabled: - with open_dict(dict_): - dict_.pop(enableable + ARGS_SUFFIX, None) - - for key in dict_: - if isinstance(dict_.get(key), DictConfig): - remove_unused_components(dict_[key]) diff --git a/pytorch3d/pytorch3d/implicitron/tools/depth_cleanup.py b/pytorch3d/pytorch3d/implicitron/tools/depth_cleanup.py deleted file mode 100644 index 9e5b509e5f7ed7e7d03df19bd9d6fc56559dac69..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/depth_cleanup.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -import torch.nn.functional as Fu -from pytorch3d.ops import wmean -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.structures import Pointclouds - - -def cleanup_eval_depth( - point_cloud: Pointclouds, - camera: CamerasBase, - depth: torch.Tensor, - mask: torch.Tensor, - sigma: float = 0.01, - image=None, -): - - ba, _, H, W = depth.shape - - pcl = point_cloud.points_padded() - n_pts = point_cloud.num_points_per_cloud() - pcl_mask = ( - torch.arange(pcl.shape[1], dtype=torch.int64, device=pcl.device)[None] - < n_pts[:, None] - ).type_as(pcl) - - pcl_proj = camera.transform_points(pcl, eps=1e-2)[..., :-1] - pcl_depth = camera.get_world_to_view_transform().transform_points(pcl)[..., -1] - - depth_and_idx = torch.cat( - ( - depth, - torch.arange(H * W).view(1, 1, H, W).expand(ba, 1, H, W).type_as(depth), - ), - dim=1, - ) - - depth_and_idx_sampled = Fu.grid_sample( - depth_and_idx, -pcl_proj[:, None], mode="nearest" - )[:, :, 0].view(ba, 2, -1) - - depth_sampled, idx_sampled = depth_and_idx_sampled.split([1, 1], dim=1) - df = (depth_sampled[:, 0] - pcl_depth).abs() - - # the threshold is a sigma-multiple of the standard deviation of the depth - mu = wmean(depth.view(ba, -1, 1), mask.view(ba, -1)).view(ba, 1) - std = ( - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. 
- wmean((depth.view(ba, -1) - mu).view(ba, -1, 1) ** 2, mask.view(ba, -1)) - .clamp(1e-4) - .sqrt() - .view(ba, -1) - ) - good_df_thr = std * sigma - good_depth = (df <= good_df_thr).float() * pcl_mask - - # perc_kept = good_depth.sum(dim=1) / pcl_mask.sum(dim=1).clamp(1) - # print(f'Kept {100.0 * perc_kept.mean():1.3f} % points') - - good_depth_raster = torch.zeros_like(depth).view(ba, -1) - good_depth_raster.scatter_add_(1, torch.round(idx_sampled[:, 0]).long(), good_depth) - - good_depth_mask = (good_depth_raster.view(ba, 1, H, W) > 0).float() - - # if float(torch.rand(1)) > 0.95: - # depth_ok = depth * good_depth_mask - - # # visualize - # visdom_env = 'depth_cleanup_dbg' - # from visdom import Visdom - # # from tools.vis_utils import make_depth_image - # from pytorch3d.vis.plotly_vis import plot_scene - # viz = Visdom() - - # show_pcls = { - # 'pointclouds': point_cloud, - # } - # for d, nm in zip( - # (depth, depth_ok), - # ('pointclouds_unproj', 'pointclouds_unproj_ok'), - # ): - # pointclouds_unproj = get_rgbd_point_cloud( - # camera, image, d, - # ) - # if int(pointclouds_unproj.num_points_per_cloud()) > 0: - # show_pcls[nm] = pointclouds_unproj - - # scene_dict = {'1': { - # **show_pcls, - # 'cameras': camera, - # }} - # scene = plot_scene( - # scene_dict, - # pointcloud_max_points=5000, - # pointcloud_marker_size=1.5, - # camera_scale=1.0, - # ) - # viz.plotlyplot(scene, env=visdom_env, win='scene') - - # # depth_image_ok = make_depth_image(depths_ok, masks) - # # viz.images(depth_image_ok, env=visdom_env, win='depth_ok') - # # depth_image = make_depth_image(depths, masks) - # # viz.images(depth_image, env=visdom_env, win='depth') - # # # viz.images(rgb_rendered, env=visdom_env, win='images_render') - # # viz.images(images, env=visdom_env, win='images') - # import pdb; pdb.set_trace() - - return good_depth_mask diff --git a/pytorch3d/pytorch3d/implicitron/tools/eval_video_trajectory.py b/pytorch3d/pytorch3d/implicitron/tools/eval_video_trajectory.py deleted file mode 100644 index bda9ec295729d58210fbec23cb3349a444516de2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/eval_video_trajectory.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -import math -from typing import Optional, Tuple - -import torch -from pytorch3d.implicitron.tools import utils -from pytorch3d.implicitron.tools.circle_fitting import fit_circle_in_3d -from pytorch3d.renderer import look_at_view_transform, PerspectiveCameras -from pytorch3d.transforms import Scale - - -logger = logging.getLogger(__name__) - - -def generate_eval_video_cameras( - train_cameras, - n_eval_cams: int = 100, - trajectory_type: str = "figure_eight", - trajectory_scale: float = 0.2, - scene_center: Tuple[float, float, float] = (0.0, 0.0, 0.0), - up: Tuple[float, float, float] = (0.0, 0.0, 1.0), - focal_length: Optional[torch.Tensor] = None, - principal_point: Optional[torch.Tensor] = None, - time: Optional[torch.Tensor] = None, - infer_up_as_plane_normal: bool = True, - traj_offset: Optional[Tuple[float, float, float]] = None, - traj_offset_canonical: Optional[Tuple[float, float, float]] = None, - remove_outliers_rate: float = 0.0, -) -> PerspectiveCameras: - """ - Generate a camera trajectory rendering a scene from multiple viewpoints. 
- - Args: - train_cameras: The set of cameras from the training dataset object. - n_eval_cams: Number of cameras in the trajectory. - trajectory_type: The type of the camera trajectory. Can be one of: - circular_lsq_fit: Camera centers follow a trajectory obtained - by fitting a 3D circle to train_cameras centers. - All cameras are looking towards scene_center. - figure_eight: Figure-of-8 trajectory around the center of the - central camera of the training dataset. - trefoil_knot: Same as 'figure_eight', but the trajectory has a shape - of a trefoil knot (https://en.wikipedia.org/wiki/Trefoil_knot). - figure_eight_knot: Same as 'figure_eight', but the trajectory has a shape - of a figure-eight knot - (https://en.wikipedia.org/wiki/Figure-eight_knot_(mathematics)). - trajectory_scale: The extent of the trajectory. - scene_center: The center of the scene in world coordinates which all - the cameras from the generated trajectory look at. - up: The "circular_lsq_fit" vector of the scene (=the normal of the scene floor). - Active for the `trajectory_type="circular"`. - focal_length: The focal length of the output cameras. If `None`, an average - focal length of the train_cameras is used. - principal_point: The principal point of the output cameras. If `None`, an average - principal point of all train_cameras is used. - time: Defines the total length of the generated camera trajectory. All possible - trajectories (set with the `trajectory_type` argument) are periodic with - the period of `time=2pi`. - E.g. setting `trajectory_type=circular_lsq_fit` and `time=4pi`, will generate - a trajectory of camera poses rotating the total of 720 deg around the object. - infer_up_as_plane_normal: Infer the camera `up` vector automatically as the normal - of the plane fit to the optical centers of `train_cameras`. - traj_offset: 3D offset vector added to each point of the trajectory. - traj_offset_canonical: 3D offset vector expressed in the local coordinates of - the estimated trajectory which is added to each point of the trajectory. - remove_outliers_rate: the number between 0 and 1; if > 0, - some outlier train_cameras will be removed from trajectory estimation; - the filtering is based on camera center coordinates; top and - bottom `remove_outliers_rate` cameras on each dimension are removed. 
- Returns: - Batch of camera instances which can be used as the test dataset - """ - if remove_outliers_rate > 0.0: - train_cameras = _remove_outlier_cameras(train_cameras, remove_outliers_rate) - - if trajectory_type in ("figure_eight", "trefoil_knot", "figure_eight_knot"): - cam_centers = train_cameras.get_camera_center() - # get the nearest camera center to the mean of centers - mean_camera_idx = ( - ((cam_centers - cam_centers.mean(dim=0)[None]) ** 2) - .sum(dim=1) - .min(dim=0) - .indices - ) - # generate the knot trajectory in canonical coords - if time is None: - time = torch.linspace(0, 2 * math.pi, n_eval_cams + 1)[:n_eval_cams] - else: - assert time.numel() == n_eval_cams - if trajectory_type == "trefoil_knot": - traj = _trefoil_knot(time) - elif trajectory_type == "figure_eight_knot": - traj = _figure_eight_knot(time) - elif trajectory_type == "figure_eight": - traj = _figure_eight(time) - else: - raise ValueError(f"bad trajectory type: {trajectory_type}") - traj[:, 2] -= traj[:, 2].max() - - # transform the canonical knot to the coord frame of the mean camera - mean_camera = PerspectiveCameras( - **{ - k: getattr(train_cameras, k)[[int(mean_camera_idx)]] - for k in ("focal_length", "principal_point", "R", "T") - } - ) - traj_trans = Scale(cam_centers.std(dim=0).mean() * trajectory_scale).compose( - mean_camera.get_world_to_view_transform().inverse() - ) - - if traj_offset_canonical is not None: - traj_trans = traj_trans.translate( - torch.FloatTensor(traj_offset_canonical)[None].to(traj) - ) - - traj = traj_trans.transform_points(traj) - - plane_normal = _fit_plane(cam_centers)[:, 0] - if infer_up_as_plane_normal: - up = _disambiguate_normal(plane_normal, up) - - elif trajectory_type == "circular_lsq_fit": - ### fit plane to the camera centers - - # get the center of the plane as the median of the camera centers - cam_centers = train_cameras.get_camera_center() - - if time is not None: - angle = time - else: - angle = torch.linspace(0, 2.0 * math.pi, n_eval_cams).to(cam_centers) - - fit = fit_circle_in_3d( - cam_centers, - angles=angle, - offset=angle.new_tensor(traj_offset_canonical) - if traj_offset_canonical is not None - else None, - up=angle.new_tensor(up), - ) - traj = fit.generated_points - - # scalethe trajectory - _t_mu = traj.mean(dim=0, keepdim=True) - traj = (traj - _t_mu) * trajectory_scale + _t_mu - - plane_normal = fit.normal - - if infer_up_as_plane_normal: - up = _disambiguate_normal(plane_normal, up) - - else: - raise ValueError(f"Uknown trajectory_type {trajectory_type}.") - - if traj_offset is not None: - traj = traj + torch.FloatTensor(traj_offset)[None].to(traj) - - # point all cameras towards the center of the scene - R, T = look_at_view_transform( - eye=traj, - at=(scene_center,), # (1, 3) - up=(up,), # (1, 3) - device=traj.device, - ) - - # get the average focal length and principal point - if focal_length is None: - focal_length = train_cameras.focal_length.mean(dim=0).repeat(n_eval_cams, 1) - if principal_point is None: - principal_point = train_cameras.principal_point.mean(dim=0).repeat( - n_eval_cams, 1 - ) - - test_cameras = PerspectiveCameras( - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - device=focal_length.device, - ) - - # _visdom_plot_scene( - # train_cameras, - # test_cameras, - # ) - - return test_cameras - - -def _remove_outlier_cameras( - cameras: PerspectiveCameras, outlier_rate: float -) -> PerspectiveCameras: - keep_indices = utils.get_inlier_indicators( - cameras.get_camera_center(), dim=0, 
outlier_rate=outlier_rate - ) - # pyre-fixme[6]: For 1st param expected `Union[List[int], int, BoolTensor, - # LongTensor]` but got `Tensor`. - clean_cameras = cameras[keep_indices] - logger.info( - "Filtered outlier cameras when estimating the trajectory: " - f"{len(cameras)} β†’ {len(clean_cameras)}" - ) - # pyre-fixme[7]: Expected `PerspectiveCameras` but got `CamerasBase`. - return clean_cameras - - -def _disambiguate_normal(normal, up): - up_t = torch.tensor(up).to(normal) - flip = (up_t * normal).sum().sign() - up = normal * flip - up = up.tolist() - return up - - -def _fit_plane(x): - x = x - x.mean(dim=0)[None] - cov = (x.t() @ x) / x.shape[0] - _, e_vec = torch.linalg.eigh(cov) - return e_vec - - -def _visdom_plot_scene( - train_cameras, - test_cameras, -) -> None: - from pytorch3d.vis.plotly_vis import plot_scene - - p = plot_scene( - { - "scene": { - "train_cams": train_cameras, - "test_cams": test_cameras, - } - } - ) - from visdom import Visdom - - viz = Visdom() - viz.plotlyplot(p, env="cam_traj_dbg", win="cam_trajs") - - -def _figure_eight_knot(t: torch.Tensor, z_scale: float = 0.5): - x = (2 + (2 * t).cos()) * (3 * t).cos() - y = (2 + (2 * t).cos()) * (3 * t).sin() - z = (4 * t).sin() * z_scale - return torch.stack((x, y, z), dim=-1) - - -def _trefoil_knot(t: torch.Tensor, z_scale: float = 0.5): - x = t.sin() + 2 * (2 * t).sin() - y = t.cos() - 2 * (2 * t).cos() - z = -(3 * t).sin() * z_scale - return torch.stack((x, y, z), dim=-1) - - -def _figure_eight(t: torch.Tensor, z_scale: float = 0.5): - x = t.cos() - y = (2 * t).sin() / 2 - z = t.sin() * z_scale - return torch.stack((x, y, z), dim=-1) diff --git a/pytorch3d/pytorch3d/implicitron/tools/image_utils.py b/pytorch3d/pytorch3d/implicitron/tools/image_utils.py deleted file mode 100644 index 29c7e0a4122eb81b0e61343c74b29a0091ffcd8c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/image_utils.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import Sequence, Union - -import torch - - -def mask_background( - image_rgb: torch.Tensor, - mask_fg: torch.Tensor, - dim_color: int = 1, - bg_color: Union[torch.Tensor, Sequence, str, float] = 0.0, -) -> torch.Tensor: - """ - Mask the background input image tensor `image_rgb` with `bg_color`. - The background regions are obtained from the binary foreground segmentation - mask `mask_fg`. 
- """ - tgt_view = [1, 1, 1, 1] - tgt_view[dim_color] = 3 - # obtain the background color tensor - if isinstance(bg_color, torch.Tensor): - bg_color_t = bg_color.view(1, 3, 1, 1).clone().to(image_rgb) - elif isinstance(bg_color, (float, tuple, list)): - if isinstance(bg_color, float): - bg_color = [bg_color] * 3 - bg_color_t = torch.tensor( - bg_color, device=image_rgb.device, dtype=image_rgb.dtype - ).view(*tgt_view) - elif isinstance(bg_color, str): - if bg_color == "white": - bg_color_t = image_rgb.new_ones(tgt_view) - elif bg_color == "black": - bg_color_t = image_rgb.new_zeros(tgt_view) - else: - raise ValueError(_invalid_color_error_msg(bg_color)) - else: - raise ValueError(_invalid_color_error_msg(bg_color)) - # cast to the image_rgb's type - mask_fg = mask_fg.type_as(image_rgb) - # mask the bg - image_masked = mask_fg * image_rgb + (1 - mask_fg) * bg_color_t - return image_masked - - -def _invalid_color_error_msg(bg_color) -> str: - return ( - f"Invalid bg_color={bg_color}. Plese set bg_color to a 3-element" - + " tensor. or a string (white | black), or a float." - ) diff --git a/pytorch3d/pytorch3d/implicitron/tools/metric_utils.py b/pytorch3d/pytorch3d/implicitron/tools/metric_utils.py deleted file mode 100644 index 2ed2a8e3f043b628949456ac27434efba9b76641..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/metric_utils.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Optional, Tuple - -import torch -from torch.nn import functional as F - - -def eval_depth( - pred: torch.Tensor, - gt: torch.Tensor, - crop: int = 1, - mask: Optional[torch.Tensor] = None, - get_best_scale: bool = True, - mask_thr: float = 0.5, - best_scale_clamp_thr: float = 1e-4, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Evaluate the depth error between the prediction `pred` and the ground - truth `gt`. - - Args: - pred: A tensor of shape (N, 1, H, W) denoting the predicted depth maps. - gt: A tensor of shape (N, 1, H, W) denoting the ground truth depth maps. - crop: The number of pixels to crop from the border. - mask: A mask denoting the valid regions of the gt depth. - get_best_scale: If `True`, estimates a scaling factor of the predicted depth - that yields the best mean squared error between `pred` and `gt`. - This is typically enabled for cases where predicted reconstructions - are inherently defined up to an arbitrary scaling factor. - mask_thr: A constant used to threshold the `mask` to specify the valid - regions. - best_scale_clamp_thr: The threshold for clamping the divisor in best - scale estimation. - - Returns: - mse_depth: Mean squared error between `pred` and `gt`. - abs_depth: Mean absolute difference between `pred` and `gt`. - """ - - # chuck out the border - if crop > 0: - gt = gt[:, :, crop:-crop, crop:-crop] - pred = pred[:, :, crop:-crop, crop:-crop] - - if mask is not None: - # mult gt by mask - if crop > 0: - mask = mask[:, :, crop:-crop, crop:-crop] - gt = gt * (mask > mask_thr).float() - - dmask = (gt > 0.0).float() - dmask_mass = torch.clamp(dmask.sum((1, 2, 3)), 1e-4) - - if get_best_scale: - # mult preds by a scalar "scale_best" - # s.t. 
we get best possible mse error - scale_best = estimate_depth_scale_factor(pred, gt, dmask, best_scale_clamp_thr) - pred = pred * scale_best[:, None, None, None] - - df = gt - pred - - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - mse_depth = (dmask * (df**2)).sum((1, 2, 3)) / dmask_mass - abs_depth = (dmask * df.abs()).sum((1, 2, 3)) / dmask_mass - - return mse_depth, abs_depth - - -def estimate_depth_scale_factor(pred, gt, mask, clamp_thr): - xy = pred * gt * mask - xx = pred * pred * mask - scale_best = xy.mean((1, 2, 3)) / torch.clamp(xx.mean((1, 2, 3)), clamp_thr) - return scale_best - - -def calc_psnr( - x: torch.Tensor, - y: torch.Tensor, - mask: Optional[torch.Tensor] = None, -) -> torch.Tensor: - """ - Calculates the Peak-signal-to-noise ratio between tensors `x` and `y`. - """ - mse = calc_mse(x, y, mask=mask) - psnr = torch.log10(mse.clamp(1e-10)) * (-10.0) - return psnr - - -def calc_mse( - x: torch.Tensor, - y: torch.Tensor, - mask: Optional[torch.Tensor] = None, -) -> torch.Tensor: - """ - Calculates the mean square error between tensors `x` and `y`. - """ - if mask is None: - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - return torch.mean((x - y) ** 2) - else: - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - return (((x - y) ** 2) * mask).sum() / mask.expand_as(x).sum().clamp(1e-5) - - -def calc_bce( - pred: torch.Tensor, - gt: torch.Tensor, - equal_w: bool = True, - pred_eps: float = 0.01, - mask: Optional[torch.Tensor] = None, - lerp_bound: Optional[float] = None, -) -> torch.Tensor: - """ - Calculates the binary cross entropy. - """ - if pred_eps > 0.0: - # up/low bound the predictions - pred = torch.clamp(pred, pred_eps, 1.0 - pred_eps) - - if mask is None: - mask = torch.ones_like(gt) - - if equal_w: - mask_fg = (gt > 0.5).float() * mask - mask_bg = (1 - mask_fg) * mask - weight = mask_fg / mask_fg.sum().clamp(1.0) + mask_bg / mask_bg.sum().clamp(1.0) - # weight sum should be at this point ~2 - # pyre-fixme[58]: `/` is not supported for operand types `int` and `Tensor`. - weight = weight * (weight.numel() / weight.sum().clamp(1.0)) - else: - weight = torch.ones_like(gt) * mask - - if lerp_bound is not None: - return binary_cross_entropy_lerp(pred, gt, weight, lerp_bound) - else: - return F.binary_cross_entropy(pred, gt, reduction="mean", weight=weight) - - -def binary_cross_entropy_lerp( - pred: torch.Tensor, - gt: torch.Tensor, - weight: torch.Tensor, - lerp_bound: float, -): - """ - Binary cross entropy which avoids exploding gradients by linearly - extrapolating the log function for log(1-pred) mad log(pred) whenever - pred or 1-pred is smaller than lerp_bound. - """ - loss = log_lerp(1 - pred, lerp_bound) * (1 - gt) + log_lerp(pred, lerp_bound) * gt - loss_reduced = -(loss * weight).sum() / weight.sum().clamp(1e-4) - return loss_reduced - - -def log_lerp(x: torch.Tensor, b: float): - """ - Linearly extrapolated log for x < b. - """ - assert b > 0 - return torch.where(x >= b, x.log(), math.log(b) + (x - b) / b) - - -def rgb_l1( - pred: torch.Tensor, target: torch.Tensor, mask: Optional[torch.Tensor] = None -) -> torch.Tensor: - """ - Calculates the mean absolute error between the predicted colors `pred` - and ground truth colors `target`. 
- """ - if mask is None: - mask = torch.ones_like(pred[:, :1]) - return ((pred - target).abs() * mask).sum(dim=(1, 2, 3)) / mask.sum( - dim=(1, 2, 3) - ).clamp(1) - - -def huber(dfsq: torch.Tensor, scaling: float = 0.03) -> torch.Tensor: - """ - Calculates the huber function of the input squared error `dfsq`. - The function smoothly transitions from a region with unit gradient - to a hyperbolic function at `dfsq=scaling`. - """ - loss = (safe_sqrt(1 + dfsq / (scaling * scaling), eps=1e-4) - 1) * scaling - return loss - - -def neg_iou_loss( - predict: torch.Tensor, - target: torch.Tensor, - mask: Optional[torch.Tensor] = None, -) -> torch.Tensor: - """ - This is a great loss because it emphasizes on the active - regions of the predict and targets - """ - return 1.0 - iou(predict, target, mask=mask) - - -def safe_sqrt(A: torch.Tensor, eps: float = 1e-4) -> torch.Tensor: - """ - performs safe differentiable sqrt - """ - return (torch.clamp(A, float(0)) + eps).sqrt() - - -def iou( - predict: torch.Tensor, - target: torch.Tensor, - mask: Optional[torch.Tensor] = None, -) -> torch.Tensor: - """ - This is a great loss because it emphasizes on the active - regions of the predict and targets - """ - dims = tuple(range(predict.dim())[1:]) - if mask is not None: - predict = predict * mask - target = target * mask - intersect = (predict * target).sum(dims) - union = (predict + target - predict * target).sum(dims) + 1e-4 - return (intersect / union).sum() / intersect.numel() - - -def beta_prior(pred: torch.Tensor, cap: float = 0.1) -> torch.Tensor: - if cap <= 0.0: - raise ValueError("capping should be positive to avoid unbound loss") - - min_value = math.log(cap) + math.log(cap + 1.0) - return (torch.log(pred + cap) + torch.log(1.0 - pred + cap)).mean() - min_value diff --git a/pytorch3d/pytorch3d/implicitron/tools/model_io.py b/pytorch3d/pytorch3d/implicitron/tools/model_io.py deleted file mode 100644 index f94a4ed2511e72ddf2b9215826001d135f834dcc..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/model_io.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import glob -import logging -import os -import shutil -import tempfile -from typing import Optional - -import torch - - -logger = logging.getLogger(__name__) - - -def load_stats(flstats): - from pytorch3d.implicitron.tools.stats import Stats - - if not os.path.isfile(flstats): - return None - - return Stats.load(flstats) - - -def get_model_path(fl) -> str: - fl = os.path.splitext(fl)[0] - flmodel = "%s.pth" % fl - return flmodel - - -def get_optimizer_path(fl) -> str: - fl = os.path.splitext(fl)[0] - flopt = "%s_opt.pth" % fl - return flopt - - -def get_stats_path(fl, eval_results: bool = False) -> str: - fl = os.path.splitext(fl)[0] - if eval_results: - for postfix in ("_2", ""): - flstats = os.path.join(os.path.dirname(fl), f"stats_test{postfix}.jgz") - if os.path.isfile(flstats): - break - else: - flstats = "%s_stats.jgz" % fl - # pyre-fixme[61]: `flstats` is undefined, or not always defined. - return flstats - - -def safe_save_model(model, stats, fl, optimizer=None, cfg=None) -> None: - """ - This functions stores model files safely so that no model files exist on the - file system in case the saving procedure gets interrupted. 
- - This is done first by saving the model files to a temporary directory followed - by (atomic) moves to the target location. Note, that this can still result - in a corrupt set of model files in case interruption happens while performing - the moves. It is however quite improbable that a crash would occur right at - this time. - """ - logger.info(f"saving model files safely to {fl}") - # first store everything to a tmpdir - with tempfile.TemporaryDirectory() as tmpdir: - tmpfl = os.path.join(tmpdir, os.path.split(fl)[-1]) - stored_tmp_fls = save_model(model, stats, tmpfl, optimizer=optimizer, cfg=cfg) - tgt_fls = [ - ( - os.path.join(os.path.split(fl)[0], os.path.split(tmpfl)[-1]) - if (tmpfl is not None) - else None - ) - for tmpfl in stored_tmp_fls - ] - # then move from the tmpdir to the right location - for tmpfl, tgt_fl in zip(stored_tmp_fls, tgt_fls): - if tgt_fl is None: - continue - shutil.move(tmpfl, tgt_fl) - - -def save_model(model, stats, fl, optimizer=None, cfg=None): - flstats = get_stats_path(fl) - flmodel = get_model_path(fl) - logger.info("saving model to %s" % flmodel) - torch.save(model.state_dict(), flmodel) - flopt = None - if optimizer is not None: - flopt = get_optimizer_path(fl) - logger.info("saving optimizer to %s" % flopt) - torch.save(optimizer.state_dict(), flopt) - logger.info("saving model stats to %s" % flstats) - stats.save(flstats) - - return flstats, flmodel, flopt - - -def save_stats(stats, fl, cfg=None): - flstats = get_stats_path(fl) - logger.info("saving model stats to %s" % flstats) - stats.save(flstats) - return flstats - - -def load_model(fl, map_location: Optional[dict]): - flstats = get_stats_path(fl) - flmodel = get_model_path(fl) - flopt = get_optimizer_path(fl) - model_state_dict = torch.load(flmodel, map_location=map_location) - stats = load_stats(flstats) - if os.path.isfile(flopt): - optimizer = torch.load(flopt, map_location=map_location) - else: - optimizer = None - - return model_state_dict, stats, optimizer - - -def parse_epoch_from_model_path(model_path) -> int: - return int( - os.path.split(model_path)[-1].replace(".pth", "").replace("model_epoch_", "") - ) - - -def get_checkpoint(exp_dir, epoch): - fl = os.path.join(exp_dir, "model_epoch_%08d.pth" % epoch) - return fl - - -def find_last_checkpoint( - exp_dir, any_path: bool = False, all_checkpoints: bool = False -): - if any_path: - exts = [".pth", "_stats.jgz", "_opt.pth"] - else: - exts = [".pth"] - - for ext in exts: - fls = sorted( - glob.glob( - os.path.join(glob.escape(exp_dir), "model_epoch_" + "[0-9]" * 8 + ext) - ) - ) - if len(fls) > 0: - break - # pyre-fixme[61]: `fls` is undefined, or not always defined. - if len(fls) == 0: - fl = None - else: - if all_checkpoints: - # pyre-fixme[61]: `fls` is undefined, or not always defined. - fl = [f[0 : -len(ext)] + ".pth" for f in fls] - else: - # pyre-fixme[61]: `ext` is undefined, or not always defined. 
- fl = fls[-1][0 : -len(ext)] + ".pth" - - return fl - - -def purge_epoch(exp_dir, epoch) -> None: - model_path = get_checkpoint(exp_dir, epoch) - - for file_path in [ - model_path, - get_optimizer_path(model_path), - get_stats_path(model_path), - ]: - if os.path.isfile(file_path): - logger.info("deleting %s" % file_path) - os.remove(file_path) diff --git a/pytorch3d/pytorch3d/implicitron/tools/point_cloud_utils.py b/pytorch3d/pytorch3d/implicitron/tools/point_cloud_utils.py deleted file mode 100644 index 5954aace687749fb4de2eb271bfb6017952ef7f2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/point_cloud_utils.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import cast, Optional, Tuple - -import torch -import torch.nn.functional as Fu -from pytorch3d.renderer import ( - AlphaCompositor, - NDCMultinomialRaysampler, - PointsRasterizationSettings, - PointsRasterizer, - ray_bundle_to_ray_points, -) -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.structures import Pointclouds - - -def get_rgbd_point_cloud( - camera: CamerasBase, - image_rgb: torch.Tensor, - depth_map: torch.Tensor, - mask: Optional[torch.Tensor] = None, - mask_thr: float = 0.5, - *, - euclidean: bool = False, -) -> Pointclouds: - """ - Given a batch of images, depths, masks and cameras, generate a single colored - point cloud by unprojecting depth maps and coloring with the source - pixel colors. - - Arguments: - camera: Batch of N cameras - image_rgb: Batch of N images of shape (N, C, H, W). - For RGB images C=3. - depth_map: Batch of N depth maps of shape (N, 1, H', W'). - Only positive values here are used to generate points. - If euclidean=False (default) this contains perpendicular distances - from each point to the camera plane (z-values). - If euclidean=True, this contains distances from each point to - the camera center. - mask: If provided, batch of N masks of the same shape as depth_map. - If provided, values in depth_map are ignored if the corresponding - element of mask is smaller than mask_thr. - mask_thr: used in interpreting mask - euclidean: used in interpreting depth_map. - - Returns: - Pointclouds object containing one point cloud. 
- """ - imh, imw = depth_map.shape[2:] - - # convert the depth maps to point clouds using the grid ray sampler - pts_3d = ray_bundle_to_ray_points( - NDCMultinomialRaysampler( - image_width=imw, - image_height=imh, - n_pts_per_ray=1, - min_depth=1.0, - max_depth=1.0, - unit_directions=euclidean, - )(camera)._replace(lengths=depth_map[:, 0, ..., None]) - ) - - pts_mask = depth_map > 0.0 - if mask is not None: - pts_mask *= mask > mask_thr - pts_mask = pts_mask.reshape(-1) - - pts_3d = pts_3d.reshape(-1, 3)[pts_mask] - - pts_colors = torch.nn.functional.interpolate( - image_rgb, - size=[imh, imw], - mode="bilinear", - align_corners=False, - ) - pts_colors = pts_colors.permute(0, 2, 3, 1).reshape(-1, image_rgb.shape[1])[ - pts_mask - ] - - return Pointclouds(points=pts_3d[None], features=pts_colors[None]) - - -def render_point_cloud_pytorch3d( - camera, - point_cloud, - render_size: Tuple[int, int], - point_radius: float = 0.03, - topk: int = 10, - eps: float = 1e-2, - bg_color=None, - bin_size: Optional[int] = None, - **kwargs, -): - - # feature dimension - featdim = point_cloud.features_packed().shape[-1] - - # move to the camera coordinates; using identity cameras in the renderer - point_cloud = _transform_points(camera, point_cloud, eps, **kwargs) - camera_trivial = camera.clone() - camera_trivial.R[:] = torch.eye(3) - camera_trivial.T *= 0.0 - - bin_size = ( - bin_size - if bin_size is not None - else (64 if int(max(render_size)) > 1024 else None) - ) - rasterizer = PointsRasterizer( - cameras=camera_trivial, - raster_settings=PointsRasterizationSettings( - image_size=render_size, - radius=point_radius, - points_per_pixel=topk, - bin_size=bin_size, - ), - ) - - fragments = rasterizer(point_cloud, **kwargs) - - # Construct weights based on the distance of a point to the true point. - # However, this could be done differently: e.g. predicted as opposed - # to a function of the weights. - r = rasterizer.raster_settings.radius - - # set up the blending weights - dists2 = fragments.dists - weights = 1 - dists2 / (r * r) - ok = cast(torch.BoolTensor, (fragments.idx >= 0)).float() - - weights = weights * ok - - fragments_prm = fragments.idx.long().permute(0, 3, 1, 2) - weights_prm = weights.permute(0, 3, 1, 2) - images = AlphaCompositor()( - fragments_prm, - weights_prm, - point_cloud.features_packed().permute(1, 0), - background_color=bg_color if bg_color is not None else [0.0] * featdim, - **kwargs, - ) - - # get the depths ... - # weighted_fs[b,c,i,j] = sum_k cum_alpha_k * features[c,pointsidx[b,k,i,j]] - # cum_alpha_k = alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j]) - cumprod = torch.cumprod(1 - weights, dim=-1) - cumprod = torch.cat((torch.ones_like(cumprod[..., :1]), cumprod[..., :-1]), dim=-1) - depths = (weights * cumprod * fragments.zbuf).sum(dim=-1) - # add the rendering mask - # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`. 
- render_mask = -torch.prod(1.0 - weights, dim=-1) + 1.0 - - # cat depths and render mask - rendered_blob = torch.cat((images, depths[:, None], render_mask[:, None]), dim=1) - - # reshape back - rendered_blob = Fu.interpolate( - rendered_blob, - size=tuple(render_size), - mode="bilinear", - align_corners=False, - ) - - data_rendered, depth_rendered, render_mask = rendered_blob.split( - [rendered_blob.shape[1] - 2, 1, 1], - dim=1, - ) - - return data_rendered, render_mask, depth_rendered - - -def _signed_clamp(x, eps): - sign = x.sign() + (x == 0.0).type_as(x) - x_clamp = sign * torch.clamp(x.abs(), eps) - return x_clamp - - -def _transform_points(cameras, point_clouds, eps, **kwargs): - pts_world = point_clouds.points_padded() - pts_view = cameras.get_world_to_view_transform(**kwargs).transform_points( - pts_world, eps=eps - ) - # it is crucial to actually clamp the points as well ... - pts_view = torch.cat( - (pts_view[..., :-1], _signed_clamp(pts_view[..., -1:], eps)), dim=-1 - ) - point_clouds = point_clouds.update_padded(pts_view) - return point_clouds diff --git a/pytorch3d/pytorch3d/implicitron/tools/rasterize_mc.py b/pytorch3d/pytorch3d/implicitron/tools/rasterize_mc.py deleted file mode 100644 index 3fbf4b8d348c2451e987da194616719d76b1d115..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/rasterize_mc.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Optional, Tuple - -import pytorch3d - -import torch -from pytorch3d.ops import packed_to_padded -from pytorch3d.renderer import PerspectiveCameras -from pytorch3d.structures import Pointclouds - -from .point_cloud_utils import render_point_cloud_pytorch3d - - -@torch.no_grad() -def rasterize_sparse_ray_bundle( - ray_bundle: "pytorch3d.implicitron.models.renderer.base.ImplicitronRayBundle", - features: torch.Tensor, - image_size_hw: Tuple[int, int], - depth: torch.Tensor, - masks: Optional[torch.Tensor] = None, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Rasterizes sparse features corresponding to the coordinates defined by - the rays in the bundle. - - Args: - ray_bundle: ray bundle object with B x ... x 2 pixel coordinates, - it can be packed. - features: B x ... x C tensor containing per-point rendered features. - image_size_hw: Tuple[image_height, image_width] containing - the size of rasterized image. - depth: B x ... x 1 tensor containing per-point rendered depth. - masks: B x ... x 1 tensor containing the alpha mask of the - rendered features. - - Returns: - - image_render: B x C x H x W tensor of rasterized features - - depths_render: B x 1 x H x W tensor of rasterized depth maps - - masks_render: B x 1 x H x W tensor of opacities after splatting - """ - # Flatten the features and xy locations. 
- features_depth_ras = torch.cat( - (features.flatten(1, -2), depth.flatten(1, -2)), dim=-1 - ) - xys = ray_bundle.xys - masks_ras = None - if ray_bundle.is_packed(): - camera_counts = ray_bundle.camera_counts - assert camera_counts is not None - xys, first_idxs, _ = ray_bundle.get_padded_xys() - masks_ras = ( - torch.arange(xys.shape[1], device=xys.device)[:, None] - < camera_counts[:, None, None] - ) - - max_size = torch.max(camera_counts).item() - features_depth_ras = packed_to_padded( - features_depth_ras[:, 0], first_idxs, max_size - ) - if masks is not None: - padded_mask = packed_to_padded(masks.flatten(1, -1), first_idxs, max_size) - masks_ras = padded_mask * masks_ras - - xys_ras = xys.flatten(1, -2) - - if masks_ras is None: - assert not ray_bundle.is_packed() - masks_ras = masks.flatten(1, -2) if masks is not None else None - - if min(*image_size_hw) <= 0: - raise ValueError( - "Need to specify a positive output_size_hw for bundle rasterisation." - ) - - # Estimate the rasterization point radius so that we approximately fill - # the whole image given the number of rasterized points. - pt_radius = 2.0 / math.sqrt(xys.shape[1]) - - # Rasterize the samples. - features_depth_render, masks_render = rasterize_mc_samples( - xys_ras, - features_depth_ras, - image_size_hw, - radius=pt_radius, - masks=masks_ras, - ) - images_render = features_depth_render[:, :-1] - depths_render = features_depth_render[:, -1:] - return images_render, depths_render, masks_render - - -def rasterize_mc_samples( - xys: torch.Tensor, - feats: torch.Tensor, - image_size_hw: Tuple[int, int], - radius: float = 0.03, - topk: int = 5, - masks: Optional[torch.Tensor] = None, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Rasterizes Monte-Carlo sampled features back onto the image. - - Specifically, the code uses the PyTorch3D point rasterizer to render - a z-flat point cloud composed of the xy MC locations and their features. - - Args: - xys: B x N x 2 2D point locations in PyTorch3D NDC convention - feats: B x N x dim tensor containing per-point rendered features. - image_size_hw: Tuple[image_height, image_width] containing - the size of rasterized image. - radius: Rasterization point radius. - topk: The maximum z-buffer size for the PyTorch3D point cloud rasterizer. - masks: B x N x 1 tensor containing the alpha mask of the - rendered features. - """ - - if masks is None: - masks = torch.ones_like(xys[..., :1]) - - feats = torch.cat((feats, masks), dim=-1) - pointclouds = Pointclouds( - points=torch.cat([xys, torch.ones_like(xys[..., :1])], dim=-1), - features=feats, - ) - - data_rendered, render_mask, _ = render_point_cloud_pytorch3d( - PerspectiveCameras(device=feats.device), - pointclouds, - render_size=image_size_hw, - point_radius=radius, - topk=topk, - ) - - data_rendered, masks_pt = data_rendered.split( - [data_rendered.shape[1] - 1, 1], dim=1 - ) - render_mask = masks_pt * render_mask - - return data_rendered, render_mask diff --git a/pytorch3d/pytorch3d/implicitron/tools/stats.py b/pytorch3d/pytorch3d/implicitron/tools/stats.py deleted file mode 100644 index c49ba4248062ab8624fa8d84b36739663a118505..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/stats.py +++ /dev/null @@ -1,511 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import gzip -import json -import logging -import time -import warnings -from collections.abc import Iterable -from itertools import cycle - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from matplotlib import colors as mcolors -from pytorch3d.implicitron.tools.vis_utils import get_visdom_connection - -logger = logging.getLogger(__name__) - - -class AverageMeter: - """Computes and stores the average and current value""" - - def __init__(self): - self.history = [] - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1, epoch=0): - - # make sure the history is of the same len as epoch - while len(self.history) <= epoch: - self.history.append([]) - - self.history[epoch].append(val / n) - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def get_epoch_averages(self, epoch=-1): - if len(self.history) == 0: # no stats here - return None - elif epoch == -1: - return [ - (float(np.array(x).mean()) if len(x) > 0 else float("NaN")) - for x in self.history - ] - else: - return float(np.array(self.history[epoch]).mean()) - - def get_all_values(self): - all_vals = [np.array(x) for x in self.history] - all_vals = np.concatenate(all_vals) - return all_vals - - def get_epoch(self): - return len(self.history) - - @staticmethod - def from_json_str(json_str): - self = AverageMeter() - self.__dict__.update(json.loads(json_str)) - return self - - -class Stats: - # TODO: update this with context manager - """ - stats logging object useful for gathering statistics of training a deep net in pytorch - Example:: - - # init stats structure that logs statistics 'objective' and 'top1e' - stats = Stats( ('objective','top1e') ) - network = init_net() # init a pytorch module (=nueral network) - dataloader = init_dataloader() # init a dataloader - for epoch in range(10): - # start of epoch -> call new_epoch - stats.new_epoch() - - # iterate over batches - for batch in dataloader: - - output = network(batch) # run and save into a dict of output variables - - # stats.update() automatically parses the 'objective' and 'top1e' from - # the "output" dict and stores this into the db - stats.update(output) - # prints the metric averages over given epoch - std_out = stats.get_status_string() - logger.info(str_out) - # stores the training plots into '/tmp/epoch_stats.pdf' - # and plots into a visdom server running at localhost (if running) - stats.plot_stats(plot_file='/tmp/epoch_stats.pdf') - - """ - - def __init__( - self, - log_vars, - epoch=-1, - visdom_env="main", - do_plot=True, - plot_file=None, - visdom_server="http://localhost", - visdom_port=8097, - ): - - self.log_vars = log_vars - self.visdom_env = visdom_env - self.visdom_server = visdom_server - self.visdom_port = visdom_port - self.plot_file = plot_file - self.do_plot = do_plot - self.hard_reset(epoch=epoch) - self._t_last_update = None - - @staticmethod - def from_json_str(json_str): - self = Stats([]) - # load the global state - self.__dict__.update(json.loads(json_str)) - # recover the AverageMeters - for stat_set in self.stats: - self.stats[stat_set] = { - log_var: AverageMeter.from_json_str(log_vals_json_str) - for log_var, log_vals_json_str in self.stats[stat_set].items() - } - return self - - @staticmethod - def load(flpath, postfix=".jgz"): - flpath = _get_postfixed_filename(flpath, postfix) - with gzip.open(flpath, "r") as fin: - data = json.loads(fin.read().decode("utf-8")) - return Stats.from_json_str(data) - - 
def save(self, flpath, postfix=".jgz"): - flpath = _get_postfixed_filename(flpath, postfix) - # store into a gzipped-json - with gzip.open(flpath, "w") as fout: - fout.write(json.dumps(self, cls=StatsJSONEncoder).encode("utf-8")) - - # some sugar to be used with "with stats:" at the beginning of the epoch - def __enter__(self): - if self.do_plot and self.epoch >= 0: - self.plot_stats(self.visdom_env) - self.new_epoch() - - def __exit__(self, type, value, traceback): - iserr = type is not None and issubclass(type, Exception) - iserr = iserr or (type is KeyboardInterrupt) - if iserr: - logger.error("error inside 'with' block") - return - if self.do_plot: - self.plot_stats(self.visdom_env) - - def reset(self): # to be called after each epoch - stat_sets = list(self.stats.keys()) - logger.debug(f"stats: epoch {self.epoch} - reset") - self.it = {k: -1 for k in stat_sets} - for stat_set in stat_sets: - for stat in self.stats[stat_set]: - self.stats[stat_set][stat].reset() - - def hard_reset(self, epoch=-1): # to be called during object __init__ - self.epoch = epoch - logger.debug(f"stats: epoch {self.epoch} - hard reset") - self.stats = {} - - # reset - self.reset() - - def new_epoch(self): - logger.debug(f"stats: new epoch {(self.epoch + 1)}") - self.epoch += 1 - self.reset() # zero the stats + increase epoch counter - - def gather_value(self, val): - if isinstance(val, (float, int)): - val = float(val) - else: - val = val.data.cpu().numpy() - val = float(val.sum()) - return val - - def add_log_vars(self, added_log_vars): - for add_log_var in added_log_vars: - if add_log_var not in self.stats: - logger.debug(f"Adding {add_log_var}") - self.log_vars.append(add_log_var) - - def update(self, preds, time_start=None, freeze_iter=False, stat_set="train"): - - if self.epoch == -1: # uninitialized - logger.warning( - "epoch==-1 means uninitialized stats structure -> new_epoch() called" - ) - self.new_epoch() - - if stat_set not in self.stats: - self.stats[stat_set] = {} - self.it[stat_set] = -1 - - if not freeze_iter: - self.it[stat_set] += 1 - - epoch = self.epoch - - for stat in self.log_vars: - - if stat not in self.stats[stat_set]: - self.stats[stat_set][stat] = AverageMeter() - - if stat == "sec/it": # compute speed - if time_start is None: - time_per_it = 0.0 - else: - now = time.time() - time_per_it = now - (self._t_last_update or time_start) - self._t_last_update = now - val = time_per_it - else: - if stat in preds: - try: - val = self.gather_value(preds[stat]) - except KeyError: - raise ValueError( - "could not extract prediction %s\ - from the prediction dictionary" - % stat - ) from None - else: - val = None - - if val is not None: - self.stats[stat_set][stat].update(val, epoch=epoch, n=1) - - def get_epoch_averages(self, epoch=None): - - stat_sets = list(self.stats.keys()) - - if epoch is None: - epoch = self.epoch - if epoch == -1: - epoch = list(range(self.epoch)) - - outvals = {} - for stat_set in stat_sets: - outvals[stat_set] = { - "epoch": epoch, - "it": self.it[stat_set], - "epoch_max": self.epoch, - } - - for stat in self.stats[stat_set].keys(): - if self.stats[stat_set][stat].count == 0: - continue - if isinstance(epoch, Iterable): - avgs = self.stats[stat_set][stat].get_epoch_averages() - avgs = [avgs[e] for e in epoch] - else: - avgs = self.stats[stat_set][stat].get_epoch_averages(epoch=epoch) - outvals[stat_set][stat] = avgs - - return outvals - - def print( - self, - max_it=None, - stat_set="train", - vars_print=None, - get_str=False, - skip_nan=False, - stat_format=lambda s: 
s.replace("loss_", "").replace("prev_stage_", "ps_"), - ): - """ - stats.print() is deprecated. Please use get_status_string() instead. - example: - std_out = stats.get_status_string() - logger.info(str_out) - """ - - epoch = self.epoch - stats = self.stats - - str_out = "" - - it = self.it[stat_set] - stat_str = "" - stats_print = sorted(stats[stat_set].keys()) - for stat in stats_print: - if stats[stat_set][stat].count == 0: - continue - if skip_nan and not np.isfinite(stats[stat_set][stat].avg): - continue - stat_str += " {0:.12}: {1:1.3f} |".format( - stat_format(stat), stats[stat_set][stat].avg - ) - - head_str = "[%s] | epoch %3d | it %5d" % (stat_set, epoch, it) - if max_it: - head_str += "/ %d" % max_it - - str_out = "%s | %s" % (head_str, stat_str) - - if get_str: - return str_out - else: - warnings.warn( - "get_str=False is deprecated." - "Please enable this flag to get receive the output string.", - DeprecationWarning, - ) - print(str_out) - - def get_status_string( - self, - max_it=None, - stat_set="train", - vars_print=None, - skip_nan=False, - stat_format=lambda s: s.replace("loss_", "").replace("prev_stage_", "ps_"), - ): - return self.print( - max_it=max_it, - stat_set=stat_set, - vars_print=vars_print, - get_str=True, - skip_nan=skip_nan, - stat_format=stat_format, - ) - - def plot_stats( - self, visdom_env=None, plot_file=None, visdom_server=None, visdom_port=None - ): - - # use the cached visdom env if none supplied - if visdom_env is None: - visdom_env = self.visdom_env - if visdom_server is None: - visdom_server = self.visdom_server - if visdom_port is None: - visdom_port = self.visdom_port - if plot_file is None: - plot_file = self.plot_file - - stat_sets = list(self.stats.keys()) - - logger.debug( - f"printing charts to visdom env '{visdom_env}' ({visdom_server}:{visdom_port})" - ) - - novisdom = False - - viz = get_visdom_connection(server=visdom_server, port=visdom_port) - if viz is None or not viz.check_connection(): - logger.info("no visdom server! 
-> skipping visdom plots") - novisdom = True - - lines = [] - - # plot metrics - if not novisdom: - viz.close(env=visdom_env, win=None) - - for stat in self.log_vars: - vals = [] - stat_sets_now = [] - for stat_set in stat_sets: - val = self.stats[stat_set][stat].get_epoch_averages() - if val is None: - continue - else: - val = np.array(val).reshape(-1) - stat_sets_now.append(stat_set) - vals.append(val) - - if len(vals) == 0: - continue - - lines.append((stat_sets_now, stat, vals)) - - if not novisdom: - for tmodes, stat, vals in lines: - title = "%s" % stat - opts = {"title": title, "legend": list(tmodes)} - for i, (tmode, val) in enumerate(zip(tmodes, vals)): - update = "append" if i > 0 else None - valid = np.where(np.isfinite(val))[0] - if len(valid) == 0: - continue - x = np.arange(len(val)) - viz.line( - Y=val[valid], - X=x[valid], - env=visdom_env, - opts=opts, - win=f"stat_plot_{title}", - name=tmode, - update=update, - ) - - if plot_file: - logger.info(f"plotting stats to {plot_file}") - ncol = 3 - nrow = int(np.ceil(float(len(lines)) / ncol)) - matplotlib.rcParams.update({"font.size": 5}) - color = cycle(plt.cm.tab10(np.linspace(0, 1, 10))) - fig = plt.figure(1) - plt.clf() - for idx, (tmodes, stat, vals) in enumerate(lines): - c = next(color) - plt.subplot(nrow, ncol, idx + 1) - plt.gca() - for vali, vals_ in enumerate(vals): - c_ = c * (1.0 - float(vali) * 0.3) - valid = np.where(np.isfinite(vals_))[0] - if len(valid) == 0: - continue - x = np.arange(len(vals_)) - plt.plot(x[valid], vals_[valid], c=c_, linewidth=1) - plt.ylabel(stat) - plt.xlabel("epoch") - plt.gca().yaxis.label.set_color(c[0:3] * 0.75) - plt.legend(tmodes) - gcolor = np.array(mcolors.to_rgba("lightgray")) - grid_params = {"visible": True, "color": gcolor} - plt.grid(**grid_params, which="major", linestyle="-", linewidth=0.4) - plt.grid(**grid_params, which="minor", linestyle="--", linewidth=0.2) - plt.minorticks_on() - - plt.tight_layout() - plt.show() - try: - fig.savefig(plot_file) - except PermissionError: - warnings.warn("Cant dump stats due to insufficient permissions!") - - def synchronize_logged_vars(self, log_vars, default_val=float("NaN")): - - stat_sets = list(self.stats.keys()) - - # remove the additional log_vars - for stat_set in stat_sets: - for stat in self.stats[stat_set].keys(): - if stat not in log_vars: - logger.warning(f"additional stat {stat_set}:{stat} -> removing") - - self.stats[stat_set] = { - stat: v for stat, v in self.stats[stat_set].items() if stat in log_vars - } - - self.log_vars = log_vars # !!! - - for stat_set in stat_sets: - for stat in log_vars: - if stat not in self.stats[stat_set]: - logger.info( - "missing stat %s:%s -> filling with default values (%1.2f)" - % (stat_set, stat, default_val) - ) - elif len(self.stats[stat_set][stat].history) != self.epoch + 1: - h = self.stats[stat_set][stat].history - if len(h) == 0: # just never updated stat ... skip - continue - else: - logger.info( - "incomplete stat %s:%s -> reseting with default values (%1.2f)" - % (stat_set, stat, default_val) - ) - else: - continue - - self.stats[stat_set][stat] = AverageMeter() - self.stats[stat_set][stat].reset() - - lastep = self.epoch + 1 - for ep in range(lastep): - self.stats[stat_set][stat].update(default_val, n=1, epoch=ep) - epoch_generated = self.stats[stat_set][stat].get_epoch() - assert ( - epoch_generated == self.epoch + 1 - ), "bad epoch of synchronized log_var! 
%d vs %d" % ( - self.epoch + 1, - epoch_generated, - ) - - -class StatsJSONEncoder(json.JSONEncoder): - def default(self, o): - if isinstance(o, (AverageMeter, Stats)): - enc = self.encode(o.__dict__) - return enc - else: - raise TypeError( - f"Object of type {o.__class__.__name__} " f"is not JSON serializable" - ) - - -def _get_postfixed_filename(fl, postfix): - return fl if fl.endswith(postfix) else fl + postfix diff --git a/pytorch3d/pytorch3d/implicitron/tools/utils.py b/pytorch3d/pytorch3d/implicitron/tools/utils.py deleted file mode 100644 index 6cb0d4ec136467eb0996c39be668263168e07ddd..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/utils.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import collections -import dataclasses -import time -from contextlib import contextmanager -from typing import Any, Callable, Dict, Iterable, Iterator - -import torch - - -@contextmanager -def evaluating(net: torch.nn.Module): - """Temporarily switch to evaluation mode.""" - istrain = net.training - try: - net.eval() - yield net - finally: - if istrain: - net.train() - - -def try_to_cuda(t: Any) -> Any: - """ - Try to move the input variable `t` to a cuda device. - - Args: - t: Input. - - Returns: - t_cuda: `t` moved to a cuda device, if supported. - """ - try: - t = t.cuda() - except AttributeError: - pass - return t - - -def try_to_cpu(t: Any) -> Any: - """ - Try to move the input variable `t` to a cpu device. - - Args: - t: Input. - - Returns: - t_cpu: `t` moved to a cpu device, if supported. - """ - try: - t = t.cpu() - except AttributeError: - pass - return t - - -def dict_to_cuda(batch: Dict[Any, Any]) -> Dict[Any, Any]: - """ - Move all values in a dictionary to cuda if supported. - - Args: - batch: Input dict. - - Returns: - batch_cuda: `batch` moved to a cuda device, if supported. - """ - return {k: try_to_cuda(v) for k, v in batch.items()} - - -def dict_to_cpu(batch): - """ - Move all values in a dictionary to cpu if supported. - - Args: - batch: Input dict. - - Returns: - batch_cpu: `batch` moved to a cpu device, if supported. - """ - return {k: try_to_cpu(v) for k, v in batch.items()} - - -def dataclass_to_cuda_(obj): - """ - Move all contents of a dataclass to cuda inplace if supported. - - Args: - batch: Input dataclass. - - Returns: - batch_cuda: `batch` moved to a cuda device, if supported. - """ - for f in dataclasses.fields(obj): - setattr(obj, f.name, try_to_cuda(getattr(obj, f.name))) - return obj - - -def dataclass_to_cpu_(obj): - """ - Move all contents of a dataclass to cpu inplace if supported. - - Args: - batch: Input dataclass. - - Returns: - batch_cuda: `batch` moved to a cpu device, if supported. - """ - for f in dataclasses.fields(obj): - setattr(obj, f.name, try_to_cpu(getattr(obj, f.name))) - return obj - - -# TODO: test it -def cat_dataclass(batch, tensor_collator: Callable): - """ - Concatenate all fields of a list of dataclasses `batch` to a single - dataclass object using `tensor_collator`. - - Args: - batch: Input list of dataclasses. - - Returns: - concatenated_batch: All elements of `batch` concatenated to a single - dataclass object. - tensor_collator: The function used to concatenate tensor fields. 
- """ - - elem = batch[0] - collated = {} - - for f in dataclasses.fields(elem): - elem_f = getattr(elem, f.name) - if elem_f is None: - collated[f.name] = None - elif torch.is_tensor(elem_f): - collated[f.name] = tensor_collator([getattr(e, f.name) for e in batch]) - elif dataclasses.is_dataclass(elem_f): - collated[f.name] = cat_dataclass( - [getattr(e, f.name) for e in batch], tensor_collator - ) - elif isinstance(elem_f, collections.abc.Mapping): - collated[f.name] = { - k: tensor_collator([getattr(e, f.name)[k] for e in batch]) - if elem_f[k] is not None - else None - for k in elem_f - } - else: - raise ValueError("Unsupported field type for concatenation") - - return type(elem)(**collated) - - -def recursive_visitor(it: Iterable[Any]) -> Iterator[Any]: - for x in it: - if isinstance(x, Iterable) and not isinstance(x, (str, bytes)): - yield from recursive_visitor(x) - else: - yield x - - -def get_inlier_indicators( - tensor: torch.Tensor, dim: int, outlier_rate: float -) -> torch.Tensor: - remove_elements = int(min(outlier_rate, 1.0) * tensor.shape[dim] / 2) - hi = torch.topk(tensor, remove_elements, dim=dim).indices.tolist() - lo = torch.topk(-tensor, remove_elements, dim=dim).indices.tolist() - remove_indices = set(recursive_visitor([hi, lo])) - keep_indices = tensor.new_ones(tensor.shape[dim : dim + 1], dtype=torch.bool) - keep_indices[list(remove_indices)] = False - return keep_indices - - -class Timer: - """ - A simple class for timing execution. - - Example:: - - with Timer(): - print("This print statement is timed.") - - """ - - def __init__(self, name="timer", quiet=False): - self.name = name - self.quiet = quiet - - def __enter__(self): - self.start = time.time() - return self - - def __exit__(self, *args): - self.end = time.time() - self.interval = self.end - self.start - if not self.quiet: - print("%20s: %1.6f sec" % (self.name, self.interval)) diff --git a/pytorch3d/pytorch3d/implicitron/tools/video_writer.py b/pytorch3d/pytorch3d/implicitron/tools/video_writer.py deleted file mode 100644 index 3db55e886587a9d080877b17e797f252fe64c055..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/video_writer.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import os -import shutil -import subprocess -import tempfile -import warnings -from typing import Optional, Tuple, Union - -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from PIL import Image - -_DEFAULT_FFMPEG = os.environ.get("FFMPEG", "ffmpeg") - -matplotlib.use("Agg") - - -class VideoWriter: - """ - A class for exporting videos. - """ - - def __init__( - self, - cache_dir: Optional[str] = None, - ffmpeg_bin: str = _DEFAULT_FFMPEG, - out_path: str = "/tmp/video.mp4", - fps: int = 20, - output_format: str = "visdom", - rmdir_allowed: bool = False, - **kwargs, - ) -> None: - """ - Args: - cache_dir: A directory for storing the video frames. If `None`, - a temporary directory will be used. - ffmpeg_bin: The path to an `ffmpeg` executable. - out_path: The path to the output video. - fps: The speed of the generated video in frames-per-second. - output_format: Format of the output video. Currently only `"visdom"` - is supported. - rmdir_allowed: If `True` delete and create `cache_dir` in case - it is not empty. 
- """ - self.rmdir_allowed = rmdir_allowed - self.output_format = output_format - self.fps = fps - self.out_path = out_path - self.cache_dir = cache_dir - self.ffmpeg_bin = ffmpeg_bin - self.frames = [] - self.regexp = "frame_%08d.png" - self.frame_num = 0 - - if self.cache_dir is not None: - self.tmp_dir = None - if os.path.isdir(self.cache_dir): - if rmdir_allowed: - shutil.rmtree(self.cache_dir) - else: - warnings.warn( - f"Warning: cache directory not empty ({self.cache_dir})." - ) - os.makedirs(self.cache_dir, exist_ok=True) - else: - self.tmp_dir = tempfile.TemporaryDirectory() - self.cache_dir = self.tmp_dir.name - - def write_frame( - self, - frame: Union[matplotlib.figure.Figure, np.ndarray, Image.Image, str], - resize: Optional[Union[float, Tuple[int, int]]] = None, - ) -> None: - """ - Write a frame to the video. - - Args: - frame: An object containing the frame image. - resize: Either a floating defining the image rescaling factor - or a 2-tuple defining the size of the output image. - """ - - # pyre-fixme[6]: For 1st argument expected `Union[PathLike[str], str]` but - # got `Optional[str]`. - outfile = os.path.join(self.cache_dir, self.regexp % self.frame_num) - - if isinstance(frame, matplotlib.figure.Figure): - plt.savefig(outfile) - im = Image.open(outfile) - elif isinstance(frame, np.ndarray): - if frame.dtype in (np.float64, np.float32, float): - frame = (np.transpose(frame, (1, 2, 0)) * 255.0).astype(np.uint8) - im = Image.fromarray(frame) - elif isinstance(frame, Image.Image): - im = frame - elif isinstance(frame, str): - im = Image.open(frame).convert("RGB") - else: - raise ValueError("Cant convert type %s" % str(type(frame))) - - if im is not None: - if resize is not None: - if isinstance(resize, float): - resize = [int(resize * s) for s in im.size] - else: - resize = im.size - # make sure size is divisible by 2 - resize = tuple([resize[i] + resize[i] % 2 for i in (0, 1)]) - # pyre-fixme[16]: Module `Image` has no attribute `ANTIALIAS`. - im = im.resize(resize, Image.ANTIALIAS) - im.save(outfile) - - self.frames.append(outfile) - self.frame_num += 1 - - def get_video(self, quiet: bool = True) -> str: - """ - Generate the video from the written frames. - - Args: - quiet: If `True`, suppresses logging messages. - - Returns: - video_path: The path to the generated video if any frames were added. - Otherwise returns an empty string. - """ - if self.frame_num == 0: - return "" - - # pyre-fixme[6]: For 1st argument expected `Union[PathLike[str], str]` but - # got `Optional[str]`. - regexp = os.path.join(self.cache_dir, self.regexp) - - if shutil.which(self.ffmpeg_bin) is None: - raise ValueError( - f"Cannot find ffmpeg as `{self.ffmpeg_bin}`. " - + "Please set FFMPEG in the environment or ffmpeg_bin on this class." 
- ) - - if self.output_format == "visdom": # works for ppt too - args = [ - self.ffmpeg_bin, - "-r", - str(self.fps), - "-i", - regexp, - "-vcodec", - "h264", - "-f", - "mp4", - "-y", - "-crf", - "18", - "-b", - "2000k", - "-pix_fmt", - "yuv420p", - self.out_path, - ] - if quiet: - subprocess.check_call( - args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) - else: - subprocess.check_call(args) - else: - raise ValueError("no such output type %s" % str(self.output_format)) - - return self.out_path - - def __del__(self) -> None: - if self.tmp_dir is not None: - self.tmp_dir.cleanup() diff --git a/pytorch3d/pytorch3d/implicitron/tools/vis_utils.py b/pytorch3d/pytorch3d/implicitron/tools/vis_utils.py deleted file mode 100644 index 1b3b1f15db5a86f0b1d4b0a319b9306e8212dbfe..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/implicitron/tools/vis_utils.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING - -import torch - -if TYPE_CHECKING: - from visdom import Visdom - - -logger = logging.getLogger(__name__) - - -def get_visdom_env(visdom_env: str, exp_dir: str) -> str: - """ - Parse out visdom environment name from the input config. - - Args: - visdom_env: Name of the wisdom environment, could be empty string. - exp_dir: Root experiment directory. - - Returns: - visdom_env: The name of the visdom environment. If the given visdom_env is - empty, return the name of the bottom directory in exp_dir. - """ - if len(visdom_env) == 0: - visdom_env = exp_dir.split("/")[-1] - else: - visdom_env = visdom_env - return visdom_env - - -# TODO: a proper singleton -_viz_singleton = None - - -def get_visdom_connection( - server: str = "http://localhost", - port: int = 8097, -) -> Optional["Visdom"]: - """ - Obtain a connection to a visdom server if visdom is installed. - - Args: - server: Server address. - port: Server port. - - Returns: - connection: The connection object. - """ - try: - from visdom import Visdom - except ImportError: - logger.debug("Cannot load visdom") - return None - - if server == "None": - return None - - global _viz_singleton - if _viz_singleton is None: - _viz_singleton = Visdom(server=server, port=port) - return _viz_singleton - - -def visualize_basics( - viz: "Visdom", - preds: Dict[str, Any], - visdom_env_imgs: str, - title: str = "", - visualize_preds_keys: Tuple[str, ...] = ( - "image_rgb", - "images_render", - "fg_probability", - "masks_render", - "depths_render", - "depth_map", - ), - store_history: bool = False, -) -> None: - """ - Visualize basic outputs of a `GenericModel` to visdom. - - Args: - viz: The visdom object. - preds: A dictionary containing `GenericModel` outputs. - visdom_env_imgs: Target visdom environment name. - title: The title of produced visdom window. - visualize_preds_keys: The list of keys of `preds` for visualization. - store_history: Store the history buffer in visdom windows. 
- """ - imout = {} - for k in visualize_preds_keys: - if k not in preds or preds[k] is None: - logger.info(f"cant show {k}") - continue - v = preds[k].cpu().detach().clone() - if k.startswith("depth"): - # divide by 95th percentile - normfac = ( - v.view(v.shape[0], -1) - .topk(k=int(0.05 * (v.numel() // v.shape[0])), dim=-1) - .values[:, -1] - ) - v = v / normfac[:, None, None, None].clamp(1e-4) - if v.shape[1] == 1: - v = v.repeat(1, 3, 1, 1) - v = torch.nn.functional.interpolate( - v, - scale_factor=( - 600.0 - if ( - "_eval" in visdom_env_imgs - and k in ("images_render", "depths_render") - ) - else 200.0 - ) - / v.shape[2], - mode="bilinear", - ) - imout[k] = v - - # TODO: handle errors on the outside - try: - imout = {"all": torch.cat(list(imout.values()), dim=2)} - except RuntimeError as e: - print("cant cat!", e.args) - - for k, v in imout.items(): - viz.images( - v.clamp(0.0, 1.0), - win=k, - env=visdom_env_imgs, - opts={"title": title + "_" + k, "store_history": store_history}, - ) - - -def make_depth_image( - depths: torch.Tensor, - masks: torch.Tensor, - max_quantile: float = 0.98, - min_quantile: float = 0.02, - min_out_depth: float = 0.1, - max_out_depth: float = 0.9, -) -> torch.Tensor: - """ - Convert a batch of depth maps to a grayscale image. - - Args: - depths: A tensor of shape `(B, 1, H, W)` containing a batch of depth maps. - masks: A tensor of shape `(B, 1, H, W)` containing a batch of foreground masks. - max_quantile: The quantile of the input depth values which will - be mapped to `max_out_depth`. - min_quantile: The quantile of the input depth values which will - be mapped to `min_out_depth`. - min_out_depth: The minimal value in each depth map will be assigned this color. - max_out_depth: The maximal value in each depth map will be assigned this color. - - Returns: - depth_image: A tensor of shape `(B, 1, H, W)` a batch of grayscale - depth images. - """ - normfacs = [] - for d, m in zip(depths, masks): - ok = (d.view(-1) > 1e-6) * (m.view(-1) > 0.5) - if ok.sum() <= 1: - logger.info("empty depth!") - normfacs.append(torch.zeros(2).type_as(depths)) - continue - dok = d.view(-1)[ok].view(-1) - _maxk = max(int(round((1 - max_quantile) * (dok.numel()))), 1) - _mink = max(int(round(min_quantile * (dok.numel()))), 1) - normfac_max = dok.topk(k=_maxk, dim=-1).values[-1] - normfac_min = dok.topk(k=_mink, dim=-1, largest=False).values[-1] - normfacs.append(torch.stack([normfac_min, normfac_max])) - normfacs = torch.stack(normfacs) - _min, _max = (normfacs[:, 0].view(-1, 1, 1, 1), normfacs[:, 1].view(-1, 1, 1, 1)) - depths = (depths - _min) / (_max - _min).clamp(1e-4) - depths = ( - (depths * (max_out_depth - min_out_depth) + min_out_depth) * masks.float() - ).clamp(0.0, 1.0) - return depths diff --git a/pytorch3d/pytorch3d/io/__init__.py b/pytorch3d/pytorch3d/io/__init__.py deleted file mode 100644 index c28b5df11912086c2aa1801704be22a0c263775e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- - -from .obj_io import load_obj, load_objs_as_meshes, save_obj -from .pluggable import IO -from .ply_io import load_ply, save_ply - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/io/experimental_gltf_io.py b/pytorch3d/pytorch3d/io/experimental_gltf_io.py deleted file mode 100644 index 10905227ddf38450058d6f6aa7137d87785d2e17..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/experimental_gltf_io.py +++ /dev/null @@ -1,862 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -""" -This module implements loading meshes from glTF 2 assets stored in a -GLB container file or a glTF JSON file with embedded binary data. -It is experimental. - -The module provides a MeshFormatInterpreter called -MeshGlbFormat which must be used explicitly. -e.g. - -.. code-block:: python - - from pytorch3d.io import IO - from pytorch3d.io.experimental_gltf_io import MeshGlbFormat - - io = IO() - io.register_meshes_format(MeshGlbFormat()) - io.load_mesh(...) - -This implementation is quite restricted in what it supports. - - - It does not try to validate the input against the standard. - - It loads the default scene only. - - Only triangulated geometry is supported. - - The geometry of all meshes of the entire scene is aggregated into a single mesh. - Use `load_meshes()` instead to get un-aggregated (but transformed) ones. - - All material properties are ignored except for either vertex color, baseColorTexture - or baseColorFactor. If available, one of these (in this order) is exclusively - used which does not match the semantics of the standard. 
-""" - -import json -import struct -import warnings -from base64 import b64decode -from collections import defaultdict, deque -from enum import IntEnum -from io import BytesIO -from typing import Any, BinaryIO, cast, Dict, List, Optional, Tuple, Union - -import numpy as np -import torch -from iopath.common.file_io import PathManager -from PIL import Image -from pytorch3d.io.utils import _open_file, PathOrStr -from pytorch3d.renderer.mesh import TexturesBase, TexturesUV, TexturesVertex -from pytorch3d.structures import join_meshes_as_scene, Meshes -from pytorch3d.transforms import quaternion_to_matrix, Transform3d - -from .pluggable_formats import endswith, MeshFormatInterpreter - - -_GLTF_MAGIC = 0x46546C67 -_JSON_CHUNK_TYPE = 0x4E4F534A -_BINARY_CHUNK_TYPE = 0x004E4942 -_DATA_URI_PREFIX = "data:application/octet-stream;base64," - - -class _PrimitiveMode(IntEnum): - POINTS = 0 - LINES = 1 - LINE_LOOP = 2 - LINE_STRIP = 3 - TRIANGLES = 4 - TRIANGLE_STRIP = 5 - TRIANGLE_FAN = 6 - - -class _ComponentType(IntEnum): - BYTE = 5120 - UNSIGNED_BYTE = 5121 - SHORT = 5122 - UNSIGNED_SHORT = 5123 - UNSIGNED_INT = 5125 - FLOAT = 5126 - - -_ITEM_TYPES: Dict[int, Any] = { - 5120: np.int8, - 5121: np.uint8, - 5122: np.int16, - 5123: np.uint16, - 5125: np.uint32, - 5126: np.float32, -} - - -_ElementShape = Union[Tuple[int], Tuple[int, int]] -_ELEMENT_SHAPES: Dict[str, _ElementShape] = { - "SCALAR": (1,), - "VEC2": (2,), - "VEC3": (3,), - "VEC4": (4,), - "MAT2": (2, 2), - "MAT3": (3, 3), - "MAT4": (4, 4), -} - -_DTYPE_BYTES: Dict[Any, int] = { - np.int8: 1, - np.uint8: 1, - np.int16: 2, - np.uint16: 2, - np.uint32: 4, - np.float32: 4, -} - - -class _TargetType(IntEnum): - ARRAY_BUFFER = 34962 - ELEMENT_ARRAY_BUFFER = 34963 - - -class OurEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.int64): - return str(obj) - return super(OurEncoder, self).default(obj) - - -def _read_header(stream: BinaryIO) -> Optional[Tuple[int, int]]: - header = stream.read(12) - magic, version, length = struct.unpack(" Optional[Tuple[Dict[str, Any], np.ndarray]]: - """ - Get the json header and the binary data from a - GLB file. - """ - json_data = None - binary_data = None - - while stream.tell() < length: - chunk_header = stream.read(8) - chunk_length, chunk_type = struct.unpack(" Transform3d: - """ - Convert a transform from the json data in to a PyTorch3D - Transform3d format. - """ - array = node.get("matrix") - if array is not None: # Stored in column-major order - M = np.array(array, dtype=np.float32).reshape(4, 4, order="F") - return Transform3d(matrix=torch.from_numpy(M)) - - out = Transform3d() - - # Given some of (scale/rotation/translation), we do them in that order to - # get points in to the world space. - # See https://github.com/KhronosGroup/glTF/issues/743 . - - array = node.get("scale", None) - if array is not None: - scale_vector = torch.FloatTensor(array) - out = out.scale(scale_vector[None]) - - # Rotation quaternion (x, y, z, w) where w is the scalar - array = node.get("rotation", None) - if array is not None: - x, y, z, w = array - # We negate w. This is equivalent to inverting the rotation. - # This is needed as quaternion_to_matrix makes a matrix which - # operates on column vectors, whereas Transform3d wants a - # matrix which operates on row vectors. 
- rotation_quaternion = torch.FloatTensor([-w, x, y, z]) - rotation_matrix = quaternion_to_matrix(rotation_quaternion) - out = out.rotate(R=rotation_matrix) - - array = node.get("translation", None) - if array is not None: - translation_vector = torch.FloatTensor(array) - out = out.translate(x=translation_vector[None]) - - return out - - -class _GLTFLoader: - def __init__(self, stream: BinaryIO) -> None: - self._json_data = None - # Map from buffer index to (decoded) binary data - self._binary_data = {} - - version_and_length = _read_header(stream) - if version_and_length is None: # GLTF - stream.seek(0) - json_data = json.load(stream) - else: # GLB - version, length = version_and_length - if version != 2: - warnings.warn("Unsupported version") - return - json_and_binary_data = _read_chunks(stream, length) - if json_and_binary_data is None: - raise ValueError("Data not found") - json_data, binary_data = json_and_binary_data - self._binary_data[0] = binary_data - - self._json_data = json_data - self._accessors = json_data.get("accessors", []) - self._buffer_views = json_data.get("bufferViews", []) - self._buffers = json_data.get("buffers", []) - self._texture_map_images = {} - - def _access_image(self, image_index: int) -> np.ndarray: - """ - Get the data for an image from the file. This is only called - by _get_texture_map_image which caches it. - """ - - image_json = self._json_data["images"][image_index] - buffer_view = self._buffer_views[image_json["bufferView"]] - if "byteStride" in buffer_view: - raise NotImplementedError("strided buffer views") - - length = buffer_view["byteLength"] - offset = buffer_view.get("byteOffset", 0) - - binary_data = self.get_binary_data(buffer_view["buffer"]) - bytesio = BytesIO(binary_data[offset : offset + length].tobytes()) - with Image.open(bytesio) as f: - array = np.array(f) - if array.dtype == np.uint8: - return array.astype(np.float32) / 255.0 - else: - return array - - def _get_texture_map_image(self, image_index: int) -> torch.Tensor: - """ - Return a texture map image as a torch tensor. - Calling this function repeatedly with the same arguments returns - the very same tensor, this allows a memory optimization to happen - later in TexturesUV.join_scene. - Any alpha channel is ignored. - """ - im = self._texture_map_images.get(image_index) - if im is not None: - return im - - im = torch.from_numpy(self._access_image(image_index))[:, :, :3] - self._texture_map_images[image_index] = im - return im - - def _access_data(self, accessor_index: int) -> np.ndarray: - """ - Get the raw data from an accessor as a numpy array. 
- """ - accessor = self._accessors[accessor_index] - - buffer_view_index = accessor.get("bufferView") - # Undefined buffer view (all zeros) are not (yet) supported - if buffer_view_index is None: - raise NotImplementedError("Undefined buffer view") - - accessor_byte_offset = accessor.get("byteOffset", 0) - component_type = accessor["componentType"] - element_count = accessor["count"] - element_type = accessor["type"] - - # Sparse accessors are not (yet) supported - if accessor.get("sparse") is not None: - raise NotImplementedError("Sparse Accessors") - - buffer_view = self._buffer_views[buffer_view_index] - buffer_index = buffer_view["buffer"] - buffer_byte_length = buffer_view["byteLength"] - element_byte_offset = buffer_view.get("byteOffset", 0) - element_byte_stride = buffer_view.get("byteStride", 0) - if element_byte_stride != 0 and element_byte_stride < 4: - raise ValueError("Stride is too small.") - if element_byte_stride > 252: - raise ValueError("Stride is too big.") - - element_shape = _ELEMENT_SHAPES[element_type] - item_type = _ITEM_TYPES[component_type] - item_dtype = np.dtype(item_type) - item_count = np.prod(element_shape) - item_size = item_dtype.itemsize - size = element_count * item_count * item_size - if size > buffer_byte_length: - raise ValueError("Buffer did not have enough data for the accessor") - - buffer_ = self._buffers[buffer_index] - binary_data = self.get_binary_data(buffer_index) - if len(binary_data) < buffer_["byteLength"]: - raise ValueError("Not enough binary data for the buffer") - - if element_byte_stride == 0: - element_byte_stride = item_size * item_count - # The same buffer can store interleaved elements - if element_byte_stride < item_size * item_count: - raise ValueError("Items should not overlap") - - dtype = np.dtype( - { - "names": ["element"], - "formats": [str(element_shape) + item_dtype.str], - "offsets": [0], - "itemsize": element_byte_stride, - } - ) - - byte_offset = accessor_byte_offset + element_byte_offset - if byte_offset % item_size != 0: - raise ValueError("Misaligned data") - byte_length = element_count * element_byte_stride - buffer_view = binary_data[byte_offset : byte_offset + byte_length].view(dtype)[ - "element" - ] - - # Convert matrix data from column-major (OpenGL) to row-major order - if element_type in ("MAT2", "MAT3", "MAT4"): - buffer_view = np.transpose(buffer_view, (0, 2, 1)) - - return buffer_view - - def _get_primitive_attribute( - self, primitive_attributes: Dict[str, Any], key: str, dtype - ) -> Optional[np.ndarray]: - accessor_index = primitive_attributes.get(key) - if accessor_index is None: - return None - primitive_attribute = self._access_data(accessor_index) - if key == "JOINTS_0": - pass - elif dtype == np.uint8: - primitive_attribute /= 255.0 - elif dtype == np.uint16: - primitive_attribute /= 65535.0 - else: - if dtype != np.float32: - raise ValueError("Unexpected data type") - primitive_attribute = primitive_attribute.astype(dtype) - return primitive_attribute - - def get_binary_data(self, buffer_index: int): - """ - Get the binary data from a buffer as a 1D numpy array of bytes. - This is implemented for explicit uri data buffers or the main GLB data - segment. 
- """ - buffer_ = self._buffers[buffer_index] - binary_data = self._binary_data.get(buffer_index) - if binary_data is None: # Lazily decode binary data - uri = buffer_.get("uri") - if not uri.startswith(_DATA_URI_PREFIX): - raise NotImplementedError("Unexpected URI type") - binary_data = b64decode(uri[len(_DATA_URI_PREFIX) :]) - binary_data = np.frombuffer(binary_data, dtype=np.uint8) - self._binary_data[buffer_index] = binary_data - return binary_data - - def get_texture_for_mesh( - self, primitive: Dict[str, Any], indices: torch.Tensor - ) -> Optional[TexturesBase]: - """ - Get the texture object representing the given mesh primitive. - - Args: - primitive: the mesh primitive being loaded. - indices: the face indices of the mesh - """ - attributes = primitive["attributes"] - vertex_colors = self._get_primitive_attribute(attributes, "COLOR_0", np.float32) - if vertex_colors is not None: - return TexturesVertex([torch.from_numpy(vertex_colors)]) - - vertex_texcoords_0 = self._get_primitive_attribute( - attributes, "TEXCOORD_0", np.float32 - ) - if vertex_texcoords_0 is not None: - verts_uvs = torch.from_numpy(vertex_texcoords_0) - verts_uvs[:, 1] = 1 - verts_uvs[:, -1] - faces_uvs = indices - material_index = primitive.get("material", 0) - material = self._json_data["materials"][material_index] - material_roughness = material["pbrMetallicRoughness"] - if "baseColorTexture" in material_roughness: - texture_index = material_roughness["baseColorTexture"]["index"] - texture_json = self._json_data["textures"][texture_index] - # Todo - include baseColorFactor when also given - # Todo - look at the sampler - image_index = texture_json["source"] - map = self._get_texture_map_image(image_index) - elif "baseColorFactor" in material_roughness: - # Constant color? - map = torch.FloatTensor(material_roughness["baseColorFactor"])[ - None, None, :3 - ] - texture = TexturesUV( - # pyre-fixme[61]: `map` may not be initialized here. - maps=[map], # alpha channel ignored - faces_uvs=[faces_uvs], - verts_uvs=[verts_uvs], - ) - return texture - - return None - - def load(self, include_textures: bool) -> List[Tuple[Optional[str], Meshes]]: - """ - Attempt to load all the meshes making up the default scene from - the file as a list of possibly-named Meshes objects. - - Args: - include_textures: Whether to try loading textures. - - Returns: - Meshes object containing one mesh. - """ - if self._json_data is None: - raise ValueError("Initialization problem") - - # This loads the default scene from the file. - # This is usually the only one. - # It is possible to have multiple scenes, in which case - # you could choose another here instead of taking the default. 
- scene_index = self._json_data.get("scene") - - if scene_index is None: - raise ValueError("Default scene is not specified.") - - scene = self._json_data["scenes"][scene_index] - nodes = self._json_data.get("nodes", []) - meshes = self._json_data.get("meshes", []) - root_node_indices = scene["nodes"] - - mesh_transform = Transform3d() - names_meshes_list: List[Tuple[Optional[str], Meshes]] = [] - - # Keep track and apply the transform of the scene node to mesh vertices - Q = deque([(Transform3d(), node_index) for node_index in root_node_indices]) - - while Q: - parent_transform, current_node_index = Q.popleft() - - current_node = nodes[current_node_index] - - transform = _make_node_transform(current_node) - current_transform = transform.compose(parent_transform) - - if "mesh" in current_node: - mesh_index = current_node["mesh"] - mesh = meshes[mesh_index] - mesh_name = mesh.get("name", None) - mesh_transform = current_transform - - for primitive in mesh["primitives"]: - attributes = primitive["attributes"] - accessor_index = attributes["POSITION"] - positions = torch.from_numpy( - self._access_data(accessor_index).copy() - ) - positions = mesh_transform.transform_points(positions) - - mode = primitive.get("mode", _PrimitiveMode.TRIANGLES) - if mode != _PrimitiveMode.TRIANGLES: - raise NotImplementedError("Non triangular meshes") - - if "indices" in primitive: - accessor_index = primitive["indices"] - indices = self._access_data(accessor_index).astype(np.int64) - else: - indices = np.arange(0, len(positions), dtype=np.int64) - indices = torch.from_numpy(indices.reshape(-1, 3)) - - texture = None - if include_textures: - texture = self.get_texture_for_mesh(primitive, indices) - - mesh_obj = Meshes( - verts=[positions], faces=[indices], textures=texture - ) - names_meshes_list.append((mesh_name, mesh_obj)) - - if "children" in current_node: - children_node_indices = current_node["children"] - Q.extend( - [ - (current_transform, node_index) - for node_index in children_node_indices - ] - ) - - return names_meshes_list - - -def load_meshes( - path: PathOrStr, - path_manager: PathManager, - include_textures: bool = True, -) -> List[Tuple[Optional[str], Meshes]]: - """ - Loads all the meshes from the default scene in the given GLB file. - and returns them separately. - - Args: - path: path to read from - path_manager: PathManager object for interpreting the path - include_textures: whether to load textures - - Returns: - List of (name, mesh) pairs, where the name is the optional name property - from the GLB file, or None if it is absent, and the mesh is a Meshes - object containing one mesh. 
- """ - with _open_file(path, path_manager, "rb") as f: - loader = _GLTFLoader(cast(BinaryIO, f)) - names_meshes_list = loader.load(include_textures=include_textures) - return names_meshes_list - - -class _GLTFWriter: - def __init__(self, data: Meshes, buffer_stream: BinaryIO) -> None: - self._json_data = defaultdict(list) - self.mesh = data - self.buffer_stream = buffer_stream - - # initialize json with one scene and one node - scene_index = 0 - # pyre-fixme[6]: Incompatible parameter type - self._json_data["scene"] = scene_index - self._json_data["scenes"].append({"nodes": [scene_index]}) - self._json_data["asset"] = {"version": "2.0"} - node = {"name": "Node", "mesh": 0} - self._json_data["nodes"].append(node) - - # mesh primitives - meshes = defaultdict(list) - # pyre-fixme[6]: Incompatible parameter type - meshes["name"] = "Node-Mesh" - if isinstance(self.mesh.textures, TexturesVertex): - primitives = { - "attributes": {"POSITION": 0, "COLOR_0": 2}, - "indices": 1, - "mode": _PrimitiveMode.TRIANGLES, - } - elif isinstance(self.mesh.textures, TexturesUV): - primitives = { - "attributes": {"POSITION": 0, "TEXCOORD_0": 2}, - "indices": 1, - "mode": _PrimitiveMode.TRIANGLES, - "material": 0, - } - else: - primitives = { - "attributes": {"POSITION": 0}, - "indices": 1, - "mode": _PrimitiveMode.TRIANGLES, - } - - meshes["primitives"].append(primitives) - self._json_data["meshes"].append(meshes) - - # default material - material = { - "name": "material_1", - "pbrMetallicRoughness": { - "baseColorTexture": {"index": 0}, - "baseColorFactor": [1, 1, 1, 1], - "metallicFactor": 0, - "roughnessFactor": 0.99, - }, - "emissiveFactor": [0, 0, 0], - "alphaMode": "OPAQUE", - } - self._json_data["materials"].append(material) - - # default sampler - sampler = {"magFilter": 9729, "minFilter": 9986, "wrapS": 10497, "wrapT": 10497} - self._json_data["samplers"].append(sampler) - - # default textures - texture = {"sampler": 0, "source": 0} - self._json_data["textures"].append(texture) - - def _write_accessor_json(self, key: str) -> Tuple[int, np.ndarray]: - name = "Node-Mesh_%s" % key - byte_offset = 0 - if key == "positions": - data = self.mesh.verts_packed().cpu().numpy() - component_type = _ComponentType.FLOAT - element_type = "VEC3" - buffer_view = 0 - element_min = list(map(float, np.min(data, axis=0))) - element_max = list(map(float, np.max(data, axis=0))) - byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]] - elif key == "texcoords": - component_type = _ComponentType.FLOAT - data = self.mesh.textures.verts_uvs_list()[0].cpu().numpy() - data[:, 1] = 1 - data[:, -1] # flip y tex-coordinate - element_type = "VEC2" - buffer_view = 2 - element_min = list(map(float, np.min(data, axis=0))) - element_max = list(map(float, np.max(data, axis=0))) - byte_per_element = 2 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]] - elif key == "texvertices": - component_type = _ComponentType.FLOAT - data = self.mesh.textures.verts_features_list()[0].cpu().numpy() - element_type = "VEC3" - buffer_view = 2 - element_min = list(map(float, np.min(data, axis=0))) - element_max = list(map(float, np.max(data, axis=0))) - byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]] - elif key == "indices": - component_type = _ComponentType.UNSIGNED_SHORT - data = ( - self.mesh.faces_packed() - .cpu() - .numpy() - .astype(_ITEM_TYPES[component_type]) - ) - element_type = "SCALAR" - buffer_view = 1 - element_min = list(map(int, np.min(data, keepdims=True))) - element_max = list(map(int, np.max(data, 
keepdims=True))) - byte_per_element = ( - 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.UNSIGNED_SHORT]] - ) - else: - raise NotImplementedError( - "invalid key accessor, should be one of positions, indices or texcoords" - ) - - count = int(data.shape[0]) - byte_length = count * byte_per_element - accessor_json = { - "name": name, - "componentType": component_type, - "type": element_type, - "bufferView": buffer_view, - "byteOffset": byte_offset, - "min": element_min, - "max": element_max, - "count": count * 3 if key == "indices" else count, - } - self._json_data["accessors"].append(accessor_json) - return (byte_length, data) - - def _write_bufferview(self, key: str, **kwargs): - if key not in ["positions", "texcoords", "texvertices", "indices"]: - raise ValueError( - "key must be one of positions, texcoords, texvertices or indices" - ) - - bufferview = { - "name": "bufferView_%s" % key, - "buffer": 0, - } - target = _TargetType.ARRAY_BUFFER - if key == "positions": - byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]] - bufferview["byteStride"] = int(byte_per_element) - elif key == "texcoords": - byte_per_element = 2 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]] - target = _TargetType.ARRAY_BUFFER - bufferview["byteStride"] = int(byte_per_element) - elif key == "texvertices": - byte_per_element = 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.FLOAT]] - target = _TargetType.ELEMENT_ARRAY_BUFFER - bufferview["byteStride"] = int(byte_per_element) - elif key == "indices": - byte_per_element = ( - 3 * _DTYPE_BYTES[_ITEM_TYPES[_ComponentType.UNSIGNED_SHORT]] - ) - target = _TargetType.ELEMENT_ARRAY_BUFFER - - bufferview["target"] = target - bufferview["byteOffset"] = kwargs.get("offset") - bufferview["byteLength"] = kwargs.get("byte_length") - self._json_data["bufferViews"].append(bufferview) - - def _write_image_buffer(self, **kwargs) -> Tuple[int, bytes]: - image_np = self.mesh.textures.maps_list()[0].cpu().numpy() - image_array = (image_np * 255.0).astype(np.uint8) - im = Image.fromarray(image_array) - with BytesIO() as f: - im.save(f, format="PNG") - image_data = f.getvalue() - - image_data_byte_length = len(image_data) - bufferview_image = { - "buffer": 0, - } - bufferview_image["byteOffset"] = kwargs.get("offset") - bufferview_image["byteLength"] = image_data_byte_length - self._json_data["bufferViews"].append(bufferview_image) - - image = {"name": "texture", "mimeType": "image/png", "bufferView": 3} - self._json_data["images"].append(image) - return (image_data_byte_length, image_data) - - def save(self): - # check validity of mesh - if self.mesh.verts_packed() is None or self.mesh.faces_packed() is None: - raise ValueError("invalid mesh to save, verts or face indices are empty") - - # accessors for positions, texture uvs and face indices - pos_byte, pos_data = self._write_accessor_json("positions") - idx_byte, idx_data = self._write_accessor_json("indices") - include_textures = False - if self.mesh.textures is not None: - if hasattr(self.mesh.textures, "verts_features_list"): - tex_byte, tex_data = self._write_accessor_json("texvertices") - include_textures = True - texcoords = False - elif self.mesh.textures.verts_uvs_list()[0] is not None: - tex_byte, tex_data = self._write_accessor_json("texcoords") - include_textures = True - texcoords = True - - # bufferViews for positions, texture coords and indices - byte_offset = 0 - self._write_bufferview("positions", byte_length=pos_byte, offset=byte_offset) - byte_offset += pos_byte - - self._write_bufferview("indices", 
byte_length=idx_byte, offset=byte_offset) - byte_offset += idx_byte - - if include_textures: - if texcoords: - self._write_bufferview( - "texcoords", byte_length=tex_byte, offset=byte_offset - ) - else: - self._write_bufferview( - "texvertices", byte_length=tex_byte, offset=byte_offset - ) - byte_offset += tex_byte - - # image bufferView - include_image = False - if self.mesh.textures is not None and hasattr(self.mesh.textures, "maps_list"): - include_image = True - image_byte, image_data = self._write_image_buffer(offset=byte_offset) - byte_offset += image_byte - - # buffers - self._json_data["buffers"].append({"byteLength": int(byte_offset)}) - - # organize into a glb - json_bytes = bytes(json.dumps(self._json_data, cls=OurEncoder), "utf-8") - json_length = len(json_bytes) - - # write header - version = 2 - total_header_length = 28 # (file header = 12) + 2 * (chunk header = 8) - file_length = json_length + byte_offset + total_header_length - header = struct.pack(" None: - self.known_suffixes = (".glb",) - - def read( - self, - path: PathOrStr, - include_textures: bool, - device, - path_manager: PathManager, - **kwargs, - ) -> Optional[Meshes]: - if not endswith(path, self.known_suffixes): - return None - - names_meshes_list = load_meshes( - path=path, - path_manager=path_manager, - include_textures=include_textures, - ) - - meshes_list = [mesh for name, mesh in names_meshes_list] - mesh = join_meshes_as_scene(meshes_list) - return mesh.to(device) - - def save( - self, - data: Meshes, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - **kwargs, - ) -> bool: - """ - Writes all the meshes from the default scene to GLB file. - - Args: - data: meshes to save - path: path of the GLB file to write into - path_manager: PathManager object for interpreting the path - - Return True if saving succeeds and False otherwise - """ - - if not endswith(path, self.known_suffixes): - return False - - with _open_file(path, path_manager, "wb") as f: - writer = _GLTFWriter(data, cast(BinaryIO, f)) - writer.save() - return True diff --git a/pytorch3d/pytorch3d/io/mtl_io.py b/pytorch3d/pytorch3d/io/mtl_io.py deleted file mode 100644 index 166f98a8ed55707ecb22a54a05c8be85031d7487..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/mtl_io.py +++ /dev/null @@ -1,535 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -"""This module implements utility functions for loading .mtl files and textures.""" -import os -import warnings -from typing import Dict, List, Optional, Tuple - -import numpy as np -import torch -import torch.nn.functional as F -from iopath.common.file_io import PathManager -from pytorch3d.common.compat import meshgrid_ij -from pytorch3d.common.datatypes import Device -from pytorch3d.io.utils import _open_file, _read_image - - -def make_mesh_texture_atlas( - material_properties: Dict, - texture_images: Dict, - face_material_names, - faces_uvs: torch.Tensor, - verts_uvs: torch.Tensor, - texture_size: int, - texture_wrap: Optional[str], -) -> torch.Tensor: - """ - Given properties for materials defined in the .mtl file, and the face texture uv - coordinates, construct an (F, R, R, 3) texture atlas where R is the texture_size - and F is the number of faces in the mesh. - - Args: - material_properties: dict of properties for each material. 
If a material - does not have any properties it will have an empty dict. - texture_images: dict of material names and texture images - face_material_names: numpy array of the material name corresponding to each - face. Faces which don't have an associated material will be an empty string. - For these faces, a uniform white texture is assigned. - faces_uvs: LongTensor of shape (F, 3,) giving the index into the verts_uvs for - each face in the mesh. - verts_uvs: FloatTensor of shape (V, 2) giving the uv coordinates for each vertex. - texture_size: the resolution of the per face texture map returned by this function. - Each face will have a texture map of shape (texture_size, texture_size, 3). - texture_wrap: string, one of ["repeat", "clamp", None] - If `texture_wrap="repeat"` for uv values outside the range [0, 1] the integer part - is ignored and a repeating pattern is formed. - If `texture_wrap="clamp"` the values are clamped to the range [0, 1]. - If None, do nothing. - - Returns: - atlas: FloatTensor of shape (F, texture_size, texture_size, 3) giving the per - face texture map. - """ - # Create an R x R texture map per face in the mesh - R = texture_size - F = faces_uvs.shape[0] - - # Initialize the per face texture map to a white color. - # TODO: allow customization of this base color? - atlas = torch.ones(size=(F, R, R, 3), dtype=torch.float32, device=faces_uvs.device) - - # Check for empty materials. - if not material_properties and not texture_images: - return atlas - - # Iterate through the material properties - not - # all materials have texture images so this is - # done first separately to the texture interpolation. - for material_name, props in material_properties.items(): - # Bool to indicate which faces use this texture map. - faces_material_ind = torch.from_numpy(face_material_names == material_name).to( - faces_uvs.device - ) - if faces_material_ind.sum() > 0: - # For these faces, update the base color to the - # diffuse material color. - if "diffuse_color" not in props: - continue - atlas[faces_material_ind, ...] = props["diffuse_color"][None, :] - - # If there are vertex texture coordinates, create an (F, 3, 2) - # tensor of the vertex textures per face. - faces_verts_uvs = verts_uvs[faces_uvs] if len(verts_uvs) > 0 else None - - # Some meshes only have material properties and no texture image. - # In this case, return the atlas here. - if faces_verts_uvs is None: - return atlas - - if texture_wrap == "repeat": - # If texture uv coordinates are outside the range [0, 1] follow - # the convention GL_REPEAT in OpenGL i.e the integer part of the coordinate - # will be ignored and a repeating pattern is formed. - # Shapenet data uses this format see: - # https://shapenet.org/qaforum/index.php?qa=15&qa_1=why-is-the-texture-coordinate-in-the-obj-file-not-in-the-range # noqa: B950 - if (faces_verts_uvs > 1).any() or (faces_verts_uvs < 0).any(): - msg = "Texture UV coordinates outside the range [0, 1]. \ - The integer part will be ignored to form a repeating pattern." - warnings.warn(msg) - faces_verts_uvs = faces_verts_uvs % 1 - elif texture_wrap == "clamp": - # Clamp uv coordinates to the [0, 1] range. - faces_verts_uvs = faces_verts_uvs.clamp(0.0, 1.0) - - # Iterate through the materials used in this mesh. Update the - # texture atlas for the faces which use this material. - # Faces without texture are white. 
- for material_name, image in list(texture_images.items()): - # Only use the RGB colors - if image.shape[2] == 4: - image = image[:, :, :3] - - # Reverse the image y direction - image = torch.flip(image, [0]).type_as(faces_verts_uvs) - - # Bool to indicate which faces use this texture map. - faces_material_ind = torch.from_numpy(face_material_names == material_name).to( - faces_verts_uvs.device - ) - - # Find the subset of faces which use this texture with this texture image - uvs_subset = faces_verts_uvs[faces_material_ind, :, :] - - # Update the texture atlas for the faces which use this texture. - # TODO: should the texture map values be multiplied - # by the diffuse material color (i.e. use *= as the atlas has - # been initialized to the diffuse color)?. This is - # not being done in SoftRas. - atlas[faces_material_ind, :, :] = make_material_atlas(image, uvs_subset, R) - - return atlas - - -def make_material_atlas( - image: torch.Tensor, faces_verts_uvs: torch.Tensor, texture_size: int -) -> torch.Tensor: - r""" - Given a single texture image and the uv coordinates for all the - face vertices, create a square texture map per face using - the formulation from [1]. - - For a triangle with vertices (v0, v1, v2) we can create a barycentric coordinate system - with the x axis being the vector (v0 - v2) and the y axis being the vector (v1 - v2). - The barycentric coordinates range from [0, 1] in the +x and +y direction so this creates - a triangular texture space with vertices at (0, 1), (0, 0) and (1, 0). - - The per face texture map is of shape (texture_size, texture_size, 3) - which is a square. To map a triangular texture to a square grid, each - triangle is parametrized as follows (e.g. R = texture_size = 3): - - The triangle texture is first divided into RxR = 9 subtriangles which each - map to one grid cell. The numbers in the grid cells and triangles show the mapping. - - ..code-block::python - - Triangular Texture Space: - - 1 - |\ - |6 \ - |____\ - |\ 7 |\ - |3 \ |4 \ - |____\|____\ - |\ 8 |\ 5 |\ - |0 \ |1 \ |2 \ - |____\|____\|____\ - 0 1 - - Square per face texture map: - - R ____________________ - | | | | - | 6 | 7 | 8 | - |______|______|______| - | | | | - | 3 | 4 | 5 | - |______|______|______| - | | | | - | 0 | 1 | 2 | - |______|______|______| - 0 R - - - The barycentric coordinates of each grid cell are calculated using the - xy coordinates: - - ..code-block::python - - The cartesian coordinates are: - - Grid 1: - - R ____________________ - | | | | - | 20 | 21 | 22 | - |______|______|______| - | | | | - | 10 | 11 | 12 | - |______|______|______| - | | | | - | 00 | 01 | 02 | - |______|______|______| - 0 R - - where 02 means y = 0, x = 2 - - Now consider this subset of the triangle which corresponds to - grid cells 0 and 8: - - ..code-block::python - - 1/R ________ - |\ 8 | - | \ | - | 0 \ | - |_______\| - 0 1/R - - The centroids of the triangles are: - 0: (1/3, 1/3) * 1/R - 8: (2/3, 2/3) * 1/R - - For each grid cell we can now calculate the centroid `(c_y, c_x)` - of the corresponding texture triangle: - - if `(x + y) < R`, then offset the centroid of - triangle 0 by `(y, x) * (1/R)` - - if `(x + y) > R`, then offset the centroid of - triangle 8 by `((R-1-y), (R-1-x)) * (1/R)`. 
- - This is equivalent to updating the portion of Grid 1 - above the diagonal, replacing `(y, x)` with `((R-1-y), (R-1-x))`: - - ..code-block::python - - R _____________________ - | | | | - | 20 | 01 | 00 | - |______|______|______| - | | | | - | 10 | 11 | 10 | - |______|______|______| - | | | | - | 00 | 01 | 02 | - |______|______|______| - 0 R - - The barycentric coordinates (w0, w1, w2) are then given by: - - ..code-block::python - - w0 = c_x - w1 = c_y - w2 = 1- w0 - w1 - - Args: - image: FloatTensor of shape (H, W, 3) - faces_verts_uvs: uv coordinates for each vertex in each face (F, 3, 2) - texture_size: int - - Returns: - atlas: a FloatTensor of shape (F, texture_size, texture_size, 3) giving a - per face texture map. - - [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based - 3D Reasoning', ICCV 2019 - """ - R = texture_size - device = faces_verts_uvs.device - rng = torch.arange(R, device=device) - - # Meshgrid returns (row, column) i.e (Y, X) - # Change order to (X, Y) to make the grid. - Y, X = meshgrid_ij(rng, rng) - # pyre-fixme[28]: Unexpected keyword argument `axis`. - grid = torch.stack([X, Y], axis=-1) # (R, R, 2) - - # Grid cells below the diagonal: x + y < R. - below_diag = grid.sum(-1) < R - - # map a [0, R] grid -> to a [0, 1] barycentric coordinates of - # the texture triangle centroids. - bary = torch.zeros((R, R, 3), device=device) # (R, R, 3) - slc = torch.arange(2, device=device)[:, None] - # w0, w1 - bary[below_diag, slc] = ((grid[below_diag] + 1.0 / 3.0) / R).T - # w0, w1 for above diagonal grid cells. - bary[~below_diag, slc] = (((R - 1.0 - grid[~below_diag]) + 2.0 / 3.0) / R).T - # w2 = 1. - w0 - w1 - bary[..., -1] = 1 - bary[..., :2].sum(dim=-1) - - # Calculate the uv position in the image for each pixel - # in the per face texture map - # (F, 1, 1, 3, 2) * (R, R, 3, 1) -> (F, R, R, 3, 2) -> (F, R, R, 2) - uv_pos = (faces_verts_uvs[:, None, None] * bary[..., None]).sum(-2) - - # bi-linearly interpolate the textures from the images - # using the uv coordinates given by uv_pos. - textures = _bilinear_interpolation_grid_sample(image, uv_pos) - - return textures - - -def _bilinear_interpolation_vectorized( - image: torch.Tensor, grid: torch.Tensor -) -> torch.Tensor: - """ - Bi linearly interpolate the image using the uv positions in the flow-field - grid (following the naming conventions for torch.nn.functional.grid_sample). - - This implementation uses the same steps as in the SoftRasterizer CUDA kernel - for loading textures. We are keeping it for reference to make it easy to - compare if required. - - However it doesn't properly handle the out of bound values in the same way as - the grid_sample function does with the padding_mode argument. - This vectorized version requires less memory than - _bilinear_interpolation_grid_sample but is slightly slower. - - Args: - image: FloatTensor of shape (H, W, D) a single image/input tensor with D - channels. - grid: FloatTensor of shape (N, R, R, 2) giving the pixel locations of the - points at which to sample a value in the image. The grid values must - be in the range [0, 1]. u is the x direction and v is the y direction. - - Returns: - out: FloatTensor of shape (N, H, W, D) giving the interpolated - D dimensional value from image at each of the pixel locations in grid. 
- - """ - H, W, _ = image.shape - # Convert [0, 1] to the range [0, W-1] and [0, H-1] - grid = grid * torch.tensor([W - 1, H - 1]).type_as(grid) - weight_1 = grid - grid.int() - weight_0 = 1.0 - weight_1 - - grid_x, grid_y = grid.unbind(-1) - y0 = grid_y.to(torch.int64) - y1 = (grid_y + 1).to(torch.int64) - x0 = grid_x.to(torch.int64) - x1 = x0 + 1 - - weight_x0, weight_y0 = weight_0.unbind(-1) - weight_x1, weight_y1 = weight_1.unbind(-1) - - # Bi-linear interpolation - # griditions = [[y, x], [(y+1), x] - # [y, (x+1)], [(y+1), (x+1)]] - # weights = [[wx0*wy0, wx0*wy1], - # [wx1*wy0, wx1*wy1]] - out = ( - image[y0, x0] * (weight_x0 * weight_y0)[..., None] - + image[y1, x0] * (weight_x0 * weight_y1)[..., None] - + image[y0, x1] * (weight_x1 * weight_y0)[..., None] - + image[y1, x1] * (weight_x1 * weight_y1)[..., None] - ) - - return out - - -def _bilinear_interpolation_grid_sample( - image: torch.Tensor, grid: torch.Tensor -) -> torch.Tensor: - """ - Bi linearly interpolate the image using the uv positions in the flow-field - grid (following the conventions for torch.nn.functional.grid_sample). - - This implementation is faster than _bilinear_interpolation_vectorized but - requires more memory so can cause OOMs. If speed is an issue try this function - instead. - - Args: - image: FloatTensor of shape (H, W, D) a single image/input tensor with D - channels. - grid: FloatTensor of shape (N, R, R, 2) giving the pixel locations of the - points at which to sample a value in the image. The grid values must - be in the range [0, 1]. u is the x direction and v is the y direction. - - Returns: - out: FloatTensor of shape (N, H, W, D) giving the interpolated - D dimensional value from image at each of the pixel locations in grid. - """ - - N = grid.shape[0] - # convert [0, 1] to the range [-1, 1] expected by grid_sample. - grid = grid * 2.0 - 1.0 - image = image.permute(2, 0, 1)[None, ...].expand(N, -1, -1, -1) # (N, 3, H, W) - # Align_corners has to be set to True to match the output of the SoftRas - # cuda kernel for bilinear sampling. 
- out = F.grid_sample(image, grid, mode="bilinear", align_corners=True) - return out.permute(0, 2, 3, 1) - - -MaterialProperties = Dict[str, Dict[str, torch.Tensor]] -TextureFiles = Dict[str, str] -TextureImages = Dict[str, torch.Tensor] - - -def _parse_mtl( - f: str, path_manager: PathManager, device: Device = "cpu" -) -> Tuple[MaterialProperties, TextureFiles]: - material_properties = {} - texture_files = {} - material_name = "" - - with _open_file(f, path_manager, "r") as f: - for line in f: - tokens = line.strip().split() - if not tokens: - continue - if tokens[0] == "newmtl": - material_name = tokens[1] - material_properties[material_name] = {} - elif tokens[0] == "map_Kd": - # Diffuse texture map - # Account for the case where filenames might have spaces - filename = line.strip()[7:] - texture_files[material_name] = filename - elif tokens[0] == "Kd": - # RGB diffuse reflectivity - kd = np.array(tokens[1:4]).astype(np.float32) - kd = torch.from_numpy(kd).to(device) - material_properties[material_name]["diffuse_color"] = kd - elif tokens[0] == "Ka": - # RGB ambient reflectivity - ka = np.array(tokens[1:4]).astype(np.float32) - ka = torch.from_numpy(ka).to(device) - material_properties[material_name]["ambient_color"] = ka - elif tokens[0] == "Ks": - # RGB specular reflectivity - ks = np.array(tokens[1:4]).astype(np.float32) - ks = torch.from_numpy(ks).to(device) - material_properties[material_name]["specular_color"] = ks - elif tokens[0] == "Ns": - # Specular exponent - ns = np.array(tokens[1:4]).astype(np.float32) - ns = torch.from_numpy(ns).to(device) - material_properties[material_name]["shininess"] = ns - - return material_properties, texture_files - - -def _load_texture_images( - material_names: List[str], - data_dir: str, - material_properties: MaterialProperties, - texture_files: TextureFiles, - path_manager: PathManager, -) -> Tuple[MaterialProperties, TextureImages]: - final_material_properties = {} - texture_images = {} - - used_material_names = list(material_names) - if not used_material_names and material_properties: - if len(material_properties) > 1: - raise ValueError( - "Multiple materials but no usemtl declarations in the obj file" - ) - # No materials were specified in obj file and only one is in the - # specified .mtl file, so we use it. - used_material_names.append(next(iter(material_properties.keys()))) - - # Only keep the materials referenced in the obj. - for material_name in used_material_names: - if material_name in texture_files: - # Load the texture image. - path = os.path.join(data_dir, texture_files[material_name]) - if path_manager.exists(path): - image = ( - _read_image(path, path_manager=path_manager, format="RGB") / 255.0 - ) - image = torch.from_numpy(image) - texture_images[material_name] = image - else: - msg = f"Texture file does not exist: {path}" - warnings.warn(msg) - - if material_name in material_properties: - final_material_properties[material_name] = material_properties[ - material_name - ] - - return final_material_properties, texture_images - - -def load_mtl( - f: str, - *, - material_names: List[str], - data_dir: str, - device: Device = "cpu", - path_manager: PathManager, -) -> Tuple[MaterialProperties, TextureImages]: - """ - Load texture images and material reflectivity values for ambient, diffuse - and specular light (Ka, Kd, Ks, Ns). - - Args: - f: path to the material information. - material_names: a list of the material names found in the .obj file. - data_dir: the directory where the material texture files are located. 
- device: Device (as str or torch.tensor) on which to return the new tensors. - path_manager: PathManager for interpreting both f and material_names. - - Returns: - material_properties: dict of properties for each material. If a material - does not have any properties it will have an empty dict. - { - material_name_1: { - "ambient_color": tensor of shape (1, 3), - "diffuse_color": tensor of shape (1, 3), - "specular_color": tensor of shape (1, 3), - "shininess": tensor of shape (1) - }, - material_name_2: {}, - ... - } - texture_images: dict of material names and texture images - { - material_name_1: (H, W, 3) image, - ... - } - """ - material_properties, texture_files = _parse_mtl(f, path_manager, device) - return _load_texture_images( - material_names, - data_dir, - material_properties, - texture_files, - path_manager=path_manager, - ) diff --git a/pytorch3d/pytorch3d/io/obj_io.py b/pytorch3d/pytorch3d/io/obj_io.py deleted file mode 100644 index 834c51edf53cdbc2a40f1e08326afd85965c38c7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/obj_io.py +++ /dev/null @@ -1,939 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -"""This module implements utility functions for loading and saving meshes.""" -import os -import warnings -from collections import namedtuple -from pathlib import Path -from typing import List, Optional - -import numpy as np -import torch -from iopath.common.file_io import PathManager -from PIL import Image -from pytorch3d.common.datatypes import Device -from pytorch3d.io.mtl_io import load_mtl, make_mesh_texture_atlas -from pytorch3d.io.utils import _check_faces_indices, _make_tensor, _open_file, PathOrStr -from pytorch3d.renderer import TexturesAtlas, TexturesUV -from pytorch3d.structures import join_meshes_as_batch, Meshes - -from .pluggable_formats import endswith, MeshFormatInterpreter - - -# Faces & Aux type returned from load_obj function. -_Faces = namedtuple("Faces", "verts_idx normals_idx textures_idx materials_idx") -_Aux = namedtuple( - "Properties", "normals verts_uvs material_colors texture_images texture_atlas" -) - - -def _format_faces_indices(faces_indices, max_index: int, device, pad_value=None): - """ - Format indices and check for invalid values. Indices can refer to - values in one of the face properties: vertices, textures or normals. - See comments of the load_obj function for more details. - - Args: - faces_indices: List of ints of indices. - max_index: Max index for the face property. - pad_value: if any of the face_indices are padded, specify - the value of the padding (e.g. -1). This is only used - for texture indices indices where there might - not be texture information for all the faces. - - Returns: - faces_indices: List of ints of indices. - - Raises: - ValueError if indices are not in a valid range. - """ - faces_indices = _make_tensor( - faces_indices, cols=3, dtype=torch.int64, device=device - ) - - if pad_value is not None: - mask = faces_indices.eq(pad_value).all(dim=-1) - - # Change to 0 based indexing. - faces_indices[(faces_indices > 0)] -= 1 - - # Negative indexing counts from the end. - faces_indices[(faces_indices < 0)] += max_index - - if pad_value is not None: - # pyre-fixme[61]: `mask` is undefined, or not always defined. 
- faces_indices[mask] = pad_value - - return _check_faces_indices(faces_indices, max_index, pad_value) - - -def load_obj( - f, - load_textures: bool = True, - create_texture_atlas: bool = False, - texture_atlas_size: int = 4, - texture_wrap: Optional[str] = "repeat", - device: Device = "cpu", - path_manager: Optional[PathManager] = None, -): - """ - Load a mesh from a .obj file and optionally textures from a .mtl file. - Currently this handles verts, faces, vertex texture uv coordinates, normals, - texture images and material reflectivity values. - - Note .obj files are 1-indexed. The tensors returned from this function - are 0-indexed. OBJ spec reference: http://www.martinreddy.net/gfx/3d/OBJ.spec - - Example .obj file format: - :: - # this is a comment - v 1.000000 -1.000000 -1.000000 - v 1.000000 -1.000000 1.000000 - v -1.000000 -1.000000 1.000000 - v -1.000000 -1.000000 -1.000000 - v 1.000000 1.000000 -1.000000 - vt 0.748573 0.750412 - vt 0.749279 0.501284 - vt 0.999110 0.501077 - vt 0.999455 0.750380 - vn 0.000000 0.000000 -1.000000 - vn -1.000000 -0.000000 -0.000000 - vn -0.000000 -0.000000 1.000000 - f 5/2/1 1/2/1 4/3/1 - f 5/1/1 4/3/1 2/4/1 - - The first character of the line denotes the type of input: - :: - - v is a vertex - - vt is the texture coordinate of one vertex - - vn is the normal of one vertex - - f is a face - - Faces are interpreted as follows: - :: - 5/2/1 describes the first vertex of the first triangle - - 5: index of vertex [1.000000 1.000000 -1.000000] - - 2: index of texture coordinate [0.749279 0.501284] - - 1: index of normal [0.000000 0.000000 -1.000000] - - If there are faces with more than 3 vertices - they are subdivided into triangles. Polygonal faces are assumed to have - vertices ordered counter-clockwise so the (right-handed) normal points - out of the screen e.g. a proper rectangular face would be specified like this: - :: - 0_________1 - | | - | | - 3 ________2 - - The face would be split into two triangles: (0, 2, 1) and (0, 3, 2), - both of which are also oriented counter-clockwise and have normals - pointing out of the screen. - - Args: - f: A file-like object (with methods read, readline, tell, and seek), - a pathlib path or a string containing a file name. - load_textures: Boolean indicating whether material files are loaded - create_texture_atlas: Bool, If True a per face texture map is created and - a tensor `texture_atlas` is also returned in `aux`. - texture_atlas_size: Int specifying the resolution of the texture map per face - when `create_texture_atlas=True`. A (texture_size, texture_size, 3) - map is created per face. - texture_wrap: string, one of ["repeat", "clamp"]. This applies when computing - the texture atlas. - If `texture_mode="repeat"`, for uv values outside the range [0, 1] the integer part - is ignored and a repeating pattern is formed. - If `texture_mode="clamp"` the values are clamped to the range [0, 1]. - If None, then there is no transformation of the texture values. - device: Device (as str or torch.device) on which to return the new tensors. - path_manager: optionally a PathManager object to interpret paths. - - Returns: - 6-element tuple containing - - - **verts**: FloatTensor of shape (V, 3). - - **faces**: NamedTuple with fields: - - verts_idx: LongTensor of vertex indices, shape (F, 3). - - normals_idx: (optional) LongTensor of normal indices, shape (F, 3). - - textures_idx: (optional) LongTensor of texture indices, shape (F, 3). - This can be used to index into verts_uvs. 
-          - materials_idx: (optional) List of indices indicating which
-            material the texture is derived from for each face.
-            If there is no material for a face, the index is -1.
-            This can be used to retrieve the corresponding values
-            in material_colors/texture_images after they have been
-            converted to tensors or Materials/Textures data
-            structures - see textures.py and materials.py for
-            more info.
-        - **aux**: NamedTuple with fields:
-           - normals: FloatTensor of shape (N, 3)
-           - verts_uvs: FloatTensor of shape (T, 2), giving the uv coordinate per
-             vertex. If a vertex is shared between two faces, it can have
-             a different uv value for each instance. Therefore it is
-             possible that the number of verts_uvs is greater than
-             num verts i.e. T > V.
-           - material_colors: if `load_textures=True` and the material has associated
-             properties this will be a dict of material names and properties of the form:
-
-             .. code-block:: python
-
-                 {
-                     material_name_1: {
-                         "ambient_color": tensor of shape (1, 3),
-                         "diffuse_color": tensor of shape (1, 3),
-                         "specular_color": tensor of shape (1, 3),
-                         "shininess": tensor of shape (1)
-                     },
-                     material_name_2: {},
-                     ...
-                 }
-
-             If a material does not have any properties it will have an
-             empty dict. If `load_textures=False`, `material_colors` will be None.
-
-           - texture_images: if `load_textures=True` and the material has a texture map,
-             this will be a dict of the form:
-
-             .. code-block:: python
-
-                 {
-                     material_name_1: (H, W, 3) image,
-                     ...
-                 }
-             If `load_textures=False`, `texture_images` will be None.
-           - texture_atlas: if `load_textures=True` and `create_texture_atlas=True`,
-             this will be a FloatTensor of shape (F, texture_size, texture_size, 3).
-             If the material does not have a texture map, then all faces
-             will have a uniform white texture. If `load_textures=False` or
-             `create_texture_atlas=False`, `texture_atlas` will be None.
-    """
-    data_dir = "./"
-    if isinstance(f, (str, bytes, Path)):
-        # pyre-fixme[6]: For 1st argument expected `PathLike[Variable[AnyStr <:
-        #  [str, bytes]]]` but got `Union[Path, bytes, str]`.
-        data_dir = os.path.dirname(f)
-    if path_manager is None:
-        path_manager = PathManager()
-    with _open_file(f, path_manager, "r") as f:
-        return _load_obj(
-            f,
-            data_dir=data_dir,
-            load_textures=load_textures,
-            create_texture_atlas=create_texture_atlas,
-            texture_atlas_size=texture_atlas_size,
-            texture_wrap=texture_wrap,
-            path_manager=path_manager,
-            device=device,
-        )
-
-
-def load_objs_as_meshes(
-    files: list,
-    device: Optional[Device] = None,
-    load_textures: bool = True,
-    create_texture_atlas: bool = False,
-    texture_atlas_size: int = 4,
-    texture_wrap: Optional[str] = "repeat",
-    path_manager: Optional[PathManager] = None,
-):
-    """
-    Load meshes from a list of .obj files using the load_obj function, and
-    return them as a Meshes object. This only works for meshes which have a
-    single texture image for the whole mesh. See the load_obj function for more
-    details. material_colors and normals are not stored.
-
-    Args:
-        files: A list of file-like objects (with methods read, readline, tell,
-            and seek), pathlib paths or strings containing file names.
-        device: Desired device of returned Meshes. Default:
-            uses the current device for the default tensor type.
-        load_textures: Boolean indicating whether material files are loaded.
-        create_texture_atlas, texture_atlas_size, texture_wrap: as for load_obj.
-        path_manager: optionally a PathManager object to interpret paths.
-
-    Returns:
-        New Meshes object.
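Because the load_obj docstring above is long, a short usage sketch may help. It assumes pytorch3d is installed and that "model.obj" is a hypothetical mesh file sitting next to its .mtl:

    from pytorch3d.io import load_obj, load_objs_as_meshes

    verts, faces, aux = load_obj("model.obj")
    verts.shape            # (V, 3) vertex positions
    faces.verts_idx.shape  # (F, 3) triangle indices, already 0-indexed
    aux.verts_uvs          # (T, 2) uv coordinates, or None if the file has no vt lines

    # Or load straight into a Meshes object (one texture image per mesh):
    mesh = load_objs_as_meshes(["model.obj"], device="cpu")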
- """ - mesh_list = [] - for f_obj in files: - verts, faces, aux = load_obj( - f_obj, - load_textures=load_textures, - create_texture_atlas=create_texture_atlas, - texture_atlas_size=texture_atlas_size, - texture_wrap=texture_wrap, - path_manager=path_manager, - ) - tex = None - if create_texture_atlas: - # TexturesAtlas type - tex = TexturesAtlas(atlas=[aux.texture_atlas.to(device)]) - else: - # TexturesUV type - tex_maps = aux.texture_images - if tex_maps is not None and len(tex_maps) > 0: - verts_uvs = aux.verts_uvs.to(device) # (V, 2) - faces_uvs = faces.textures_idx.to(device) # (F, 3) - image = list(tex_maps.values())[0].to(device)[None] - tex = TexturesUV( - verts_uvs=[verts_uvs], faces_uvs=[faces_uvs], maps=image - ) - - mesh = Meshes( - verts=[verts.to(device)], faces=[faces.verts_idx.to(device)], textures=tex - ) - mesh_list.append(mesh) - if len(mesh_list) == 1: - return mesh_list[0] - return join_meshes_as_batch(mesh_list) - - -class MeshObjFormat(MeshFormatInterpreter): - def __init__(self) -> None: - self.known_suffixes = (".obj",) - - def read( - self, - path: PathOrStr, - include_textures: bool, - device: Device, - path_manager: PathManager, - create_texture_atlas: bool = False, - texture_atlas_size: int = 4, - texture_wrap: Optional[str] = "repeat", - **kwargs, - ) -> Optional[Meshes]: - if not endswith(path, self.known_suffixes): - return None - mesh = load_objs_as_meshes( - files=[path], - device=device, - load_textures=include_textures, - create_texture_atlas=create_texture_atlas, - texture_atlas_size=texture_atlas_size, - texture_wrap=texture_wrap, - path_manager=path_manager, - ) - return mesh - - def save( - self, - data: Meshes, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - decimal_places: Optional[int] = None, - **kwargs, - ) -> bool: - if not endswith(path, self.known_suffixes): - return False - - verts = data.verts_list()[0] - faces = data.faces_list()[0] - - verts_uvs: Optional[torch.Tensor] = None - faces_uvs: Optional[torch.Tensor] = None - texture_map: Optional[torch.Tensor] = None - - if isinstance(data.textures, TexturesUV): - verts_uvs = data.textures.verts_uvs_padded()[0] - faces_uvs = data.textures.faces_uvs_padded()[0] - texture_map = data.textures.maps_padded()[0] - - save_obj( - f=path, - verts=verts, - faces=faces, - decimal_places=decimal_places, - path_manager=path_manager, - verts_uvs=verts_uvs, - faces_uvs=faces_uvs, - texture_map=texture_map, - ) - return True - - -def _parse_face( - line, - tokens, - material_idx, - faces_verts_idx, - faces_normals_idx, - faces_textures_idx, - faces_materials_idx, -) -> None: - face = tokens[1:] - face_list = [f.split("/") for f in face] - face_verts = [] - face_normals = [] - face_textures = [] - - for vert_props in face_list: - # Vertex index. - face_verts.append(int(vert_props[0])) - if len(vert_props) > 1: - if vert_props[1] != "": - # Texture index is present e.g. f 4/1/1. - face_textures.append(int(vert_props[1])) - if len(vert_props) > 2: - # Normal index present e.g. 4/1/1 or 4//1. - face_normals.append(int(vert_props[2])) - if len(vert_props) > 3: - raise ValueError( - "Face vertices can only have 3 properties. \ - Face vert %s, Line: %s" - % (str(vert_props), str(line)) - ) - - # Triplets must be consistent for all vertices in a face e.g. - # legal statement: f 4/1/1 3/2/1 2/1/1. - # illegal statement: f 4/1/1 3//1 2//1. - # If the face does not have normals or textures indices - # fill with pad value = -1. 
This will ensure that - # all the face index tensors will have F values where - # F is the number of faces. - if len(face_normals) > 0: - if not (len(face_verts) == len(face_normals)): - raise ValueError( - "Face %s is an illegal statement. \ - Vertex properties are inconsistent. Line: %s" - % (str(face), str(line)) - ) - else: - face_normals = [-1] * len(face_verts) # Fill with -1 - if len(face_textures) > 0: - if not (len(face_verts) == len(face_textures)): - raise ValueError( - "Face %s is an illegal statement. \ - Vertex properties are inconsistent. Line: %s" - % (str(face), str(line)) - ) - else: - face_textures = [-1] * len(face_verts) # Fill with -1 - - # Subdivide faces with more than 3 vertices. - # See comments of the load_obj function for more details. - for i in range(len(face_verts) - 2): - faces_verts_idx.append((face_verts[0], face_verts[i + 1], face_verts[i + 2])) - faces_normals_idx.append( - (face_normals[0], face_normals[i + 1], face_normals[i + 2]) - ) - faces_textures_idx.append( - (face_textures[0], face_textures[i + 1], face_textures[i + 2]) - ) - faces_materials_idx.append(material_idx) - - -def _parse_obj(f, data_dir: str): - """ - Load a mesh from a file-like object. See load_obj function for more details - about the return values. - """ - verts, normals, verts_uvs = [], [], [] - faces_verts_idx, faces_normals_idx, faces_textures_idx = [], [], [] - faces_materials_idx = [] - material_names = [] - mtl_path = None - - lines = [line.strip() for line in f] - - # startswith expects each line to be a string. If the file is read in as - # bytes then first decode to strings. - if lines and isinstance(lines[0], bytes): - lines = [el.decode("utf-8") for el in lines] - - materials_idx = -1 - - for line in lines: - tokens = line.strip().split() - if line.startswith("mtllib"): - if len(tokens) < 2: - raise ValueError("material file name is not specified") - # NOTE: only allow one .mtl file per .obj. - # Definitions for multiple materials can be included - # in this one .mtl file. - mtl_path = line[len(tokens[0]) :].strip() # Take the remainder of the line - mtl_path = os.path.join(data_dir, mtl_path) - elif len(tokens) and tokens[0] == "usemtl": - material_name = tokens[1] - # materials are often repeated for different parts - # of a mesh. - if material_name not in material_names: - material_names.append(material_name) - materials_idx = len(material_names) - 1 - else: - materials_idx = material_names.index(material_name) - elif line.startswith("v "): # Line is a vertex. - vert = [float(x) for x in tokens[1:4]] - if len(vert) != 3: - msg = "Vertex %s does not have 3 values. Line: %s" - raise ValueError(msg % (str(vert), str(line))) - verts.append(vert) - elif line.startswith("vt "): # Line is a texture. - tx = [float(x) for x in tokens[1:3]] - if len(tx) != 2: - raise ValueError( - "Texture %s does not have 2 values. Line: %s" % (str(tx), str(line)) - ) - verts_uvs.append(tx) - elif line.startswith("vn "): # Line is a normal. - norm = [float(x) for x in tokens[1:4]] - if len(norm) != 3: - msg = "Normal %s does not have 3 values. Line: %s" - raise ValueError(msg % (str(norm), str(line))) - normals.append(norm) - elif line.startswith("f "): # Line is a face. - # Update face properties info. 
- _parse_face( - line, - tokens, - materials_idx, - faces_verts_idx, - faces_normals_idx, - faces_textures_idx, - faces_materials_idx, - ) - - return ( - verts, - normals, - verts_uvs, - faces_verts_idx, - faces_normals_idx, - faces_textures_idx, - faces_materials_idx, - material_names, - mtl_path, - ) - - -def _load_materials( - material_names: List[str], - f: Optional[str], - *, - data_dir: str, - load_textures: bool, - device: Device, - path_manager: PathManager, -): - """ - Load materials and optionally textures from the specified path. - - Args: - material_names: a list of the material names found in the .obj file. - f: path to the material information. - data_dir: the directory where the material texture files are located. - load_textures: whether textures should be loaded. - device: Device (as str or torch.device) on which to return the new tensors. - path_manager: PathManager object to interpret paths. - - Returns: - material_colors: dict of properties for each material. - texture_images: dict of material names and texture images. - """ - if not load_textures: - return None, None - - if f is None: - warnings.warn("No mtl file provided") - return None, None - - if not path_manager.exists(f): - warnings.warn(f"Mtl file does not exist: {f}") - return None, None - - # Texture mode uv wrap - return load_mtl( - f, - material_names=material_names, - data_dir=data_dir, - path_manager=path_manager, - device=device, - ) - - -def _load_obj( - f_obj, - *, - data_dir: str, - load_textures: bool = True, - create_texture_atlas: bool = False, - texture_atlas_size: int = 4, - texture_wrap: Optional[str] = "repeat", - path_manager: PathManager, - device: Device = "cpu", -): - """ - Load a mesh from a file-like object. See load_obj function more details. - Any material files associated with the obj are expected to be in the - directory given by data_dir. - """ - - if texture_wrap is not None and texture_wrap not in ["repeat", "clamp"]: - msg = "texture_wrap must be one of ['repeat', 'clamp'] or None, got %s" - raise ValueError(msg % texture_wrap) - - ( - verts, - normals, - verts_uvs, - faces_verts_idx, - faces_normals_idx, - faces_textures_idx, - faces_materials_idx, - material_names, - mtl_path, - ) = _parse_obj(f_obj, data_dir) - - verts = _make_tensor(verts, cols=3, dtype=torch.float32, device=device) # (V, 3) - normals = _make_tensor( - normals, - cols=3, - dtype=torch.float32, - device=device, - ) # (N, 3) - verts_uvs = _make_tensor( - verts_uvs, - cols=2, - dtype=torch.float32, - device=device, - ) # (T, 2) - - faces_verts_idx = _format_faces_indices( - faces_verts_idx, verts.shape[0], device=device - ) - - # Repeat for normals and textures if present. 
- if len(faces_normals_idx): - faces_normals_idx = _format_faces_indices( - faces_normals_idx, normals.shape[0], device=device, pad_value=-1 - ) - if len(faces_textures_idx): - faces_textures_idx = _format_faces_indices( - faces_textures_idx, verts_uvs.shape[0], device=device, pad_value=-1 - ) - if len(faces_materials_idx): - faces_materials_idx = torch.tensor( - faces_materials_idx, dtype=torch.int64, device=device - ) - - texture_atlas = None - material_colors, texture_images = _load_materials( - material_names, - mtl_path, - data_dir=data_dir, - load_textures=load_textures, - path_manager=path_manager, - device=device, - ) - - if material_colors and not material_names: - # usemtl was not present but single material was present in the .mtl file - material_names.append(next(iter(material_colors.keys()))) - # replace all -1 by 0 material idx - if torch.is_tensor(faces_materials_idx): - faces_materials_idx.clamp_(min=0) - - if create_texture_atlas: - # Using the images and properties from the - # material file make a per face texture map. - - # Create an array of strings of material names for each face. - # If faces_materials_idx == -1 then that face doesn't have a material. - idx = faces_materials_idx.cpu().numpy() - face_material_names = np.array(material_names)[idx] # (F,) - face_material_names[idx == -1] = "" - - # Construct the atlas. - texture_atlas = make_mesh_texture_atlas( - material_colors, - texture_images, - face_material_names, - faces_textures_idx, - verts_uvs, - texture_atlas_size, - texture_wrap, - ) - - faces = _Faces( - verts_idx=faces_verts_idx, - normals_idx=faces_normals_idx, - textures_idx=faces_textures_idx, - materials_idx=faces_materials_idx, - ) - aux = _Aux( - normals=normals if len(normals) else None, - verts_uvs=verts_uvs if len(verts_uvs) else None, - material_colors=material_colors, - texture_images=texture_images, - texture_atlas=texture_atlas, - ) - return verts, faces, aux - - -def save_obj( - f: PathOrStr, - verts, - faces, - decimal_places: Optional[int] = None, - path_manager: Optional[PathManager] = None, - *, - normals: Optional[torch.Tensor] = None, - faces_normals_idx: Optional[torch.Tensor] = None, - verts_uvs: Optional[torch.Tensor] = None, - faces_uvs: Optional[torch.Tensor] = None, - texture_map: Optional[torch.Tensor] = None, -) -> None: - """ - Save a mesh to an .obj file. - - Args: - f: File (str or path) to which the mesh should be written. - verts: FloatTensor of shape (V, 3) giving vertex coordinates. - faces: LongTensor of shape (F, 3) giving faces. - decimal_places: Number of decimal places for saving. - path_manager: Optional PathManager for interpreting f if - it is a str. - normals: FloatTensor of shape (V, 3) giving normals for faces_normals_idx - to index into. - faces_normals_idx: LongTensor of shape (F, 3) giving the index into - normals for each vertex in the face. - verts_uvs: FloatTensor of shape (V, 2) giving the uv coordinate per vertex. - faces_uvs: LongTensor of shape (F, 3) giving the index into verts_uvs for - each vertex in the face. - texture_map: FloatTensor of shape (H, W, 3) representing the texture map - for the mesh which will be saved as an image. The values are expected - to be in the range [0, 1], - """ - if len(verts) and (verts.dim() != 2 or verts.size(1) != 3): - message = "'verts' should either be empty or of shape (num_verts, 3)." - raise ValueError(message) - - if len(faces) and (faces.dim() != 2 or faces.size(1) != 3): - message = "'faces' should either be empty or of shape (num_faces, 3)." 
- raise ValueError(message) - - if (normals is None) != (faces_normals_idx is None): - message = "'normals' and 'faces_normals_idx' must both be None or neither." - raise ValueError(message) - - if faces_normals_idx is not None and ( - faces_normals_idx.dim() != 2 or faces_normals_idx.size(1) != 3 - ): - message = ( - "'faces_normals_idx' should either be empty or of shape (num_faces, 3)." - ) - raise ValueError(message) - - if normals is not None and (normals.dim() != 2 or normals.size(1) != 3): - message = "'normals' should either be empty or of shape (num_verts, 3)." - raise ValueError(message) - - if faces_uvs is not None and (faces_uvs.dim() != 2 or faces_uvs.size(1) != 3): - message = "'faces_uvs' should either be empty or of shape (num_faces, 3)." - raise ValueError(message) - - if verts_uvs is not None and (verts_uvs.dim() != 2 or verts_uvs.size(1) != 2): - message = "'verts_uvs' should either be empty or of shape (num_verts, 2)." - raise ValueError(message) - - if texture_map is not None and (texture_map.dim() != 3 or texture_map.size(2) != 3): - message = "'texture_map' should either be empty or of shape (H, W, 3)." - raise ValueError(message) - - if path_manager is None: - path_manager = PathManager() - - save_texture = all(t is not None for t in [faces_uvs, verts_uvs, texture_map]) - output_path = Path(f) - - # Save the .obj file - with _open_file(f, path_manager, "w") as f: - if save_texture: - # Add the header required for the texture info to be loaded correctly - obj_header = "\nmtllib {0}.mtl\nusemtl mesh\n\n".format(output_path.stem) - f.write(obj_header) - _save( - f, - verts, - faces, - decimal_places, - normals=normals, - faces_normals_idx=faces_normals_idx, - verts_uvs=verts_uvs, - faces_uvs=faces_uvs, - save_texture=save_texture, - save_normals=normals is not None, - ) - - # Save the .mtl and .png files associated with the texture - if save_texture: - image_path = output_path.with_suffix(".png") - mtl_path = output_path.with_suffix(".mtl") - if isinstance(f, str): - # Back to str for iopath interpretation. - image_path = str(image_path) - mtl_path = str(mtl_path) - - # Save texture map to output folder - # pyre-fixme[16] # undefined attribute cpu - texture_map = texture_map.detach().cpu() * 255.0 - image = Image.fromarray(texture_map.numpy().astype(np.uint8)) - with _open_file(image_path, path_manager, "wb") as im_f: - image.save(im_f) - - # Create .mtl file with the material name and texture map filename - # TODO: enable material properties to also be saved. - with _open_file(mtl_path, path_manager, "w") as f_mtl: - lines = f"newmtl mesh\n" f"map_Kd {output_path.stem}.png\n" - f_mtl.write(lines) - - -# TODO (nikhilar) Speed up this function. -def _save( - f, - verts, - faces, - decimal_places: Optional[int] = None, - *, - normals: Optional[torch.Tensor] = None, - faces_normals_idx: Optional[torch.Tensor] = None, - verts_uvs: Optional[torch.Tensor] = None, - faces_uvs: Optional[torch.Tensor] = None, - save_texture: bool = False, - save_normals: bool = False, -) -> None: - - if len(verts) and (verts.dim() != 2 or verts.size(1) != 3): - message = "'verts' should either be empty or of shape (num_verts, 3)." - raise ValueError(message) - - if len(faces) and (faces.dim() != 2 or faces.size(1) != 3): - message = "'faces' should either be empty or of shape (num_faces, 3)." 
- raise ValueError(message) - - if not (len(verts) or len(faces)): - warnings.warn("Empty 'verts' and 'faces' arguments provided") - return - - verts, faces = verts.cpu(), faces.cpu() - - lines = "" - - if decimal_places is None: - float_str = "%f" - else: - float_str = "%" + ".%df" % decimal_places - - if len(verts): - V, D = verts.shape - for i in range(V): - vert = [float_str % verts[i, j] for j in range(D)] - lines += "v %s\n" % " ".join(vert) - - if save_normals: - assert normals is not None - assert faces_normals_idx is not None - lines += _write_normals(normals, faces_normals_idx, float_str) - - if save_texture: - assert faces_uvs is not None - assert verts_uvs is not None - - if faces_uvs is not None and (faces_uvs.dim() != 2 or faces_uvs.size(1) != 3): - message = "'faces_uvs' should either be empty or of shape (num_faces, 3)." - raise ValueError(message) - - if verts_uvs is not None and (verts_uvs.dim() != 2 or verts_uvs.size(1) != 2): - message = "'verts_uvs' should either be empty or of shape (num_verts, 2)." - raise ValueError(message) - - verts_uvs, faces_uvs = verts_uvs.cpu(), faces_uvs.cpu() - - # Save verts uvs after verts - if len(verts_uvs): - uV, uD = verts_uvs.shape - for i in range(uV): - uv = [float_str % verts_uvs[i, j] for j in range(uD)] - lines += "vt %s\n" % " ".join(uv) - - f.write(lines) - - if torch.any(faces >= verts.shape[0]) or torch.any(faces < 0): - warnings.warn("Faces have invalid indices") - - if len(faces): - _write_faces( - f, - faces, - faces_uvs if save_texture else None, - faces_normals_idx if save_normals else None, - ) - - -def _write_normals( - normals: torch.Tensor, faces_normals_idx: torch.Tensor, float_str: str -) -> str: - if faces_normals_idx.dim() != 2 or faces_normals_idx.size(1) != 3: - message = ( - "'faces_normals_idx' should either be empty or of shape (num_faces, 3)." - ) - raise ValueError(message) - - if normals.dim() != 2 or normals.size(1) != 3: - message = "'normals' should either be empty or of shape (num_verts, 3)." - raise ValueError(message) - - normals, faces_normals_idx = normals.cpu(), faces_normals_idx.cpu() - - lines = [] - V, D = normals.shape - for i in range(V): - normal = [float_str % normals[i, j] for j in range(D)] - lines.append("vn %s\n" % " ".join(normal)) - return "".join(lines) - - -def _write_faces( - f, - faces: torch.Tensor, - faces_uvs: Optional[torch.Tensor], - faces_normals_idx: Optional[torch.Tensor], -) -> None: - F, P = faces.shape - for i in range(F): - if faces_normals_idx is not None: - if faces_uvs is not None: - # Format faces as {verts_idx}/{verts_uvs_idx}/{verts_normals_idx} - face = [ - "%d/%d/%d" - % ( - faces[i, j] + 1, - faces_uvs[i, j] + 1, - faces_normals_idx[i, j] + 1, - ) - for j in range(P) - ] - else: - # Format faces as {verts_idx}//{verts_normals_idx} - face = [ - "%d//%d" % (faces[i, j] + 1, faces_normals_idx[i, j] + 1) - for j in range(P) - ] - elif faces_uvs is not None: - # Format faces as {verts_idx}/{verts_uvs_idx} - face = ["%d/%d" % (faces[i, j] + 1, faces_uvs[i, j] + 1) for j in range(P)] - else: - face = ["%d" % (faces[i, j] + 1) for j in range(P)] - - if i + 1 < F: - f.write("f %s\n" % " ".join(face)) - else: - # No newline at the end of the file. 
- f.write("f %s" % " ".join(face)) diff --git a/pytorch3d/pytorch3d/io/off_io.py b/pytorch3d/pytorch3d/io/off_io.py deleted file mode 100644 index 4262269ee4bd38881e02c652c5cdbb2cc5d673f5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/off_io.py +++ /dev/null @@ -1,494 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -""" -This module implements utility functions for loading and saving -meshes as .off files. - -This format is introduced, for example, at -http://www.geomview.org/docs/html/OFF.html . -""" -import warnings -from typing import cast, Optional, Tuple, Union - -import numpy as np -import torch -from iopath.common.file_io import PathManager -from pytorch3d.io.utils import _check_faces_indices, _open_file, PathOrStr -from pytorch3d.renderer import TexturesAtlas, TexturesVertex -from pytorch3d.structures import Meshes - -from .pluggable_formats import endswith, MeshFormatInterpreter - - -def _is_line_empty(line: Union[str, bytes]) -> bool: - """ - Returns whether line is not relevant in an OFF file. - """ - line = line.strip() - return len(line) == 0 or line[:1] == b"#" - - -def _count_next_line_periods(file) -> int: - """ - Returns the number of . characters before any # on the next - meaningful line. - """ - old_offset = file.tell() - line = file.readline() - while _is_line_empty(line): - line = file.readline() - if len(line) == 0: - raise ValueError("Premature end of file") - - contents = line.split(b"#")[0] - count = contents.count(b".") - file.seek(old_offset) - return count - - -def _read_faces_lump( - file, n_faces: int, n_colors: Optional[int] -) -> Optional[Tuple[np.ndarray, int, Optional[np.ndarray]]]: - """ - Parse n_faces faces and faces_colors from the file, - if they all have the same number of vertices. - This is used in two ways. - 1) To try to read all faces. - 2) To read faces one-by-one if that failed. - - Args: - file: file-like object being read. - n_faces: The known number of faces yet to read. - n_colors: The number of colors if known already. - - Returns: - - 2D numpy array of faces - - number of colors found - - 2D numpy array of face colors if found. - of None if there are faces with different numbers of vertices. - """ - if n_faces == 0: - return np.array([[]]), 0, None - old_offset = file.tell() - try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=".* Empty input file.*", category=UserWarning - ) - data = np.loadtxt(file, dtype=np.float32, ndmin=2, max_rows=n_faces) - except ValueError as e: - if n_faces > 1 and "Wrong number of columns" in e.args[0]: - file.seek(old_offset) - return None - raise ValueError("Not enough face data.") from None - - if len(data) != n_faces: - raise ValueError("Not enough face data.") - face_size = int(data[0, 0]) - if (data[:, 0] != face_size).any(): - msg = "A line of face data did not have the specified length." 
- raise ValueError(msg) - if face_size < 3: - raise ValueError("Faces must have at least 3 vertices.") - - n_colors_found = data.shape[1] - 1 - face_size - if n_colors is not None and n_colors_found != n_colors: - raise ValueError("Number of colors differs between faces.") - n_colors = n_colors_found - if n_colors not in [0, 3, 4]: - raise ValueError("Unexpected number of colors.") - - face_raw_data = data[:, 1 : 1 + face_size].astype("int64") - if face_size == 3: - face_data = face_raw_data - else: - face_arrays = [ - face_raw_data[:, [0, i + 1, i + 2]] for i in range(face_size - 2) - ] - face_data = np.vstack(face_arrays) - - if n_colors == 0: - return face_data, 0, None - colors = data[:, 1 + face_size :] - if face_size == 3: - return face_data, n_colors, colors - return face_data, n_colors, np.tile(colors, (face_size - 2, 1)) - - -def _read_faces( - file, n_faces: int -) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]: - """ - Returns faces and face colors from the file. - - Args: - file: file-like object being read. - n_faces: The known number of faces. - - Returns: - 2D numpy arrays of faces and face colors, or None for each if - they are not present. - """ - if n_faces == 0: - return None, None - - color_is_int = 0 == _count_next_line_periods(file) - color_scale = 1 / 255.0 if color_is_int else 1 - - faces_ncolors_colors = _read_faces_lump(file, n_faces=n_faces, n_colors=None) - if faces_ncolors_colors is not None: - faces, _, colors = faces_ncolors_colors - if colors is None: - return faces, None - return faces, colors * color_scale - - faces_list, colors_list = [], [] - n_colors = None - for _ in range(n_faces): - faces_ncolors_colors = _read_faces_lump(file, n_faces=1, n_colors=n_colors) - faces_found, n_colors, colors_found = cast( - Tuple[np.ndarray, int, Optional[np.ndarray]], faces_ncolors_colors - ) - faces_list.append(faces_found) - colors_list.append(colors_found) - faces = np.vstack(faces_list) - if n_colors == 0: - colors = None - else: - colors = np.vstack(colors_list) * color_scale - return faces, colors - - -def _read_verts(file, n_verts: int) -> Tuple[np.ndarray, Optional[np.ndarray]]: - """ - Returns verts and vertex colors from the file. - - Args: - file: file-like object being read. - n_verts: The known number of faces. - - Returns: - 2D numpy arrays of verts and (if present) - vertex colors. - """ - - color_is_int = 3 == _count_next_line_periods(file) - color_scale = 1 / 255.0 if color_is_int else 1 - - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=".* Empty input file.*", category=UserWarning - ) - data = np.loadtxt(file, dtype=np.float32, ndmin=2, max_rows=n_verts) - if data.shape[0] != n_verts: - raise ValueError("Not enough vertex data.") - if data.shape[1] not in [3, 6, 7]: - raise ValueError("Bad vertex data.") - - if data.shape[1] == 3: - return data, None - return data[:, :3], data[:, 3:] * color_scale # [] - - -def _load_off_stream(file) -> dict: - """ - Load the data from a stream of an .off file. - - Example .off file format: - - off - 8 6 1927 { number of vertices, faces, and (not used) edges } - # comment { comments with # sign } - 0 0 0 { start of vertex list } - 0 0 1 - 0 1 1 - 0 1 0 - 1 0 0 - 1 0 1 - 1 1 1 - 1 1 0 - 4 0 1 2 3 { start of face list } - 4 7 6 5 4 - 4 0 4 5 1 - 4 1 5 6 2 - 4 2 6 7 3 - 4 3 7 4 0 - - Args: - file: A binary file-like object (with methods read, readline, - tell and seek). - - Returns dictionary possibly containing: - verts: (always present) FloatTensor of shape (V, 3). 
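The fan triangulation applied to polygonal OFF faces can be seen in isolation in the sketch below; it mirrors the face_raw_data[:, [0, i + 1, i + 2]] indexing used in _read_faces_lump above, and the quad indices are made up:

    import numpy as np

    face_raw_data = np.array([[0, 1, 2, 3]])  # one quad given as 4 vertex indices
    face_size = face_raw_data.shape[1]
    triangles = np.vstack(
        [face_raw_data[:, [0, i + 1, i + 2]] for i in range(face_size - 2)]
    )
    # triangles == [[0, 1, 2], [0, 2, 3]]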
- verts_colors: FloatTensor of shape (V, C) where C is 3 or 4. - faces: LongTensor of vertex indices, split into triangles, shape (F, 3). - faces_colors: FloatTensor of shape (F, C), where C is 3 or 4. - """ - header = file.readline() - - while _is_line_empty(header): - header = file.readline() - - if header[:3].lower() == b"off": - header = header[3:] - - while _is_line_empty(header): - header = file.readline() - - items = header.split() - if len(items) < 3: - raise ValueError("Invalid counts line: %s" % header) - - try: - n_verts = int(items[0]) - except ValueError: - raise ValueError("Invalid counts line: %s" % header) from None - try: - n_faces = int(items[1]) - except ValueError: - raise ValueError("Invalid counts line: %s" % header) from None - - if (len(items) > 3 and not items[3].startswith(b"#")) or n_verts < 0 or n_faces < 0: - raise ValueError("Invalid counts line: %s" % header) - - verts, verts_colors = _read_verts(file, n_verts) - faces, faces_colors = _read_faces(file, n_faces) - - end = file.read().strip() - if len(end) != 0: - raise ValueError("Extra data at end of file: " + str(end[:20])) - - out = {"verts": verts} - if verts_colors is not None: - out["verts_colors"] = verts_colors - if faces is not None: - out["faces"] = faces - if faces_colors is not None: - out["faces_colors"] = faces_colors - return out - - -def _write_off_data( - file, - verts: torch.Tensor, - verts_colors: Optional[torch.Tensor] = None, - faces: Optional[torch.LongTensor] = None, - faces_colors: Optional[torch.Tensor] = None, - decimal_places: Optional[int] = None, -) -> None: - """ - Internal implementation for saving 3D data to a .off file. - - Args: - file: Binary file object to which the 3D data should be written. - verts: FloatTensor of shape (V, 3) giving vertex coordinates. - verts_colors: FloatTensor of shape (V, C) giving vertex colors where C is 3 or 4. - faces: LongTensor of shape (F, 3) giving faces. - faces_colors: FloatTensor of shape (V, C) giving face colors where C is 3 or 4. - decimal_places: Number of decimal places for saving. - """ - nfaces = 0 if faces is None else faces.shape[0] - file.write(f"off\n{verts.shape[0]} {nfaces} 0\n".encode("ascii")) - - if verts_colors is not None: - verts = torch.cat((verts, verts_colors), dim=1) - if decimal_places is None: - float_str = "%f" - else: - float_str = "%" + ".%df" % decimal_places - np.savetxt(file, verts.cpu().detach().numpy(), float_str) - - if faces is not None: - _check_faces_indices(faces, max_index=verts.shape[0]) - - if faces_colors is not None: - face_data = torch.cat( - [ - cast(torch.Tensor, faces).cpu().to(torch.float64), - faces_colors.detach().cpu().to(torch.float64), - ], - dim=1, - ) - format = "3 %d %d %d" + " %f" * faces_colors.shape[1] - np.savetxt(file, face_data.numpy(), format) - elif faces is not None: - np.savetxt(file, faces.cpu().detach().numpy(), "3 %d %d %d") - - -def _save_off( - file, - *, - verts: torch.Tensor, - verts_colors: Optional[torch.Tensor] = None, - faces: Optional[torch.LongTensor] = None, - faces_colors: Optional[torch.Tensor] = None, - decimal_places: Optional[int] = None, - path_manager: PathManager, -) -> None: - """ - Save a mesh to an ascii .off file. - - Args: - file: File (or path) to which the mesh should be written. - verts: FloatTensor of shape (V, 3) giving vertex coordinates. - verts_colors: FloatTensor of shape (V, C) giving vertex colors where C is 3 or 4. - faces: LongTensor of shape (F, 3) giving faces. 
- faces_colors: FloatTensor of shape (V, C) giving face colors where C is 3 or 4. - decimal_places: Number of decimal places for saving. - """ - if len(verts) and not (verts.dim() == 2 and verts.size(1) == 3): - message = "Argument 'verts' should either be empty or of shape (num_verts, 3)." - raise ValueError(message) - - if verts_colors is not None and 0 == len(verts_colors): - verts_colors = None - if faces_colors is not None and 0 == len(faces_colors): - faces_colors = None - if faces is not None and 0 == len(faces): - faces = None - - if verts_colors is not None: - if not (verts_colors.dim() == 2 and verts_colors.size(1) in [3, 4]): - message = "verts_colors should have shape (num_faces, C)." - raise ValueError(message) - if verts_colors.shape[0] != verts.shape[0]: - message = "verts_colors should have the same length as verts." - raise ValueError(message) - - if faces is not None and not (faces.dim() == 2 and faces.size(1) == 3): - message = "Argument 'faces' if present should have shape (num_faces, 3)." - raise ValueError(message) - if faces_colors is not None and faces is None: - message = "Cannot have face colors without faces" - raise ValueError(message) - - if faces_colors is not None: - if not (faces_colors.dim() == 2 and faces_colors.size(1) in [3, 4]): - message = "faces_colors should have shape (num_faces, C)." - raise ValueError(message) - if faces_colors.shape[0] != cast(torch.LongTensor, faces).shape[0]: - message = "faces_colors should have the same length as faces." - raise ValueError(message) - - with _open_file(file, path_manager, "wb") as f: - _write_off_data(f, verts, verts_colors, faces, faces_colors, decimal_places) - - -class MeshOffFormat(MeshFormatInterpreter): - """ - Loads and saves meshes in the ascii OFF format. This is a simple - format which can only deal with the following texture types: - - - TexturesVertex, i.e. one color for each vertex - - TexturesAtlas with R=1, i.e. one color for each face. - - There are some possible features of OFF files which we do not support - and which appear to be rare: - - - Four dimensional data. - - Binary data. - - Vertex Normals. - - Texture coordinates. - - "COFF" header. - - Example .off file format: - - off - 8 6 1927 { number of vertices, faces, and (not used) edges } - # comment { comments with # sign } - 0 0 0 { start of vertex list } - 0 0 1 - 0 1 1 - 0 1 0 - 1 0 0 - 1 0 1 - 1 1 1 - 1 1 0 - 4 0 1 2 3 { start of face list } - 4 7 6 5 4 - 4 0 4 5 1 - 4 1 5 6 2 - 4 2 6 7 3 - 4 3 7 4 0 - - """ - - def __init__(self) -> None: - self.known_suffixes = (".off",) - - def read( - self, - path: PathOrStr, - include_textures: bool, - device, - path_manager: PathManager, - **kwargs, - ) -> Optional[Meshes]: - if not endswith(path, self.known_suffixes): - return None - - with _open_file(path, path_manager, "rb") as f: - data = _load_off_stream(f) - verts = torch.from_numpy(data["verts"]).to(device) - if "faces" in data: - faces = torch.from_numpy(data["faces"]).to(dtype=torch.int64, device=device) - else: - faces = torch.zeros((0, 3), dtype=torch.int64, device=device) - - textures = None - if "verts_colors" in data: - if "faces_colors" in data: - msg = "Faces colors ignored because vertex colors provided too." 
- warnings.warn(msg) - verts_colors = torch.from_numpy(data["verts_colors"]).to(device) - textures = TexturesVertex([verts_colors]) - elif "faces_colors" in data: - faces_colors = torch.from_numpy(data["faces_colors"]).to(device) - textures = TexturesAtlas([faces_colors[:, None, None, :]]) - - mesh = Meshes( - verts=[verts.to(device)], faces=[faces.to(device)], textures=textures - ) - return mesh - - def save( - self, - data: Meshes, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - decimal_places: Optional[int] = None, - **kwargs, - ) -> bool: - if not endswith(path, self.known_suffixes): - return False - - verts = data.verts_list()[0] - faces = data.faces_list()[0] - if isinstance(data.textures, TexturesVertex): - [verts_colors] = data.textures.verts_features_list() - else: - verts_colors = None - - faces_colors = None - if isinstance(data.textures, TexturesAtlas): - [atlas] = data.textures.atlas_list() - F, R, _, D = atlas.shape - if R == 1: - faces_colors = atlas[:, 0, 0, :] - - _save_off( - file=path, - verts=verts, - faces=faces, - verts_colors=verts_colors, - faces_colors=faces_colors, - decimal_places=decimal_places, - path_manager=path_manager, - ) - return True diff --git a/pytorch3d/pytorch3d/io/pluggable.py b/pytorch3d/pytorch3d/io/pluggable.py deleted file mode 100644 index 0c37859e8f4e392b1fb50d10a3880647887bd665..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/pluggable.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from collections import deque -from pathlib import Path -from typing import Deque, Optional, Union - -from iopath.common.file_io import PathManager -from pytorch3d.common.datatypes import Device -from pytorch3d.structures import Meshes, Pointclouds - -from .obj_io import MeshObjFormat -from .off_io import MeshOffFormat -from .pluggable_formats import MeshFormatInterpreter, PointcloudFormatInterpreter -from .ply_io import MeshPlyFormat, PointcloudPlyFormat - - -""" -This module has the master functions for loading and saving data. - -The main usage is via the IO object, and its methods -`load_mesh`, `save_mesh`, `load_pointcloud` and `save_pointcloud`. - -For example, to load a mesh you might do:: - - from pytorch3d.io import IO - - mesh = IO().load_mesh("mymesh.obj") - -and to save a point cloud you might do:: - - pcl = Pointclouds(...) - IO().save_pointcloud(pcl, "output_pointcloud.obj") - -""" - - -class IO: - """ - This class is the interface to flexible loading and saving of meshes and point clouds. - - In simple cases the user will just initialize an instance of this class as `IO()` - and then use its load and save functions. The arguments of the initializer are not - usually needed. - - The user can add their own formats for saving and loading by passing their own objects - to the register_* functions. - - Args: - include_default_formats: If False, the built-in file formats will not be available. - Then only user-registered formats can be used. - path_manager: Used to customize how paths given as strings are interpreted. 
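The registration hooks described above can be exercised with a small sketch. The class name and the ".xyz" suffix below are invented; a real implementation would parse the file inside read():

    from pytorch3d.io import IO
    from pytorch3d.io.pluggable_formats import endswith, MeshFormatInterpreter

    class MyXyzFormat(MeshFormatInterpreter):
        def __init__(self) -> None:
            self.known_suffixes = (".xyz",)

        def read(self, path, include_textures, device, path_manager, **kwargs):
            if not endswith(path, self.known_suffixes):
                return None  # not our format; let other interpreters try
            raise NotImplementedError("parse the file and return a Meshes here")

        def save(self, data, path, path_manager, binary, **kwargs):
            return False  # this sketch is read-only

    io = IO()
    io.register_meshes_format(MyXyzFormat())
    mesh = io.load_mesh("mymesh.obj")  # built-in formats are still available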
- """ - - def __init__( - self, - include_default_formats: bool = True, - path_manager: Optional[PathManager] = None, - ) -> None: - if path_manager is None: - self.path_manager = PathManager() - else: - self.path_manager = path_manager - - self.mesh_interpreters: Deque[MeshFormatInterpreter] = deque() - self.pointcloud_interpreters: Deque[PointcloudFormatInterpreter] = deque() - - if include_default_formats: - self.register_default_formats() - - def register_default_formats(self) -> None: - self.register_meshes_format(MeshObjFormat()) - self.register_meshes_format(MeshOffFormat()) - self.register_meshes_format(MeshPlyFormat()) - self.register_pointcloud_format(PointcloudPlyFormat()) - - def register_meshes_format(self, interpreter: MeshFormatInterpreter) -> None: - """ - Register a new interpreter for a new mesh file format. - - Args: - interpreter: the new interpreter to use, which must be an instance - of a class which inherits MeshFormatInterpreter. - """ - if not isinstance(interpreter, MeshFormatInterpreter): - raise ValueError("Invalid interpreter") - self.mesh_interpreters.appendleft(interpreter) - - def register_pointcloud_format( - self, interpreter: PointcloudFormatInterpreter - ) -> None: - """ - Register a new interpreter for a new point cloud file format. - - Args: - interpreter: the new interpreter to use, which must be an instance - of a class which inherits PointcloudFormatInterpreter. - """ - if not isinstance(interpreter, PointcloudFormatInterpreter): - raise ValueError("Invalid interpreter") - self.pointcloud_interpreters.appendleft(interpreter) - - def load_mesh( - self, - path: Union[str, Path], - include_textures: bool = True, - device: Device = "cpu", - **kwargs, - ) -> Meshes: - """ - Attempt to load a mesh from the given file, using a registered format. - Materials are not returned. If you have a .obj file with materials - you might want to load them with the load_obj function instead. - - Args: - path: file to read - include_textures: whether to try to load texture information - device: device on which to leave the data. - - Returns: - new Meshes object containing one mesh. - """ - for mesh_interpreter in self.mesh_interpreters: - mesh = mesh_interpreter.read( - path, - include_textures=include_textures, - path_manager=self.path_manager, - device=device, - **kwargs, - ) - if mesh is not None: - return mesh - - raise ValueError(f"No mesh interpreter found to read {path}.") - - def save_mesh( - self, - data: Meshes, - path: Union[str, Path], - binary: Optional[bool] = None, - include_textures: bool = True, - **kwargs, - ) -> None: - """ - Attempt to save a mesh to the given file, using a registered format. - - Args: - data: a 1-element Meshes - path: file to write - binary: If there is a choice, whether to save in a binary format. - include_textures: If textures are present, whether to try to save - them. - """ - if not isinstance(data, Meshes): - raise ValueError("Meshes object expected.") - - if len(data) != 1: - raise ValueError("Can only save a single mesh.") - - for mesh_interpreter in self.mesh_interpreters: - success = mesh_interpreter.save( - data, path, path_manager=self.path_manager, binary=binary, **kwargs - ) - if success: - return - - raise ValueError(f"No mesh interpreter found to write to {path}.") - - def load_pointcloud( - self, path: Union[str, Path], device: Device = "cpu", **kwargs - ) -> Pointclouds: - """ - Attempt to load a point cloud from the given file, using a registered format. 
- - Args: - path: file to read - device: Device (as str or torch.device) on which to load the data. - - Returns: - new Pointclouds object containing one mesh. - """ - for pointcloud_interpreter in self.pointcloud_interpreters: - pointcloud = pointcloud_interpreter.read( - path, path_manager=self.path_manager, device=device, **kwargs - ) - if pointcloud is not None: - return pointcloud - - raise ValueError(f"No point cloud interpreter found to read {path}.") - - def save_pointcloud( - self, - data: Pointclouds, - path: Union[str, Path], - binary: Optional[bool] = None, - **kwargs, - ) -> None: - """ - Attempt to save a point cloud to the given file, using a registered format. - - Args: - data: a 1-element Pointclouds - path: file to write - binary: If there is a choice, whether to save in a binary format. - """ - if not isinstance(data, Pointclouds): - raise ValueError("Pointclouds object expected.") - - if len(data) != 1: - raise ValueError("Can only save a single point cloud.") - - for pointcloud_interpreter in self.pointcloud_interpreters: - success = pointcloud_interpreter.save( - data, path, path_manager=self.path_manager, binary=binary, **kwargs - ) - if success: - return - - raise ValueError(f"No point cloud interpreter found to write to {path}.") diff --git a/pytorch3d/pytorch3d/io/pluggable_formats.py b/pytorch3d/pytorch3d/io/pluggable_formats.py deleted file mode 100644 index 8973b7c701e249fe8576ec1878e66dd9ae17758d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/pluggable_formats.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import pathlib -from typing import Optional, Tuple - -from iopath.common.file_io import PathManager -from pytorch3d.common.datatypes import Device -from pytorch3d.io.utils import PathOrStr -from pytorch3d.structures import Meshes, Pointclouds - - -""" -This module has the base classes which must be extended to define -an interpreter for loading and saving data in a particular format. -These can be registered on an IO object so that they can be used in -its load_* and save_* functions. -""" - - -def endswith(path: PathOrStr, suffixes: Tuple[str, ...]) -> bool: - """ - Returns whether the path ends with one of the given suffixes. - If `path` is not actually a path, returns True. This is useful - for allowing interpreters to bypass inappropriate paths, but - always accepting streams. - """ - if isinstance(path, pathlib.Path): - return path.suffix.lower() in suffixes - if isinstance(path, str): - return path.lower().endswith(suffixes) - return True - - -class MeshFormatInterpreter: - """ - This is a base class for an interpreter which can read or write - a mesh in a particular format. - """ - - def read( - self, - path: PathOrStr, - include_textures: bool, - device: Device, - path_manager: PathManager, - **kwargs, - ) -> Optional[Meshes]: - """ - Read the data from the specified file and return it as - a Meshes object. - - Args: - path: path to load. - include_textures: whether to try to load texture information. - device: torch.device to load data on to. - path_manager: PathManager to interpret the path. - - Returns: - None if self is not the appropriate object to interpret the given - path. - Otherwise, the read Meshes object. 
- """ - raise NotImplementedError() - - def save( - self, - data: Meshes, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - **kwargs, - ) -> bool: - """ - Save the given Meshes object to the given path. - - Args: - data: mesh to save - path: path to save to, which may be overwritten. - path_manager: PathManager to interpret the path. - binary: If there is a choice, whether to save in a binary format. - - Returns: - False: if self is not the appropriate object to write to the given path. - True: on success. - """ - raise NotImplementedError() - - -class PointcloudFormatInterpreter: - """ - This is a base class for an interpreter which can read or write - a point cloud in a particular format. - """ - - def read( - self, path: PathOrStr, device: Device, path_manager: PathManager, **kwargs - ) -> Optional[Pointclouds]: - """ - Read the data from the specified file and return it as - a Pointclouds object. - - Args: - path: path to load. - device: torch.device to load data on to. - path_manager: PathManager to interpret the path. - - Returns: - None if self is not the appropriate object to interpret the given - path. - Otherwise, the read Pointclouds object. - """ - raise NotImplementedError() - - def save( - self, - data: Pointclouds, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - **kwargs, - ) -> bool: - """ - Save the given Pointclouds object to the given path. - - Args: - data: point cloud object to save - path: path to save to, which may be overwritten. - path_manager: PathManager to interpret the path. - binary: If there is a choice, whether to save in a binary format. - - Returns: - False: if self is not the appropriate object to write to the given path. - True: on success. - """ - raise NotImplementedError() diff --git a/pytorch3d/pytorch3d/io/ply_io.py b/pytorch3d/pytorch3d/io/ply_io.py deleted file mode 100644 index 1d59b1934c1e717dbe06e993a05eae0ef6a2beef..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/ply_io.py +++ /dev/null @@ -1,1537 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -""" -This module implements utility functions for loading and saving -meshes and point clouds as PLY files. 
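For orientation, a minimal ASCII file of the kind handled by this module might look like the following (illustrative only; the property names and counts are arbitrary):

    ply
    format ascii 1.0
    comment made with a text editor
    element vertex 3
    property float x
    property float y
    property float z
    element face 1
    property list uchar int vertex_indices
    end_header
    0 0 0
    0 0 1
    0 1 1
    3 0 1 2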
-""" -import itertools -import os -import struct -import sys -import warnings -from collections import namedtuple -from dataclasses import asdict, dataclass -from io import BytesIO, TextIOBase -from typing import List, Optional, Tuple - -import numpy as np -import torch -from iopath.common.file_io import PathManager -from pytorch3d.io.utils import ( - _check_faces_indices, - _make_tensor, - _open_file, - _read_image, - PathOrStr, -) -from pytorch3d.renderer import TexturesUV, TexturesVertex -from pytorch3d.structures import Meshes, Pointclouds - -from .pluggable_formats import ( - endswith, - MeshFormatInterpreter, - PointcloudFormatInterpreter, -) - - -_PlyTypeData = namedtuple("_PlyTypeData", "size struct_char np_type") - -_PLY_TYPES = { - "char": _PlyTypeData(1, "b", np.byte), - "uchar": _PlyTypeData(1, "B", np.ubyte), - "short": _PlyTypeData(2, "h", np.short), - "ushort": _PlyTypeData(2, "H", np.ushort), - "int": _PlyTypeData(4, "i", np.int32), - "uint": _PlyTypeData(4, "I", np.uint32), - "float": _PlyTypeData(4, "f", np.float32), - "double": _PlyTypeData(8, "d", np.float64), - "int8": _PlyTypeData(1, "b", np.byte), - "uint8": _PlyTypeData(1, "B", np.ubyte), - "int16": _PlyTypeData(2, "h", np.short), - "uint16": _PlyTypeData(2, "H", np.ushort), - "int32": _PlyTypeData(4, "i", np.int32), - "uint32": _PlyTypeData(4, "I", np.uint32), - "float32": _PlyTypeData(4, "f", np.float32), - "float64": _PlyTypeData(8, "d", np.float64), -} - -_Property = namedtuple("_Property", "name data_type list_size_type") - - -class _PlyElementType: - """ - Description of an element of a Ply file. - Members: - self.properties: (List[_Property]) description of all the properties. - Each one contains a name and data type. - self.count: (int) number of such elements in the file - self.name: (str) name of the element - """ - - def __init__(self, name: str, count: int) -> None: - self.name = name - self.count = count - self.properties: List[_Property] = [] - - def add_property( - self, name: str, data_type: str, list_size_type: Optional[str] = None - ): - """Adds a new property. - - Args: - name: (str) name of the property. - data_type: (str) PLY data type. - list_size_type: (str) PLY data type of the list size, or None if not - a list. - """ - for property in self.properties: - if property.name == name: - msg = "Cannot have two properties called %s in %s." - raise ValueError(msg % (name, self.name)) - self.properties.append(_Property(name, data_type, list_size_type)) - - def is_fixed_size(self) -> bool: - """Return whether the Element has no list properties - - Returns: - True if none of the properties are lists. - """ - for property in self.properties: - if property.list_size_type is not None: - return False - return True - - def is_constant_type_fixed_size(self) -> bool: - """Return whether the Element has all properties of the same non-list - type. - - Returns: - True if none of the properties are lists and all the properties - share a type. - """ - if not self.is_fixed_size(): - return False - first_type = _PLY_TYPES[self.properties[0].data_type] - for property in self.properties: - if _PLY_TYPES[property.data_type] != first_type: - return False - return True - - def try_constant_list(self) -> bool: - """Whether the element is just a single list, which might have a - constant size, and therefore we could try to parse quickly with numpy. - - Returns: - True if the only property is a list. 
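A short worked example of these predicates (the element definitions are invented for illustration): a vertex element declared as

    element vertex 8
    property float x
    property float y
    property float z
    property uchar red
    property uchar green
    property uchar blue

has is_fixed_size() == True (no list properties) but is_constant_type_fixed_size() == False (float and uchar are mixed), whereas a face element declared as

    element face 6
    property list uchar int vertex_indices

has is_fixed_size() == False and try_constant_list() == True, since its only property is a list.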
- """ - if len(self.properties) != 1: - return False - if self.properties[0].list_size_type is None: - return False - return True - - -class _PlyHeader: - def __init__(self, f) -> None: - """ - Load a header of a Ply file from a file-like object. - Members: - self.elements: (List[_PlyElementType]) element description - self.ascii: (bool) Whether in ascii format - self.big_endian: (bool) (if not ascii) whether big endian - self.obj_info: (List[str]) arbitrary extra data - self.comments: (List[str]) comments - - Args: - f: file-like object. - """ - if f.readline() not in [b"ply\n", b"ply\r\n", "ply\n"]: - raise ValueError("Invalid file header.") - seen_format = False - self.elements: List[_PlyElementType] = [] - self.comments: List[str] = [] - self.obj_info: List[str] = [] - while True: - line = f.readline() - if isinstance(line, bytes): - line = line.decode("ascii") - line = line.strip() - if line == "end_header": - if not self.elements: - raise ValueError("No elements found.") - if not self.elements[-1].properties: - raise ValueError("Found an element with no properties.") - if not seen_format: - raise ValueError("No format line found.") - break - if not seen_format: - if line == "format ascii 1.0": - seen_format = True - self.ascii = True - continue - if line == "format binary_little_endian 1.0": - seen_format = True - self.ascii = False - self.big_endian = False - continue - if line == "format binary_big_endian 1.0": - seen_format = True - self.ascii = False - self.big_endian = True - continue - if line.startswith("format"): - raise ValueError("Invalid format line.") - if line.startswith("comment "): - self.comments.append(line[8:]) - continue - if line.startswith("comment") or len(line) == 0: - continue - if line.startswith("element"): - self._parse_element(line) - continue - if line.startswith("obj_info "): - self.obj_info.append(line[9:]) - continue - if line.startswith("property"): - self._parse_property(line) - continue - raise ValueError("Invalid line: %s." % line) - - def _parse_property(self, line: str): - """ - Decode a ply file header property line. - - Args: - line: (str) the ply file's line. - """ - if not self.elements: - raise ValueError("Encountered property before any element.") - items = line.split(" ") - if len(items) not in [3, 5]: - raise ValueError("Invalid line: %s" % line) - datatype = items[1] - name = items[-1] - if datatype == "list": - datatype = items[3] - list_size_type = items[2] - if list_size_type not in _PLY_TYPES: - raise ValueError("Invalid datatype: %s" % list_size_type) - else: - list_size_type = None - if datatype not in _PLY_TYPES: - raise ValueError("Invalid datatype: %s" % datatype) - self.elements[-1].add_property(name, datatype, list_size_type) - - def _parse_element(self, line: str): - """ - Decode a ply file header element line. - - Args: - line: (str) the ply file's line. - """ - if self.elements and not self.elements[-1].properties: - raise ValueError("Found an element with no properties.") - items = line.split(" ") - if len(items) != 3: - raise ValueError("Invalid line: %s" % line) - try: - count = int(items[2]) - except ValueError: - msg = "Number of items for %s was not a number." - raise ValueError(msg % items[1]) from None - self.elements.append(_PlyElementType(items[1], count)) - - -def _read_ply_fixed_size_element_ascii(f, definition: _PlyElementType): - """ - Given an element which has no lists and one type, read the - corresponding data. 
- - For example - - element vertex 8 - property float x - property float y - property float z - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - - Returns: - 1-element list containing a 2D numpy array corresponding to the data. - The rows are the different values. There is one column for each property. - """ - np_type = _PLY_TYPES[definition.properties[0].data_type].np_type - old_offset = f.tell() - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=".* Empty input file.*", category=UserWarning - ) - data = np.loadtxt( - f, dtype=np_type, comments=None, ndmin=2, max_rows=definition.count - ) - if not len(data): # np.loadtxt() seeks even on empty data - f.seek(old_offset) - if data.shape[1] != len(definition.properties): - raise ValueError("Inconsistent data for %s." % definition.name) - if data.shape[0] != definition.count: - raise ValueError("Not enough data for %s." % definition.name) - return [data] - - -def _read_ply_nolist_element_ascii(f, definition: _PlyElementType): - """ - Given an element which has no lists and multiple types, read the - corresponding data, by loading all the data as float64 and converting - the relevant parts later. - - For example, given - - element vertex 8 - property float x - property float y - property float z - property uchar red - property uchar green - property uchar blue - - the output will have two arrays, the first containing (x,y,z) - and the second (red,green,blue). - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - - Returns: - List of 2D numpy arrays corresponding to the data. - """ - old_offset = f.tell() - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=".* Empty input file.*", category=UserWarning - ) - data = np.loadtxt( - f, dtype=np.float64, comments=None, ndmin=2, max_rows=definition.count - ) - if not len(data): # np.loadtxt() seeks even on empty data - f.seek(old_offset) - if data.shape[1] != len(definition.properties): - raise ValueError("Inconsistent data for %s." % definition.name) - if data.shape[0] != definition.count: - raise ValueError("Not enough data for %s." % definition.name) - pieces = [] - offset = 0 - for dtype, it in itertools.groupby(p.data_type for p in definition.properties): - count = sum(1 for _ in it) - end_offset = offset + count - piece = data[:, offset:end_offset].astype(_PLY_TYPES[dtype].np_type) - pieces.append(piece) - offset = end_offset - return pieces - - -def _try_read_ply_constant_list_ascii(f, definition: _PlyElementType): - """ - If definition is an element which is a single list, attempt to read the - corresponding data assuming every value has the same length. - If the data is ragged, return None and leave f undisturbed. - - For example, if the element is - - element face 2 - property list uchar int vertex_index - - and the data is - - 4 0 1 2 3 - 4 7 6 5 4 - - then the function will return - - [[0, 1, 2, 3], - [7, 6, 5, 4]] - - but if the data is - - 4 0 1 2 3 - 3 6 5 4 - - then the function will return None. - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - - Returns: - If every element has the same size, 2D numpy array corresponding to the - data. The rows are the different values. Otherwise None. 
- """ - np_type = _PLY_TYPES[definition.properties[0].data_type].np_type - old_offset = f.tell() - try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message=".* Empty input file.*", category=UserWarning - ) - data = np.loadtxt( - f, dtype=np_type, comments=None, ndmin=2, max_rows=definition.count - ) - except ValueError: - f.seek(old_offset) - return None - if not len(data): # np.loadtxt() seeks even on empty data - f.seek(old_offset) - if (data[:, 0] != data.shape[1] - 1).any(): - msg = "A line of %s data did not have the specified length." - raise ValueError(msg % definition.name) - if data.shape[0] != definition.count: - raise ValueError("Not enough data for %s." % definition.name) - return data[:, 1:] - - -def _parse_heterogeneous_property_ascii(datum, line_iter, property: _Property): - """ - Read a general data property from an ascii .ply file. - - Args: - datum: list to append the single value to. That value will be a numpy - array if the property is a list property, otherwise an int or - float. - line_iter: iterator to words on the line from which we read. - property: the property object describing the property we are reading. - """ - value = next(line_iter, None) - if value is None: - raise ValueError("Too little data for an element.") - if property.list_size_type is None: - try: - if property.data_type in ["double", "float"]: - datum.append(float(value)) - else: - datum.append(int(value)) - except ValueError: - raise ValueError("Bad numerical data.") from None - else: - try: - length = int(value) - except ValueError: - raise ValueError("A list length was not a number.") from None - list_value = np.zeros(length, dtype=_PLY_TYPES[property.data_type].np_type) - for i in range(length): - inner_value = next(line_iter, None) - if inner_value is None: - raise ValueError("Too little data for an element.") - try: - list_value[i] = float(inner_value) - except ValueError: - raise ValueError("Bad numerical data.") from None - datum.append(list_value) - - -def _read_ply_element_ascii(f, definition: _PlyElementType): - """ - Decode all instances of a single element from an ascii .ply file. - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - - Returns: - In simple cases where every element has the same size, 2D numpy array - corresponding to the data. The rows are the different values. - Otherwise a list of lists of values, where the outer list is - each occurrence of the element, and the inner lists have one value per - property. - """ - if not definition.count: - return [] - if definition.is_constant_type_fixed_size(): - return _read_ply_fixed_size_element_ascii(f, definition) - if definition.is_fixed_size(): - return _read_ply_nolist_element_ascii(f, definition) - if definition.try_constant_list(): - data = _try_read_ply_constant_list_ascii(f, definition) - if data is not None: - return data - - # We failed to read the element as a lump, must process each line manually. - data = [] - for _i in range(definition.count): - line_string = f.readline() - if line_string == "": - raise ValueError("Not enough data for %s." 
% definition.name) - datum = [] - line_iter = iter(line_string.strip().split()) - for property in definition.properties: - _parse_heterogeneous_property_ascii(datum, line_iter, property) - data.append(datum) - if next(line_iter, None) is not None: - raise ValueError("Too much data for an element.") - return data - - -def _read_raw_array( - f, aim: str, length: int, dtype: type = np.uint8, dtype_size: int = 1 -): - """ - Read [length] elements from a file. - - Args: - f: file object - aim: name of target for error message - length: number of elements - dtype: numpy type - dtype_size: number of bytes per element. - - Returns: - new numpy array - """ - - if isinstance(f, BytesIO): - # np.fromfile is faster but won't work on a BytesIO - needed_bytes = length * dtype_size - bytes_data = bytearray(needed_bytes) - n_bytes_read = f.readinto(bytes_data) - if n_bytes_read != needed_bytes: - raise ValueError("Not enough data for %s." % aim) - data = np.frombuffer(bytes_data, dtype=dtype) - else: - data = np.fromfile(f, dtype=dtype, count=length) - if data.shape[0] != length: - raise ValueError("Not enough data for %s." % aim) - return data - - -def _read_ply_fixed_size_element_binary( - f, definition: _PlyElementType, big_endian: bool -): - """ - Given an element which has no lists and one type, read the - corresponding data. - - For example - - element vertex 8 - property float x - property float y - property float z - - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - big_endian: (bool) whether the document is encoded as big endian. - - Returns: - 1-element list containing a 2D numpy array corresponding to the data. - The rows are the different values. There is one column for each property. - """ - ply_type = _PLY_TYPES[definition.properties[0].data_type] - np_type = ply_type.np_type - type_size = ply_type.size - needed_length = definition.count * len(definition.properties) - data = _read_raw_array(f, definition.name, needed_length, np_type, type_size) - - if (sys.byteorder == "big") != big_endian: - data = data.byteswap() - return [data.reshape(definition.count, len(definition.properties))] - - -def _read_ply_element_binary_nolists(f, definition: _PlyElementType, big_endian: bool): - """ - Given an element which has no lists, read the corresponding data as tuple - of numpy arrays, one for each set of adjacent columns with the same type. - - For example, given - - element vertex 8 - property float x - property float y - property float z - property uchar red - property uchar green - property uchar blue - - the output will have two arrays, the first containing (x,y,z) - and the second (red,green,blue). - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - big_endian: (bool) whether the document is encoded as big endian. - - Returns: - List of 2D numpy arrays corresponding to the data. The rows are the different - values. 
- """ - size = sum(_PLY_TYPES[prop.data_type].size for prop in definition.properties) - needed_bytes = size * definition.count - data = _read_raw_array(f, definition.name, needed_bytes).reshape(-1, size) - offset = 0 - pieces = [] - for dtype, it in itertools.groupby(p.data_type for p in definition.properties): - count = sum(1 for _ in it) - bytes_each = count * _PLY_TYPES[dtype].size - end_offset = offset + bytes_each - - # what we want to do is - # piece = data[:, offset:end_offset].view(_PLY_TYPES[dtype].np_type) - # but it fails in the general case - # because of https://github.com/numpy/numpy/issues/9496. - piece = np.lib.stride_tricks.as_strided( - data[:1, offset:end_offset].view(_PLY_TYPES[dtype].np_type), - shape=(definition.count, count), - strides=(data.strides[0], _PLY_TYPES[dtype].size), - ) - - if (sys.byteorder == "big") != big_endian: - piece = piece.byteswap() - pieces.append(piece) - offset = end_offset - return pieces - - -def _try_read_ply_constant_list_binary( - f, definition: _PlyElementType, big_endian: bool -): - """ - If definition is an element which is a single list, attempt to read the - corresponding data assuming every value has the same length. - If the data is ragged, return None and leave f undisturbed. - - For example, if the element is - - element face 2 - property list uchar int vertex_index - - and the data is - - 4 0 1 2 3 - 4 7 6 5 4 - - then the function will return - - [[0, 1, 2, 3], - [7, 6, 5, 4]] - - but if the data is - - 4 0 1 2 3 - 3 6 5 4 - - then the function will return None. - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - big_endian: (bool) whether the document is encoded as big endian. - - Returns: - If every element has the same size, 2D numpy array corresponding to the - data. The rows are the different values. Otherwise None. - """ - property = definition.properties[0] - endian_str = ">" if big_endian else "<" - length_format = endian_str + _PLY_TYPES[property.list_size_type].struct_char - length_struct = struct.Struct(length_format) - - def get_length(): - bytes_data = f.read(length_struct.size) - if len(bytes_data) != length_struct.size: - raise ValueError("Not enough data for %s." % definition.name) - [length] = length_struct.unpack(bytes_data) - return length - - old_offset = f.tell() - - length = get_length() - np_type = _PLY_TYPES[definition.properties[0].data_type].np_type - type_size = _PLY_TYPES[definition.properties[0].data_type].size - data_size = type_size * length - - output = np.zeros((definition.count, length), dtype=np_type) - - for i in range(definition.count): - bytes_data = f.read(data_size) - if len(bytes_data) != data_size: - raise ValueError("Not enough data for %s" % definition.name) - output[i] = np.frombuffer(bytes_data, dtype=np_type) - if i + 1 == definition.count: - break - if length != get_length(): - f.seek(old_offset) - return None - if (sys.byteorder == "big") != big_endian: - output = output.byteswap() - - return output - - -def _read_ply_element_binary(f, definition: _PlyElementType, big_endian: bool) -> list: - """ - Decode all instances of a single element from a binary .ply file. - - Args: - f: file-like object being read. - definition: The element object which describes what we are reading. - big_endian: (bool) whether the document is encoded as big endian. - - Returns: - In simple cases where every element has the same size, 2D numpy array - corresponding to the data. The rows are the different values. 
- Otherwise a list of lists/tuples of values, where the outer list is - each occurrence of the element, and the inner lists have one value per - property. - """ - if not definition.count: - return [] - - if definition.is_constant_type_fixed_size(): - return _read_ply_fixed_size_element_binary(f, definition, big_endian) - if definition.is_fixed_size(): - return _read_ply_element_binary_nolists(f, definition, big_endian) - if definition.try_constant_list(): - data = _try_read_ply_constant_list_binary(f, definition, big_endian) - if data is not None: - return data - - # We failed to read the element as a lump, must process each line manually. - endian_str = ">" if big_endian else "<" - property_structs = [] - for property in definition.properties: - initial_type = property.list_size_type or property.data_type - property_structs.append( - struct.Struct(endian_str + _PLY_TYPES[initial_type].struct_char) - ) - - data = [] - for _i in range(definition.count): - datum = [] - for property, property_struct in zip(definition.properties, property_structs): - size = property_struct.size - initial_data = f.read(size) - if len(initial_data) != size: - raise ValueError("Not enough data for %s" % definition.name) - [initial] = property_struct.unpack(initial_data) - if property.list_size_type is None: - datum.append(initial) - else: - type_size = _PLY_TYPES[property.data_type].size - needed_bytes = type_size * initial - list_data = f.read(needed_bytes) - if len(list_data) != needed_bytes: - raise ValueError("Not enough data for %s" % definition.name) - np_type = _PLY_TYPES[property.data_type].np_type - list_np = np.frombuffer(list_data, dtype=np_type) - if (sys.byteorder == "big") != big_endian: - list_np = list_np.byteswap() - datum.append(list_np) - data.append(datum) - return data - - -def _load_ply_raw_stream(f) -> Tuple[_PlyHeader, dict]: - """ - Implementation for _load_ply_raw which takes a stream. - - Args: - f: A binary or text file-like object. - - Returns: - header: A _PlyHeader object describing the metadata in the ply file. - elements: A dictionary of element names to values. If an element is regular, in - the sense of having no lists or being one uniformly-sized list, then the - value will be a 2D numpy array. If not, it is a list of the relevant - property values. - """ - - header = _PlyHeader(f) - elements = {} - if header.ascii: - for element in header.elements: - elements[element.name] = _read_ply_element_ascii(f, element) - else: - if isinstance(f, TextIOBase): - raise ValueError( - "Cannot safely read a binary ply file using a Text stream." - ) - big = header.big_endian - for element in header.elements: - elements[element.name] = _read_ply_element_binary(f, element, big) - end = f.read().strip() - if len(end) != 0: - raise ValueError("Extra data at end of file: " + str(end[:20])) - return header, elements - - -def _load_ply_raw(f, path_manager: PathManager) -> Tuple[_PlyHeader, dict]: - """ - Load the data from a .ply file. - - Args: - f: A binary or text file-like object (with methods read, readline, - tell and seek), a pathlib path or a string containing a file name. - If the ply file is binary, a text stream is not supported. - It is recommended to use a binary stream. - path_manager: PathManager for loading if f is a str. - - Returns: - header: A _PlyHeader object describing the metadata in the ply file. - elements: A dictionary of element names to values. 
If an element is - regular, in the sense of having no lists or being one - uniformly-sized list, then the value will be a 2D numpy array. - If it has no lists but more than one type, it will be a list of arrays. - If not, it is a list of the relevant property values. - """ - with _open_file(f, path_manager, "rb") as f: - header, elements = _load_ply_raw_stream(f) - return header, elements - - -@dataclass(frozen=True) -class _VertsColumnIndices: - """ - Contains the relevant layout of the verts section of file being read. - Members - point_idxs: List[int] of 3 point columns. - color_idxs: List[int] of 3 color columns if they are present, - otherwise None. - color_scale: value to scale colors by. - normal_idxs: List[int] of 3 normals columns if they are present, - otherwise None. - """ - - point_idxs: List[int] - color_idxs: Optional[List[int]] - color_scale: float - normal_idxs: Optional[List[int]] - texture_uv_idxs: Optional[List[int]] - - -def _get_verts_column_indices( - vertex_head: _PlyElementType, -) -> _VertsColumnIndices: - """ - Get the columns of verts, verts_colors, and verts_normals in the vertex - element of a parsed ply file, together with a color scale factor. - When the colors are in byte format, they are scaled from 0..255 to [0,1]. - Otherwise they are not scaled. - - For example, if the vertex element looks as follows: - - element vertex 892 - property double x - property double y - property double z - property double nx - property double ny - property double nz - property uchar red - property uchar green - property uchar blue - property double texture_u - property double texture_v - - then the return value will be ([0,1,2], [6,7,8], 1.0/255, [3,4,5]) - - Args: - vertex_head: as returned from load_ply_raw. - - Returns: - _VertsColumnIndices object - """ - point_idxs: List[Optional[int]] = [None, None, None] - color_idxs: List[Optional[int]] = [None, None, None] - normal_idxs: List[Optional[int]] = [None, None, None] - texture_uv_idxs: List[Optional[int]] = [None, None] - for i, prop in enumerate(vertex_head.properties): - if prop.list_size_type is not None: - raise ValueError("Invalid vertices in file: did not expect list.") - for j, letter in enumerate(["x", "y", "z"]): - if prop.name == letter: - point_idxs[j] = i - for j, name in enumerate(["red", "green", "blue"]): - if prop.name == name: - color_idxs[j] = i - for j, name in enumerate(["nx", "ny", "nz"]): - if prop.name == name: - normal_idxs[j] = i - for j, name in enumerate(["texture_u", "texture_v"]): - if prop.name == name: - texture_uv_idxs[j] = i - if None in point_idxs: - raise ValueError("Invalid vertices in file.") - color_scale = 1.0 - if all( - idx is not None and _PLY_TYPES[vertex_head.properties[idx].data_type].size == 1 - for idx in color_idxs - ): - color_scale = 1.0 / 255 - return _VertsColumnIndices( - point_idxs=point_idxs, - color_idxs=None if None in color_idxs else color_idxs, - color_scale=color_scale, - normal_idxs=None if None in normal_idxs else normal_idxs, - texture_uv_idxs=None if None in texture_uv_idxs else texture_uv_idxs, - ) - - -@dataclass(frozen=True) -class _VertsData: - """ - Contains the data of the verts section of file being read. - Members: - verts: FloatTensor of shape (V, 3). - verts_colors: None or FloatTensor of shape (V, 3). - verts_normals: None or FloatTensor of shape (V, 3). 
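[Editor's note] To make the column mapping above concrete, a small sketch (again assuming the internal `_PlyElementType` and `_get_verts_column_indices` are in scope) that rebuilds the example header from the `_get_verts_column_indices` docstring and shows the resulting `_VertsColumnIndices` fields:

    vertex_head = _PlyElementType("vertex", 892)
    for name in ("x", "y", "z", "nx", "ny", "nz"):
        vertex_head.add_property(name, "double")
    for name in ("red", "green", "blue"):
        vertex_head.add_property(name, "uchar")
    for name in ("texture_u", "texture_v"):
        vertex_head.add_property(name, "double")

    idxs = _get_verts_column_indices(vertex_head)
    # idxs.point_idxs == [0, 1, 2], idxs.normal_idxs == [3, 4, 5],
    # idxs.color_idxs == [6, 7, 8], idxs.color_scale == 1.0 / 255 (uchar colors),
    # idxs.texture_uv_idxs == [9, 10]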
- """ - - verts: torch.Tensor - verts_colors: Optional[torch.Tensor] = None - verts_normals: Optional[torch.Tensor] = None - verts_texture_uvs: Optional[torch.Tensor] = None - - -def _get_verts(header: _PlyHeader, elements: dict) -> _VertsData: - """ - Get the vertex locations, colors and normals from a parsed ply file. - - Args: - header, elements: as returned from load_ply_raw. - - Returns: - _VertsData object - """ - - vertex = elements.get("vertex", None) - if vertex is None: - raise ValueError("The ply file has no vertex element.") - if not isinstance(vertex, list): - raise ValueError("Invalid vertices in file.") - vertex_head = next(head for head in header.elements if head.name == "vertex") - - column_idxs = _get_verts_column_indices(vertex_head) - - # Case of no vertices - if vertex_head.count == 0: - verts = torch.zeros((0, 3), dtype=torch.float32) - if column_idxs.color_idxs is None: - return _VertsData(verts=verts) - return _VertsData( - verts=verts, verts_colors=torch.zeros((0, 3), dtype=torch.float32) - ) - - # Simple case where the only data is the vertices themselves - if ( - len(vertex) == 1 - and isinstance(vertex[0], np.ndarray) - and vertex[0].ndim == 2 - and vertex[0].shape[1] == 3 - ): - return _VertsData(verts=_make_tensor(vertex[0], cols=3, dtype=torch.float32)) - - vertex_colors = None - vertex_normals = None - vertex_texture_uvs = None - - if len(vertex) == 1: - # This is the case where the whole vertex element has one type, - # so it was read as a single array and we can index straight into it. - verts = torch.tensor(vertex[0][:, column_idxs.point_idxs], dtype=torch.float32) - if column_idxs.color_idxs is not None: - vertex_colors = column_idxs.color_scale * torch.tensor( - vertex[0][:, column_idxs.color_idxs], dtype=torch.float32 - ) - if column_idxs.normal_idxs is not None: - vertex_normals = torch.tensor( - vertex[0][:, column_idxs.normal_idxs], dtype=torch.float32 - ) - if column_idxs.texture_uv_idxs is not None: - vertex_texture_uvs = torch.tensor( - vertex[0][:, column_idxs.texture_uv_idxs], dtype=torch.float32 - ) - else: - # The vertex element is heterogeneous. It was read as several arrays, - # part by part, where a part is a set of properties with the same type. - # For each property (=column in the file), we store in - # prop_to_partnum_col its partnum (i.e. the index of what part it is - # in) and its column number (its index within its part). - prop_to_partnum_col = [ - (partnum, col) - for partnum, array in enumerate(vertex) - for col in range(array.shape[1]) - ] - verts = torch.empty(size=(vertex_head.count, 3), dtype=torch.float32) - for axis in range(3): - partnum, col = prop_to_partnum_col[column_idxs.point_idxs[axis]] - verts.numpy()[:, axis] = vertex[partnum][:, col] - # Note that in the previous line, we made the assignment - # as numpy arrays by casting verts. If we took the (more - # obvious) method of converting the right hand side to - # torch, then we might have an extra data copy because - # torch wants contiguity. 
The code would be like: - # if not vertex[partnum].flags["C_CONTIGUOUS"]: - # vertex[partnum] = np.ascontiguousarray(vertex[partnum]) - # verts[:, axis] = torch.tensor((vertex[partnum][:, col])) - if column_idxs.color_idxs is not None: - vertex_colors = torch.empty( - size=(vertex_head.count, 3), dtype=torch.float32 - ) - for color in range(3): - partnum, col = prop_to_partnum_col[column_idxs.color_idxs[color]] - vertex_colors.numpy()[:, color] = vertex[partnum][:, col] - vertex_colors *= column_idxs.color_scale - if column_idxs.normal_idxs is not None: - vertex_normals = torch.empty( - size=(vertex_head.count, 3), dtype=torch.float32 - ) - for axis in range(3): - partnum, col = prop_to_partnum_col[column_idxs.normal_idxs[axis]] - vertex_normals.numpy()[:, axis] = vertex[partnum][:, col] - if column_idxs.texture_uv_idxs is not None: - vertex_texture_uvs = torch.empty( - size=(vertex_head.count, 2), - dtype=torch.float32, - ) - for axis in range(2): - partnum, col = prop_to_partnum_col[column_idxs.texture_uv_idxs[axis]] - vertex_texture_uvs.numpy()[:, axis] = vertex[partnum][:, col] - return _VertsData( - verts=verts, - verts_colors=vertex_colors, - verts_normals=vertex_normals, - verts_texture_uvs=vertex_texture_uvs, - ) - - -@dataclass(frozen=True) -class _PlyData: - """ - Contains the data from a PLY file which has been read. - Members: - header: _PlyHeader of file metadata from the header - verts: FloatTensor of shape (V, 3). - faces: None or LongTensor of vertex indices, shape (F, 3). - verts_colors: None or FloatTensor of shape (V, 3). - verts_normals: None or FloatTensor of shape (V, 3). - """ - - header: _PlyHeader - verts: torch.Tensor - faces: Optional[torch.Tensor] - verts_colors: Optional[torch.Tensor] - verts_normals: Optional[torch.Tensor] - verts_texture_uvs: Optional[torch.Tensor] - - -def _load_ply(f, *, path_manager: PathManager) -> _PlyData: - """ - Load the data from a .ply file. - - Args: - f: A binary or text file-like object (with methods read, readline, - tell and seek), a pathlib path or a string containing a file name. - If the ply file is in the binary ply format rather than the text - ply format, then a text stream is not supported. - It is easiest to use a binary stream in all cases. - path_manager: PathManager for loading if f is a str. - - Returns: - _PlyData object - """ - header, elements = _load_ply_raw(f, path_manager=path_manager) - - verts_data = _get_verts(header, elements) - - face = elements.get("face", None) - if face is not None: - face_head = next(head for head in header.elements if head.name == "face") - if ( - len(face_head.properties) != 1 - or face_head.properties[0].list_size_type is None - ): - raise ValueError("Unexpected form of faces data.") - # face_head.properties[0].name is usually "vertex_index" or "vertex_indices" - # but we don't need to enforce this. - - if face is None: - faces = None - elif not len(face): - # pyre is happier when this condition is not joined to the - # previous one with `or`. 
- faces = None - elif isinstance(face, np.ndarray) and face.ndim == 2: # Homogeneous elements - if face.shape[1] < 3: - raise ValueError("Faces must have at least 3 vertices.") - face_arrays = [face[:, [0, i + 1, i + 2]] for i in range(face.shape[1] - 2)] - faces = torch.LongTensor(np.vstack(face_arrays).astype(np.int64)) - else: - face_list = [] - for (face_item,) in face: - if face_item.ndim != 1: - raise ValueError("Bad face data.") - if face_item.shape[0] < 3: - raise ValueError("Faces must have at least 3 vertices.") - for i in range(face_item.shape[0] - 2): - face_list.append([face_item[0], face_item[i + 1], face_item[i + 2]]) - faces = torch.tensor(face_list, dtype=torch.int64) - - if faces is not None: - _check_faces_indices(faces, max_index=verts_data.verts.shape[0]) - - return _PlyData(**asdict(verts_data), faces=faces, header=header) - - -def load_ply( - f, *, path_manager: Optional[PathManager] = None -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Load the verts and faces from a .ply file. - Note that the preferred way to load data from such a file - is to use the IO.load_mesh and IO.load_pointcloud functions, - which can read more of the data. - - Example .ply file format:: - - ply - format ascii 1.0 { ascii/binary, format version number } - comment made by Greg Turk { comments keyword specified, like all lines } - comment this file is a cube - element vertex 8 { define "vertex" element, 8 of them in file } - property float x { vertex contains float "x" coordinate } - property float y { y coordinate is also a vertex property } - property float z { z coordinate, too } - element face 6 { there are 6 "face" elements in the file } - property list uchar int vertex_index { "vertex_indices" is a list of ints } - end_header { delimits the end of the header } - 0 0 0 { start of vertex list } - 0 0 1 - 0 1 1 - 0 1 0 - 1 0 0 - 1 0 1 - 1 1 1 - 1 1 0 - 4 0 1 2 3 { start of face list } - 4 7 6 5 4 - 4 0 4 5 1 - 4 1 5 6 2 - 4 2 6 7 3 - 4 3 7 4 0 - - Args: - f: A binary or text file-like object (with methods read, readline, - tell and seek), a pathlib path or a string containing a file name. - If the ply file is in the binary ply format rather than the text - ply format, then a text stream is not supported. - It is easiest to use a binary stream in all cases. - path_manager: PathManager for loading if f is a str. - - Returns: - verts: FloatTensor of shape (V, 3). - faces: LongTensor of vertex indices, shape (F, 3). - """ - - if path_manager is None: - path_manager = PathManager() - data = _load_ply(f, path_manager=path_manager) - faces = data.faces - if faces is None: - faces = torch.zeros(0, 3, dtype=torch.int64) - - return data.verts, faces - - -def _write_ply_header( - f, - *, - verts: torch.Tensor, - faces: Optional[torch.LongTensor], - verts_normals: Optional[torch.Tensor], - verts_colors: Optional[torch.Tensor], - ascii: bool, - colors_as_uint8: bool, -) -> None: - """ - Internal implementation for writing header when saving to a .ply file. - - Args: - f: File object to which the 3D data should be written. - verts: FloatTensor of shape (V, 3) giving vertex coordinates. - faces: LongTensor of shape (F, 3) giving faces. - verts_normals: FloatTensor of shape (V, 3) giving vertex normals. - verts_colors: FloatTensor of shape (V, 3) giving vertex colors. - ascii: (bool) whether to use the ascii ply format. - colors_as_uint8: Whether to save colors as numbers in the range - [0, 255] instead of float32. 
- """ - assert not len(verts) or (verts.dim() == 2 and verts.size(1) == 3) - assert faces is None or not len(faces) or (faces.dim() == 2 and faces.size(1) == 3) - assert verts_normals is None or ( - verts_normals.dim() == 2 and verts_normals.size(1) == 3 - ) - assert verts_colors is None or ( - verts_colors.dim() == 2 and verts_colors.size(1) == 3 - ) - - if ascii: - f.write(b"ply\nformat ascii 1.0\n") - elif sys.byteorder == "big": - f.write(b"ply\nformat binary_big_endian 1.0\n") - else: - f.write(b"ply\nformat binary_little_endian 1.0\n") - f.write(f"element vertex {verts.shape[0]}\n".encode("ascii")) - f.write(b"property float x\n") - f.write(b"property float y\n") - f.write(b"property float z\n") - if verts_normals is not None: - f.write(b"property float nx\n") - f.write(b"property float ny\n") - f.write(b"property float nz\n") - if verts_colors is not None: - color_ply_type = b"uchar" if colors_as_uint8 else b"float" - for color in (b"red", b"green", b"blue"): - f.write(b"property " + color_ply_type + b" " + color + b"\n") - if len(verts) and faces is not None: - f.write(f"element face {faces.shape[0]}\n".encode("ascii")) - f.write(b"property list uchar int vertex_index\n") - f.write(b"end_header\n") - - -def _save_ply( - f, - *, - verts: torch.Tensor, - faces: Optional[torch.LongTensor], - verts_normals: Optional[torch.Tensor], - verts_colors: Optional[torch.Tensor], - ascii: bool, - decimal_places: Optional[int] = None, - colors_as_uint8: bool, -) -> None: - """ - Internal implementation for saving 3D data to a .ply file. - - Args: - f: File object to which the 3D data should be written. - verts: FloatTensor of shape (V, 3) giving vertex coordinates. - faces: LongTensor of shape (F, 3) giving faces. - verts_normals: FloatTensor of shape (V, 3) giving vertex normals. - verts_colors: FloatTensor of shape (V, 3) giving vertex colors. - ascii: (bool) whether to use the ascii ply format. - decimal_places: Number of decimal places for saving if ascii=True. - colors_as_uint8: Whether to save colors as numbers in the range - [0, 255] instead of float32. 
- """ - _write_ply_header( - f, - verts=verts, - faces=faces, - verts_normals=verts_normals, - verts_colors=verts_colors, - ascii=ascii, - colors_as_uint8=colors_as_uint8, - ) - - if not (len(verts)): - warnings.warn("Empty 'verts' provided") - return - - color_np_type = np.ubyte if colors_as_uint8 else np.float32 - verts_dtype = [("verts", np.float32, 3)] - if verts_normals is not None: - verts_dtype.append(("normals", np.float32, 3)) - if verts_colors is not None: - verts_dtype.append(("colors", color_np_type, 3)) - - vert_data = np.zeros(verts.shape[0], dtype=verts_dtype) - vert_data["verts"] = verts.detach().cpu().numpy() - if verts_normals is not None: - vert_data["normals"] = verts_normals.detach().cpu().numpy() - if verts_colors is not None: - color_data = verts_colors.detach().cpu().numpy() - if colors_as_uint8: - vert_data["colors"] = np.rint(color_data * 255) - else: - vert_data["colors"] = color_data - - if ascii: - if decimal_places is None: - float_str = b"%f" - else: - float_str = b"%" + b".%df" % decimal_places - float_group_str = (float_str + b" ") * 3 - formats = [float_group_str] - if verts_normals is not None: - formats.append(float_group_str) - if verts_colors is not None: - formats.append(b"%d %d %d " if colors_as_uint8 else float_group_str) - formats[-1] = formats[-1][:-1] + b"\n" - for line_data in vert_data: - for data, format in zip(line_data, formats): - f.write(format % tuple(data)) - else: - if isinstance(f, BytesIO): - # tofile only works with real files, but is faster than this. - f.write(vert_data.tobytes()) - else: - vert_data.tofile(f) - - if faces is not None: - faces_array = faces.detach().cpu().numpy() - - _check_faces_indices(faces, max_index=verts.shape[0]) - - if len(faces_array): - if ascii: - np.savetxt(f, faces_array, "3 %d %d %d") - else: - faces_recs = np.zeros( - len(faces_array), - dtype=[("count", np.uint8), ("vertex_indices", np.uint32, 3)], - ) - faces_recs["count"] = 3 - faces_recs["vertex_indices"] = faces_array - faces_uints = faces_recs.view(np.uint8) - - if isinstance(f, BytesIO): - f.write(faces_uints.tobytes()) - else: - faces_uints.tofile(f) - - -def save_ply( - f, - verts: torch.Tensor, - faces: Optional[torch.LongTensor] = None, - verts_normals: Optional[torch.Tensor] = None, - ascii: bool = False, - decimal_places: Optional[int] = None, - path_manager: Optional[PathManager] = None, -) -> None: - """ - Save a mesh to a .ply file. - - Args: - f: File (or path) to which the mesh should be written. - verts: FloatTensor of shape (V, 3) giving vertex coordinates. - faces: LongTensor of shape (F, 3) giving faces. - verts_normals: FloatTensor of shape (V, 3) giving vertex normals. - ascii: (bool) whether to use the ascii ply format. - decimal_places: Number of decimal places for saving if ascii=True. - path_manager: PathManager for interpreting f if it is a str. - """ - - if len(verts) and not (verts.dim() == 2 and verts.size(1) == 3): - message = "Argument 'verts' should either be empty or of shape (num_verts, 3)." - raise ValueError(message) - - if ( - faces is not None - and len(faces) - and not (faces.dim() == 2 and faces.size(1) == 3) - ): - message = "Argument 'faces' should either be empty or of shape (num_faces, 3)." - raise ValueError(message) - - if ( - verts_normals is not None - and len(verts_normals) - and not ( - verts_normals.dim() == 2 - and verts_normals.size(1) == 3 - and verts_normals.size(0) == verts.size(0) - ) - ): - message = "Argument 'verts_normals' should either be empty or of shape (num_verts, 3)." 
- raise ValueError(message) - - if path_manager is None: - path_manager = PathManager() - with _open_file(f, path_manager, "wb") as f: - _save_ply( - f, - verts=verts, - faces=faces, - verts_normals=verts_normals, - verts_colors=None, - ascii=ascii, - decimal_places=decimal_places, - colors_as_uint8=False, - ) - - -class MeshPlyFormat(MeshFormatInterpreter): - def __init__(self) -> None: - self.known_suffixes = (".ply",) - - def read( - self, - path: PathOrStr, - include_textures: bool, - device, - path_manager: PathManager, - **kwargs, - ) -> Optional[Meshes]: - if not endswith(path, self.known_suffixes): - return None - - data = _load_ply(f=path, path_manager=path_manager) - faces = data.faces - if faces is None: - faces = torch.zeros(0, 3, dtype=torch.int64) - - texture = None - if include_textures: - if data.verts_colors is not None: - texture = TexturesVertex([data.verts_colors.to(device)]) - elif data.verts_texture_uvs is not None: - texture_file_path = None - for comment in data.header.comments: - if "TextureFile" in comment: - given_texture_file = comment.split(" ")[-1] - texture_file_path = os.path.join( - os.path.dirname(str(path)), given_texture_file - ) - if texture_file_path is not None: - texture_map = _read_image( - texture_file_path, path_manager, format="RGB" - ) - texture_map = torch.tensor(texture_map, dtype=torch.float32) / 255.0 - texture = TexturesUV( - [texture_map.to(device)], - [faces.to(device)], - [data.verts_texture_uvs.to(device)], - ) - - verts_normals = None - if data.verts_normals is not None: - verts_normals = [data.verts_normals.to(device)] - mesh = Meshes( - verts=[data.verts.to(device)], - faces=[faces.to(device)], - textures=texture, - verts_normals=verts_normals, - ) - return mesh - - def save( - self, - data: Meshes, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - decimal_places: Optional[int] = None, - colors_as_uint8: bool = False, - **kwargs, - ) -> bool: - """ - Extra optional args: - colors_as_uint8: (bool) Whether to save colors as numbers in the - range [0, 255] instead of float32. - """ - if not endswith(path, self.known_suffixes): - return False - - verts = data.verts_list()[0] - faces = data.faces_list()[0] - - if data.has_verts_normals(): - verts_normals = data.verts_normals_list()[0] - else: - verts_normals = None - - if isinstance(data.textures, TexturesVertex): - mesh_verts_colors = data.textures.verts_features_list()[0] - n_colors = mesh_verts_colors.shape[1] - if n_colors == 3: - verts_colors = mesh_verts_colors - else: - warnings.warn( - f"Texture will not be saved as it has {n_colors} colors, not 3." 
- ) - verts_colors = None - else: - verts_colors = None - - with _open_file(path, path_manager, "wb") as f: - _save_ply( - f=f, - verts=verts, - faces=faces, - verts_colors=verts_colors, - verts_normals=verts_normals, - ascii=binary is False, - decimal_places=decimal_places, - colors_as_uint8=colors_as_uint8, - ) - return True - - -class PointcloudPlyFormat(PointcloudFormatInterpreter): - def __init__(self) -> None: - self.known_suffixes = (".ply",) - - def read( - self, - path: PathOrStr, - device, - path_manager: PathManager, - **kwargs, - ) -> Optional[Pointclouds]: - if not endswith(path, self.known_suffixes): - return None - - data = _load_ply(f=path, path_manager=path_manager) - features = None - if data.verts_colors is not None: - features = [data.verts_colors.to(device)] - normals = None - if data.verts_normals is not None: - normals = [data.verts_normals.to(device)] - - pointcloud = Pointclouds( - points=[data.verts.to(device)], features=features, normals=normals - ) - return pointcloud - - def save( - self, - data: Pointclouds, - path: PathOrStr, - path_manager: PathManager, - binary: Optional[bool], - decimal_places: Optional[int] = None, - colors_as_uint8: bool = False, - **kwargs, - ) -> bool: - """ - Extra optional args: - colors_as_uint8: (bool) Whether to save colors as numbers in the - range [0, 255] instead of float32. - """ - if not endswith(path, self.known_suffixes): - return False - - points = data.points_list()[0] - features = data.features_packed() - normals = data.normals_packed() - - with _open_file(path, path_manager, "wb") as f: - _save_ply( - f=f, - verts=points, - verts_colors=features, - verts_normals=normals, - faces=None, - ascii=binary is False, - decimal_places=decimal_places, - colors_as_uint8=colors_as_uint8, - ) - return True diff --git a/pytorch3d/pytorch3d/io/utils.py b/pytorch3d/pytorch3d/io/utils.py deleted file mode 100644 index ee437b3a082cf503c9050c3731450941431ee993..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/io/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import contextlib -import pathlib -import warnings -from typing import cast, ContextManager, IO, Optional, Union - -import numpy as np -import torch -from iopath.common.file_io import PathManager -from PIL import Image - -from ..common.datatypes import Device - - -PathOrStr = Union[pathlib.Path, str] - - -def _open_file(f, path_manager: PathManager, mode: str = "r") -> ContextManager[IO]: - if isinstance(f, str): - # pyre-fixme[6]: For 2nd argument expected `Union[typing_extensions.Literal['... - f = path_manager.open(f, mode) - return contextlib.closing(f) - elif isinstance(f, pathlib.Path): - f = f.open(mode) - return contextlib.closing(f) - else: - return contextlib.nullcontext(cast(IO, f)) - - -def _make_tensor( - data, cols: int, dtype: torch.dtype, device: Device = "cpu" -) -> torch.Tensor: - """ - Return a 2D tensor with the specified cols and dtype filled with data, - even when data is empty. 
- """ - if not len(data): - return torch.zeros((0, cols), dtype=dtype, device=device) - - return torch.tensor(data, dtype=dtype, device=device) - - -def _check_faces_indices( - faces_indices: torch.Tensor, max_index: int, pad_value: Optional[int] = None -) -> torch.Tensor: - if pad_value is None: - mask = torch.ones(faces_indices.shape[:-1]).bool() # Keep all faces - else: - mask = faces_indices.ne(pad_value).any(dim=-1) - if torch.any(faces_indices[mask] >= max_index) or torch.any( - faces_indices[mask] < 0 - ): - warnings.warn("Faces have invalid indices") - return faces_indices - - -def _read_image(file_name: str, path_manager: PathManager, format=None): - """ - Read an image from a file using Pillow. - Args: - file_name: image file path. - path_manager: PathManager for interpreting file_name. - format: one of ["RGB", "BGR"] - Returns: - image: an image of shape (H, W, C). - """ - if format not in ["RGB", "BGR"]: - raise ValueError("format can only be one of [RGB, BGR]; got %s", format) - with path_manager.open(file_name, "rb") as f: - image = Image.open(f) - if format is not None: - # PIL only supports RGB. First convert to RGB and flip channels - # below for BGR. - image = image.convert("RGB") - image = np.asarray(image).astype(np.float32) - if format == "BGR": - image = image[:, :, ::-1] - return image diff --git a/pytorch3d/pytorch3d/loss/__init__.py b/pytorch3d/pytorch3d/loss/__init__.py deleted file mode 100644 index 2b8d10de9c34a3f6a5eb27f060a2eb8db5755344..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/loss/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from .chamfer import chamfer_distance -from .mesh_edge_loss import mesh_edge_loss -from .mesh_laplacian_smoothing import mesh_laplacian_smoothing -from .mesh_normal_consistency import mesh_normal_consistency -from .point_mesh_distance import point_mesh_edge_distance, point_mesh_face_distance - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/loss/chamfer.py b/pytorch3d/pytorch3d/loss/chamfer.py deleted file mode 100644 index 3ef1d6f42fe8451517024ead5faa50dd4bd35575..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/loss/chamfer.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Union - -import torch -import torch.nn.functional as F -from pytorch3d.ops.knn import knn_gather, knn_points -from pytorch3d.structures.pointclouds import Pointclouds - - -def _validate_chamfer_reduction_inputs( - batch_reduction: Union[str, None], point_reduction: Union[str, None] -) -> None: - """Check the requested reductions are valid. - - Args: - batch_reduction: Reduction operation to apply for the loss across the - batch, can be one of ["mean", "sum"] or None. - point_reduction: Reduction operation to apply for the loss across the - points, can be one of ["mean", "sum"] or None. 
- """ - if batch_reduction is not None and batch_reduction not in ["mean", "sum"]: - raise ValueError('batch_reduction must be one of ["mean", "sum"] or None') - if point_reduction is not None and point_reduction not in ["mean", "sum"]: - raise ValueError('point_reduction must be one of ["mean", "sum"] or None') - if point_reduction is None and batch_reduction is not None: - raise ValueError("Batch reduction must be None if point_reduction is None") - - -def _handle_pointcloud_input( - points: Union[torch.Tensor, Pointclouds], - lengths: Union[torch.Tensor, None], - normals: Union[torch.Tensor, None], -): - """ - If points is an instance of Pointclouds, retrieve the padded points tensor - along with the number of points per batch and the padded normals. - Otherwise, return the input points (and normals) with the number of points per cloud - set to the size of the second dimension of `points`. - """ - if isinstance(points, Pointclouds): - X = points.points_padded() - lengths = points.num_points_per_cloud() - normals = points.normals_padded() # either a tensor or None - elif torch.is_tensor(points): - if points.ndim != 3: - raise ValueError("Expected points to be of shape (N, P, D)") - X = points - if lengths is not None: - if lengths.ndim != 1 or lengths.shape[0] != X.shape[0]: - raise ValueError("Expected lengths to be of shape (N,)") - if lengths.max() > X.shape[1]: - raise ValueError("A length value was too long") - if lengths is None: - lengths = torch.full( - (X.shape[0],), X.shape[1], dtype=torch.int64, device=points.device - ) - if normals is not None and normals.ndim != 3: - raise ValueError("Expected normals to be of shape (N, P, 3") - else: - raise ValueError( - "The input pointclouds should be either " - + "Pointclouds objects or torch.Tensor of shape " - + "(minibatch, num_points, 3)." - ) - return X, lengths, normals - - -def _chamfer_distance_single_direction( - x, - y, - x_lengths, - y_lengths, - x_normals, - y_normals, - weights, - batch_reduction: Union[str, None], - point_reduction: Union[str, None], - norm: int, - abs_cosine: bool, -): - return_normals = x_normals is not None and y_normals is not None - - N, P1, D = x.shape - - # Check if inputs are heterogeneous and create a lengths mask. - is_x_heterogeneous = (x_lengths != P1).any() - x_mask = ( - torch.arange(P1, device=x.device)[None] >= x_lengths[:, None] - ) # shape [N, P1] - if y.shape[0] != N or y.shape[2] != D: - raise ValueError("y does not have the correct shape.") - if weights is not None: - if weights.size(0) != N: - raise ValueError("weights must be of shape (N,).") - if not (weights >= 0).all(): - raise ValueError("weights cannot be negative.") - if weights.sum() == 0.0: - weights = weights.view(N, 1) - if batch_reduction in ["mean", "sum"]: - return ( - (x.sum((1, 2)) * weights).sum() * 0.0, - (x.sum((1, 2)) * weights).sum() * 0.0, - ) - return ((x.sum((1, 2)) * weights) * 0.0, (x.sum((1, 2)) * weights) * 0.0) - - cham_norm_x = x.new_zeros(()) - - x_nn = knn_points(x, y, lengths1=x_lengths, lengths2=y_lengths, norm=norm, K=1) - cham_x = x_nn.dists[..., 0] # (N, P1) - - if is_x_heterogeneous: - cham_x[x_mask] = 0.0 - - if weights is not None: - cham_x *= weights.view(N, 1) - - if return_normals: - # Gather the normals using the indices and keep only value for k=0 - x_normals_near = knn_gather(y_normals, x_nn.idx, y_lengths)[..., 0, :] - - cosine_sim = F.cosine_similarity(x_normals, x_normals_near, dim=2, eps=1e-6) - # If abs_cosine, ignore orientation and take the absolute value of the cosine sim. 
- cham_norm_x = 1 - (torch.abs(cosine_sim) if abs_cosine else cosine_sim) - - if is_x_heterogeneous: - cham_norm_x[x_mask] = 0.0 - - if weights is not None: - cham_norm_x *= weights.view(N, 1) - - if point_reduction is not None: - # Apply point reduction - cham_x = cham_x.sum(1) # (N,) - if return_normals: - cham_norm_x = cham_norm_x.sum(1) # (N,) - if point_reduction == "mean": - x_lengths_clamped = x_lengths.clamp(min=1) - cham_x /= x_lengths_clamped - if return_normals: - cham_norm_x /= x_lengths_clamped - - if batch_reduction is not None: - # batch_reduction == "sum" - cham_x = cham_x.sum() - if return_normals: - cham_norm_x = cham_norm_x.sum() - if batch_reduction == "mean": - div = weights.sum() if weights is not None else max(N, 1) - cham_x /= div - if return_normals: - cham_norm_x /= div - - cham_dist = cham_x - cham_normals = cham_norm_x if return_normals else None - return cham_dist, cham_normals - - -def chamfer_distance( - x, - y, - x_lengths=None, - y_lengths=None, - x_normals=None, - y_normals=None, - weights=None, - batch_reduction: Union[str, None] = "mean", - point_reduction: Union[str, None] = "mean", - norm: int = 2, - single_directional: bool = False, - abs_cosine: bool = True, -): - """ - Chamfer distance between two pointclouds x and y. - - Args: - x: FloatTensor of shape (N, P1, D) or a Pointclouds object representing - a batch of point clouds with at most P1 points in each batch element, - batch size N and feature dimension D. - y: FloatTensor of shape (N, P2, D) or a Pointclouds object representing - a batch of point clouds with at most P2 points in each batch element, - batch size N and feature dimension D. - x_lengths: Optional LongTensor of shape (N,) giving the number of points in each - cloud in x. - y_lengths: Optional LongTensor of shape (N,) giving the number of points in each - cloud in y. - x_normals: Optional FloatTensor of shape (N, P1, D). - y_normals: Optional FloatTensor of shape (N, P2, D). - weights: Optional FloatTensor of shape (N,) giving weights for - batch elements for reduction operation. - batch_reduction: Reduction operation to apply for the loss across the - batch, can be one of ["mean", "sum"] or None. - point_reduction: Reduction operation to apply for the loss across the - points, can be one of ["mean", "sum"] or None. - norm: int indicates the norm used for the distance. Supports 1 for L1 and 2 for L2. - single_directional: If False (default), loss comes from both the distance between - each point in x and its nearest neighbor in y and each point in y and its nearest - neighbor in x. If True, loss is the distance between each point in x and its - nearest neighbor in y. - abs_cosine: If False, loss_normals is from one minus the cosine similarity. - If True (default), loss_normals is from one minus the absolute value of the - cosine similarity, which means that exactly opposite normals are considered - equivalent to exactly matching normals, i.e. sign does not matter. - - Returns: - 2-element tuple containing - - - **loss**: Tensor giving the reduced distance between the pointclouds - in x and the pointclouds in y. If point_reduction is None, a 2-element - tuple of Tensors containing forward and backward loss terms shaped (N, P1) - and (N, P2) (if single_directional is False) or a Tensor containing loss - terms shaped (N, P1) (if single_directional is True) is returned. - - **loss_normals**: Tensor giving the reduced cosine distance of normals - between pointclouds in x and pointclouds in y. 
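[Editor's note] To ground the reduction and return-value description above, a minimal usage sketch of `chamfer_distance` on random data (purely illustrative):

    import torch
    from pytorch3d.loss import chamfer_distance

    x = torch.rand(4, 128, 3)  # batch of 4 clouds with 128 points each
    y = torch.rand(4, 256, 3)  # batch of 4 clouds with 256 points each

    loss, loss_normals = chamfer_distance(x, y)  # loss_normals is None (no normals given)
    loss_sd, _ = chamfer_distance(x, y, norm=1, single_directional=True)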
Returns None if - x_normals and y_normals are None. If point_reduction is None, a 2-element - tuple of Tensors containing forward and backward loss terms shaped (N, P1) - and (N, P2) (if single_directional is False) or a Tensor containing loss - terms shaped (N, P1) (if single_directional is True) is returned. - """ - _validate_chamfer_reduction_inputs(batch_reduction, point_reduction) - - if not ((norm == 1) or (norm == 2)): - raise ValueError("Support for 1 or 2 norm.") - x, x_lengths, x_normals = _handle_pointcloud_input(x, x_lengths, x_normals) - y, y_lengths, y_normals = _handle_pointcloud_input(y, y_lengths, y_normals) - - cham_x, cham_norm_x = _chamfer_distance_single_direction( - x, - y, - x_lengths, - y_lengths, - x_normals, - y_normals, - weights, - batch_reduction, - point_reduction, - norm, - abs_cosine, - ) - if single_directional: - return cham_x, cham_norm_x - else: - cham_y, cham_norm_y = _chamfer_distance_single_direction( - y, - x, - y_lengths, - x_lengths, - y_normals, - x_normals, - weights, - batch_reduction, - point_reduction, - norm, - abs_cosine, - ) - if point_reduction is not None: - return ( - cham_x + cham_y, - (cham_norm_x + cham_norm_y) if cham_norm_x is not None else None, - ) - return ( - (cham_x, cham_y), - (cham_norm_x, cham_norm_y) if cham_norm_x is not None else None, - ) diff --git a/pytorch3d/pytorch3d/loss/mesh_edge_loss.py b/pytorch3d/pytorch3d/loss/mesh_edge_loss.py deleted file mode 100644 index e54ddf9fdf57cb13b756bc213c6d0f60852fd642..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/loss/mesh_edge_loss.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch - - -def mesh_edge_loss(meshes, target_length: float = 0.0): - """ - Computes mesh edge length regularization loss averaged across all meshes - in a batch. Each mesh contributes equally to the final loss, regardless of - the number of edges per mesh in the batch by weighting each mesh with the - inverse number of edges. For example, if mesh 3 (out of N) has only E=4 - edges, then the loss for each edge in mesh 3 should be multiplied by 1/E to - contribute to the final loss. - - Args: - meshes: Meshes object with a batch of meshes. - target_length: Resting value for the edge length. - - Returns: - loss: Average loss across the batch. Returns 0 if meshes contains - no meshes or all empty meshes. - """ - if meshes.isempty(): - return torch.tensor( - [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True - ) - - N = len(meshes) - edges_packed = meshes.edges_packed() # (sum(E_n), 3) - verts_packed = meshes.verts_packed() # (sum(V_n), 3) - edge_to_mesh_idx = meshes.edges_packed_to_mesh_idx() # (sum(E_n), ) - num_edges_per_mesh = meshes.num_edges_per_mesh() # N - - # Determine the weight for each edge based on the number of edges in the - # mesh it corresponds to. - # TODO (nikhilar) Find a faster way of computing the weights for each edge - # as this is currently a bottleneck for meshes with a large number of faces. 
- weights = num_edges_per_mesh.gather(0, edge_to_mesh_idx) - weights = 1.0 / weights.float() - - verts_edges = verts_packed[edges_packed] - v0, v1 = verts_edges.unbind(1) - loss = ((v0 - v1).norm(dim=1, p=2) - target_length) ** 2.0 - loss = loss * weights - - return loss.sum() / N diff --git a/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py b/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py deleted file mode 100644 index 3ce9298a15169e6c070d2b60edd4d1e442a9d987..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -from pytorch3d.ops import cot_laplacian - - -def mesh_laplacian_smoothing(meshes, method: str = "uniform"): - r""" - Computes the laplacian smoothing objective for a batch of meshes. - This function supports three variants of Laplacian smoothing, - namely with uniform weights("uniform"), with cotangent weights ("cot"), - and cotangent curvature ("cotcurv").For more details read [1, 2]. - - Args: - meshes: Meshes object with a batch of meshes. - method: str specifying the method for the laplacian. - Returns: - loss: Average laplacian smoothing loss across the batch. - Returns 0 if meshes contains no meshes or all empty meshes. - - Consider a mesh M = (V, F), with verts of shape Nx3 and faces of shape Mx3. - The Laplacian matrix L is a NxN tensor such that LV gives a tensor of vectors: - for a uniform Laplacian, LuV[i] points to the centroid of its neighboring - vertices, a cotangent Laplacian LcV[i] is known to be an approximation of - the surface normal, while the curvature variant LckV[i] scales the normals - by the discrete mean curvature. For vertex i, assume S[i] is the set of - neighboring vertices to i, a_ij and b_ij are the "outside" angles in the - two triangles connecting vertex v_i and its neighboring vertex v_j - for j in S[i], as seen in the diagram below. - - .. code-block:: python - - a_ij - /\ - / \ - / \ - / \ - v_i /________\ v_j - \ / - \ / - \ / - \ / - \/ - b_ij - - The definition of the Laplacian is LV[i] = sum_j w_ij (v_j - v_i) - For the uniform variant, w_ij = 1 / |S[i]| - For the cotangent variant, - w_ij = (cot a_ij + cot b_ij) / (sum_k cot a_ik + cot b_ik) - For the cotangent curvature, w_ij = (cot a_ij + cot b_ij) / (4 A[i]) - where A[i] is the sum of the areas of all triangles containing vertex v_i. - - There is a nice trigonometry identity to compute cotangents. Consider a triangle - with side lengths A, B, C and angles a, b, c. - - .. code-block:: python - - c - /|\ - / | \ - / | \ - B / H| \ A - / | \ - / | \ - /a_____|_____b\ - C - - Then cot a = (B^2 + C^2 - A^2) / 4 * area - We know that area = CH/2, and by the law of cosines we have - - A^2 = B^2 + C^2 - 2BC cos a => B^2 + C^2 - A^2 = 2BC cos a - - Putting these together, we get: - - B^2 + C^2 - A^2 2BC cos a - _______________ = _________ = (B/H) cos a = cos a / sin a = cot a - 4 * area 2CH - - - [1] Desbrun et al, "Implicit fairing of irregular meshes using diffusion - and curvature flow", SIGGRAPH 1999. - - [2] Nealan et al, "Laplacian Mesh Optimization", Graphite 2006. 
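For reference, the cotangent identity derived in the docstring above can be checked numerically on a concrete triangle; the sketch below uses a 3-4-5 right triangle chosen purely for illustration:

```python
import math

# Triangle with side lengths A, B, C opposite angles a, b, c (here a 3-4-5 right triangle).
A, B, C = 3.0, 4.0, 5.0
area = 0.5 * 3.0 * 4.0  # the legs of length 3 and 4 enclose the right angle

# Identity from the docstring: cot a = (B^2 + C^2 - A^2) / (4 * area)
cot_a = (B ** 2 + C ** 2 - A ** 2) / (4.0 * area)

# Direct computation: sin a = 3/5 and cos a = 4/5 for the angle opposite the side of length 3.
assert math.isclose(cot_a, (4.0 / 5.0) / (3.0 / 5.0))  # cot a = cos a / sin a = 4/3
```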
- """ - - if meshes.isempty(): - return torch.tensor( - [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True - ) - - N = len(meshes) - verts_packed = meshes.verts_packed() # (sum(V_n), 3) - faces_packed = meshes.faces_packed() # (sum(F_n), 3) - num_verts_per_mesh = meshes.num_verts_per_mesh() # (N,) - verts_packed_idx = meshes.verts_packed_to_mesh_idx() # (sum(V_n),) - weights = num_verts_per_mesh.gather(0, verts_packed_idx) # (sum(V_n),) - weights = 1.0 / weights.float() - - # We don't want to backprop through the computation of the Laplacian; - # just treat it as a magic constant matrix that is used to transform - # verts into normals - with torch.no_grad(): - if method == "uniform": - L = meshes.laplacian_packed() - elif method in ["cot", "cotcurv"]: - L, inv_areas = cot_laplacian(verts_packed, faces_packed) - if method == "cot": - norm_w = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1) - idx = norm_w > 0 - # pyre-fixme[58]: `/` is not supported for operand types `float` and - # `Tensor`. - norm_w[idx] = 1.0 / norm_w[idx] - else: - L_sum = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1) - norm_w = 0.25 * inv_areas - else: - raise ValueError("Method should be one of {uniform, cot, cotcurv}") - - if method == "uniform": - loss = L.mm(verts_packed) - elif method == "cot": - # pyre-fixme[61]: `norm_w` is undefined, or not always defined. - loss = L.mm(verts_packed) * norm_w - verts_packed - elif method == "cotcurv": - # pyre-fixme[61]: `norm_w` may not be initialized here. - loss = (L.mm(verts_packed) - L_sum * verts_packed) * norm_w - loss = loss.norm(dim=1) - - loss = loss * weights - return loss.sum() / N diff --git a/pytorch3d/pytorch3d/loss/mesh_normal_consistency.py b/pytorch3d/pytorch3d/loss/mesh_normal_consistency.py deleted file mode 100644 index a1dbf670707e590bdf12d9589f5378ee5526c6ac..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/loss/mesh_normal_consistency.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d import _C - - -def mesh_normal_consistency(meshes): - r""" - Computes the normal consistency of each mesh in meshes. - We compute the normal consistency for each pair of neighboring faces. - If e = (v0, v1) is the connecting edge of two neighboring faces f0 and f1, - then the normal consistency between f0 and f1 - - .. code-block:: python - - a - /\ - / \ - / f0 \ - / \ - v0 /____e___\ v1 - \ / - \ / - \ f1 / - \ / - \/ - b - - The normal consistency is - - .. code-block:: python - - nc(f0, f1) = 1 - cos(n0, n1) - - where cos(n0, n1) = n0^n1 / ||n0|| / ||n1|| is the cosine of the angle - between the normals n0 and n1, and - - n0 = (v1 - v0) x (a - v0) - n1 = - (v1 - v0) x (b - v0) = (b - v0) x (v1 - v0) - - This means that if nc(f0, f1) = 0 then n0 and n1 point to the same - direction, while if nc(f0, f1) = 2 then n0 and n1 point opposite direction. - - .. note:: - For well-constructed meshes the assumption that only two faces share an - edge is true. This assumption could make the implementation easier and faster. - This implementation does not follow this assumption. All the faces sharing e, - which can be any in number, are discovered. - - Args: - meshes: Meshes object with a batch of meshes. - - Returns: - loss: Average normal consistency across the batch. 
- Returns 0 if meshes contains no meshes or all empty meshes. - """ - if meshes.isempty(): - return torch.tensor( - [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True - ) - - N = len(meshes) - verts_packed = meshes.verts_packed() # (sum(V_n), 3) - faces_packed = meshes.faces_packed() # (sum(F_n), 3) - edges_packed = meshes.edges_packed() # (sum(E_n), 2) - verts_packed_to_mesh_idx = meshes.verts_packed_to_mesh_idx() # (sum(V_n),) - face_to_edge = meshes.faces_packed_to_edges_packed() # (sum(F_n), 3) - E = edges_packed.shape[0] # sum(E_n) - F = faces_packed.shape[0] # sum(F_n) - - # We don't want gradients for the following operation. The goal is to - # find for each edge e all the vertices associated with e. In the example - # above, the vertices associated with e are (a, b), i.e. the points connected - # on faces to e. - with torch.no_grad(): - edge_idx = face_to_edge.reshape(F * 3) # (3 * F,) indexes into edges - vert_idx = ( - faces_packed.view(1, F, 3).expand(3, F, 3).transpose(0, 1).reshape(3 * F, 3) - ) - edge_idx, edge_sort_idx = edge_idx.sort() - vert_idx = vert_idx[edge_sort_idx] - - # In well constructed meshes each edge is shared by precisely 2 faces - # However, in many meshes, this assumption is not always satisfied. - # We want to find all faces that share an edge, a number which can - # vary and which depends on the topology. - # In particular, we find the vertices not on the edge on the shared faces. - # In the example above, we want to associate edge e with vertices a and b. - # This operation is done more efficiently in cpu with lists. - # TODO(gkioxari) find a better way to do this. - - # edge_idx represents the index of the edge for each vertex. We can count - # the number of vertices which are associated with each edge. - # There can be a different number for each edge. - edge_num = edge_idx.bincount(minlength=E) - - # This calculates all pairs of vertices which are opposite to the same edge. - vert_edge_pair_idx = _C.mesh_normal_consistency_find_verts(edge_num.cpu()).to( - edge_num.device - ) - - if vert_edge_pair_idx.shape[0] == 0: - return torch.tensor( - [0.0], dtype=torch.float32, device=meshes.device, requires_grad=True - ) - - v0_idx = edges_packed[edge_idx, 0] - v0 = verts_packed[v0_idx] - v1_idx = edges_packed[edge_idx, 1] - v1 = verts_packed[v1_idx] - - # two of the following cross products are zeros as they are cross product - # with either (v1-v0)x(v1-v0) or (v1-v0)x(v0-v0) - n_temp0 = (v1 - v0).cross(verts_packed[vert_idx[:, 0]] - v0, dim=1) - n_temp1 = (v1 - v0).cross(verts_packed[vert_idx[:, 1]] - v0, dim=1) - n_temp2 = (v1 - v0).cross(verts_packed[vert_idx[:, 2]] - v0, dim=1) - n = n_temp0 + n_temp1 + n_temp2 - n0 = n[vert_edge_pair_idx[:, 0]] - n1 = -n[vert_edge_pair_idx[:, 1]] - loss = 1 - torch.cosine_similarity(n0, n1, dim=1) - - verts_packed_to_mesh_idx = verts_packed_to_mesh_idx[vert_idx[:, 0]] - verts_packed_to_mesh_idx = verts_packed_to_mesh_idx[vert_edge_pair_idx[:, 0]] - num_normals = verts_packed_to_mesh_idx.bincount(minlength=N) - weights = 1.0 / num_normals[verts_packed_to_mesh_idx].float() - - loss = loss * weights - return loss.sum() / N diff --git a/pytorch3d/pytorch3d/loss/point_mesh_distance.py b/pytorch3d/pytorch3d/loss/point_mesh_distance.py deleted file mode 100644 index fc45bc124e8a4711ff080ab9ed89db1649d1d809..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/loss/point_mesh_distance.py +++ /dev/null @@ -1,396 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from pytorch3d import _C -from pytorch3d.structures import Meshes, Pointclouds -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -""" -This file defines distances between meshes and pointclouds. -The functions make use of the definition of a distance between a point and -an edge segment or the distance of a point and a triangle (face). - -The exact mathematical formulations and implementations of these -distances can be found in `csrc/utils/geometry_utils.cuh`. -""" - -_DEFAULT_MIN_TRIANGLE_AREA: float = 5e-3 - - -# PointFaceDistance -class _PointFaceDistance(Function): - """ - Torch autograd Function wrapper PointFaceDistance Cuda implementation - """ - - @staticmethod - def forward( - ctx, - points, - points_first_idx, - tris, - tris_first_idx, - max_points, - min_triangle_area=_DEFAULT_MIN_TRIANGLE_AREA, - ): - """ - Args: - ctx: Context object used to calculate gradients. - points: FloatTensor of shape `(P, 3)` - points_first_idx: LongTensor of shape `(N,)` indicating the first point - index in each example in the batch - tris: FloatTensor of shape `(T, 3, 3)` of triangular faces. The `t`-th - triangular face is spanned by `(tris[t, 0], tris[t, 1], tris[t, 2])` - tris_first_idx: LongTensor of shape `(N,)` indicating the first face - index in each example in the batch - max_points: Scalar equal to maximum number of points in the batch - min_triangle_area: (float, defaulted) Triangles of area less than this - will be treated as points/lines. - Returns: - dists: FloatTensor of shape `(P,)`, where `dists[p]` is the squared - euclidean distance of `p`-th point to the closest triangular face - in the corresponding example in the batch - idxs: LongTensor of shape `(P,)` indicating the closest triangular face - in the corresponding example in the batch. - - `dists[p]` is - `d(points[p], tris[idxs[p], 0], tris[idxs[p], 1], tris[idxs[p], 2])` - where `d(u, v0, v1, v2)` is the distance of point `u` from the triangular - face `(v0, v1, v2)` - - """ - dists, idxs = _C.point_face_dist_forward( - points, - points_first_idx, - tris, - tris_first_idx, - max_points, - min_triangle_area, - ) - ctx.save_for_backward(points, tris, idxs) - ctx.min_triangle_area = min_triangle_area - return dists - - @staticmethod - @once_differentiable - def backward(ctx, grad_dists): - grad_dists = grad_dists.contiguous() - points, tris, idxs = ctx.saved_tensors - min_triangle_area = ctx.min_triangle_area - grad_points, grad_tris = _C.point_face_dist_backward( - points, tris, idxs, grad_dists, min_triangle_area - ) - return grad_points, None, grad_tris, None, None, None - - -point_face_distance = _PointFaceDistance.apply - - -# FacePointDistance -class _FacePointDistance(Function): - """ - Torch autograd Function wrapper FacePointDistance Cuda implementation - """ - - @staticmethod - def forward( - ctx, - points, - points_first_idx, - tris, - tris_first_idx, - max_tris, - min_triangle_area=_DEFAULT_MIN_TRIANGLE_AREA, - ): - """ - Args: - ctx: Context object used to calculate gradients. - points: FloatTensor of shape `(P, 3)` - points_first_idx: LongTensor of shape `(N,)` indicating the first point - index in each example in the batch - tris: FloatTensor of shape `(T, 3, 3)` of triangular faces. 
The `t`-th - triangular face is spanned by `(tris[t, 0], tris[t, 1], tris[t, 2])` - tris_first_idx: LongTensor of shape `(N,)` indicating the first face - index in each example in the batch - max_tris: Scalar equal to maximum number of faces in the batch - min_triangle_area: (float, defaulted) Triangles of area less than this - will be treated as points/lines. - Returns: - dists: FloatTensor of shape `(T,)`, where `dists[t]` is the squared - euclidean distance of `t`-th triangular face to the closest point in the - corresponding example in the batch - idxs: LongTensor of shape `(T,)` indicating the closest point in the - corresponding example in the batch. - - `dists[t] = d(points[idxs[t]], tris[t, 0], tris[t, 1], tris[t, 2])`, - where `d(u, v0, v1, v2)` is the distance of point `u` from the triangular - face `(v0, v1, v2)`. - """ - dists, idxs = _C.face_point_dist_forward( - points, points_first_idx, tris, tris_first_idx, max_tris, min_triangle_area - ) - ctx.save_for_backward(points, tris, idxs) - ctx.min_triangle_area = min_triangle_area - return dists - - @staticmethod - @once_differentiable - def backward(ctx, grad_dists): - grad_dists = grad_dists.contiguous() - points, tris, idxs = ctx.saved_tensors - min_triangle_area = ctx.min_triangle_area - grad_points, grad_tris = _C.face_point_dist_backward( - points, tris, idxs, grad_dists, min_triangle_area - ) - return grad_points, None, grad_tris, None, None, None - - -face_point_distance = _FacePointDistance.apply - - -# PointEdgeDistance -class _PointEdgeDistance(Function): - """ - Torch autograd Function wrapper PointEdgeDistance Cuda implementation - """ - - @staticmethod - def forward(ctx, points, points_first_idx, segms, segms_first_idx, max_points): - """ - Args: - ctx: Context object used to calculate gradients. - points: FloatTensor of shape `(P, 3)` - points_first_idx: LongTensor of shape `(N,)` indicating the first point - index for each example in the mesh - segms: FloatTensor of shape `(S, 2, 3)` of edge segments. The `s`-th - edge segment is spanned by `(segms[s, 0], segms[s, 1])` - segms_first_idx: LongTensor of shape `(N,)` indicating the first edge - index for each example in the mesh - max_points: Scalar equal to maximum number of points in the batch - Returns: - dists: FloatTensor of shape `(P,)`, where `dists[p]` is the squared - euclidean distance of `p`-th point to the closest edge in the - corresponding example in the batch - idxs: LongTensor of shape `(P,)` indicating the closest edge in the - corresponding example in the batch. - - `dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1])`, - where `d(u, v0, v1)` is the distance of point `u` from the edge segment - spanned by `(v0, v1)`. 
- """ - dists, idxs = _C.point_edge_dist_forward( - points, points_first_idx, segms, segms_first_idx, max_points - ) - ctx.save_for_backward(points, segms, idxs) - return dists - - @staticmethod - @once_differentiable - def backward(ctx, grad_dists): - grad_dists = grad_dists.contiguous() - points, segms, idxs = ctx.saved_tensors - grad_points, grad_segms = _C.point_edge_dist_backward( - points, segms, idxs, grad_dists - ) - return grad_points, None, grad_segms, None, None - - -point_edge_distance = _PointEdgeDistance.apply - - -# EdgePointDistance -class _EdgePointDistance(Function): - """ - Torch autograd Function wrapper EdgePointDistance Cuda implementation - """ - - @staticmethod - def forward(ctx, points, points_first_idx, segms, segms_first_idx, max_segms): - """ - Args: - ctx: Context object used to calculate gradients. - points: FloatTensor of shape `(P, 3)` - points_first_idx: LongTensor of shape `(N,)` indicating the first point - index for each example in the mesh - segms: FloatTensor of shape `(S, 2, 3)` of edge segments. The `s`-th - edge segment is spanned by `(segms[s, 0], segms[s, 1])` - segms_first_idx: LongTensor of shape `(N,)` indicating the first edge - index for each example in the mesh - max_segms: Scalar equal to maximum number of edges in the batch - Returns: - dists: FloatTensor of shape `(S,)`, where `dists[s]` is the squared - euclidean distance of `s`-th edge to the closest point in the - corresponding example in the batch - idxs: LongTensor of shape `(S,)` indicating the closest point in the - corresponding example in the batch. - - `dists[s] = d(points[idxs[s]], edges[s, 0], edges[s, 1])`, - where `d(u, v0, v1)` is the distance of point `u` from the segment - spanned by `(v0, v1)`. - """ - dists, idxs = _C.edge_point_dist_forward( - points, points_first_idx, segms, segms_first_idx, max_segms - ) - ctx.save_for_backward(points, segms, idxs) - return dists - - @staticmethod - @once_differentiable - def backward(ctx, grad_dists): - grad_dists = grad_dists.contiguous() - points, segms, idxs = ctx.saved_tensors - grad_points, grad_segms = _C.edge_point_dist_backward( - points, segms, idxs, grad_dists - ) - return grad_points, None, grad_segms, None, None - - -edge_point_distance = _EdgePointDistance.apply - - -def point_mesh_edge_distance(meshes: Meshes, pcls: Pointclouds): - """ - Computes the distance between a pointcloud and a mesh within a batch. - Given a pair `(mesh, pcl)` in the batch, we define the distance to be the - sum of two distances, namely `point_edge(mesh, pcl) + edge_point(mesh, pcl)` - - `point_edge(mesh, pcl)`: Computes the squared distance of each point p in pcl - to the closest edge segment in mesh and averages across all points in pcl - `edge_point(mesh, pcl)`: Computes the squared distance of each edge segment in mesh - to the closest point in pcl and averages across all edges in mesh. - - The above distance functions are applied for all `(mesh, pcl)` pairs in the batch - and then averaged across the batch. - - Args: - meshes: A Meshes data structure containing N meshes - pcls: A Pointclouds data structure containing N pointclouds - - Returns: - loss: The `point_edge(mesh, pcl) + edge_point(mesh, pcl)` distance - between all `(mesh, pcl)` in a batch averaged across the batch. 
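A minimal usage sketch of the `point_mesh_edge_distance` API removed in this diff; the icosphere mesh and the random point cloud are illustrative placeholders, and the `ico_sphere` helper from `pytorch3d.utils` is assumed to be available:

```python
import torch
from pytorch3d.loss import point_mesh_edge_distance
from pytorch3d.structures import Pointclouds
from pytorch3d.utils import ico_sphere

meshes = ico_sphere(level=1)                     # batch of N=1 meshes
pcls = Pointclouds(points=[torch.rand(128, 3)])  # batch of N=1 clouds with 128 points
loss = point_mesh_edge_distance(meshes, pcls)    # scalar: point_edge + edge_point, batch-averaged
```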
- """ - if len(meshes) != len(pcls): - raise ValueError("meshes and pointclouds must be equal sized batches") - N = len(meshes) - - # packed representation for pointclouds - points = pcls.points_packed() # (P, 3) - points_first_idx = pcls.cloud_to_packed_first_idx() - max_points = pcls.num_points_per_cloud().max().item() - - # packed representation for edges - verts_packed = meshes.verts_packed() - edges_packed = meshes.edges_packed() - segms = verts_packed[edges_packed] # (S, 2, 3) - segms_first_idx = meshes.mesh_to_edges_packed_first_idx() - max_segms = meshes.num_edges_per_mesh().max().item() - - # point to edge distance: shape (P,) - point_to_edge = point_edge_distance( - points, points_first_idx, segms, segms_first_idx, max_points - ) - - # weight each example by the inverse of number of points in the example - point_to_cloud_idx = pcls.packed_to_cloud_idx() # (sum(P_i), ) - num_points_per_cloud = pcls.num_points_per_cloud() # (N,) - weights_p = num_points_per_cloud.gather(0, point_to_cloud_idx) - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - weights_p = 1.0 / weights_p.float() - point_to_edge = point_to_edge * weights_p - point_dist = point_to_edge.sum() / N - - # edge to edge distance: shape (S,) - edge_to_point = edge_point_distance( - points, points_first_idx, segms, segms_first_idx, max_segms - ) - - # weight each example by the inverse of number of edges in the example - segm_to_mesh_idx = meshes.edges_packed_to_mesh_idx() # (sum(S_n),) - num_segms_per_mesh = meshes.num_edges_per_mesh() # (N,) - weights_s = num_segms_per_mesh.gather(0, segm_to_mesh_idx) - weights_s = 1.0 / weights_s.float() - edge_to_point = edge_to_point * weights_s - edge_dist = edge_to_point.sum() / N - - return point_dist + edge_dist - - -def point_mesh_face_distance( - meshes: Meshes, - pcls: Pointclouds, - min_triangle_area: float = _DEFAULT_MIN_TRIANGLE_AREA, -): - """ - Computes the distance between a pointcloud and a mesh within a batch. - Given a pair `(mesh, pcl)` in the batch, we define the distance to be the - sum of two distances, namely `point_face(mesh, pcl) + face_point(mesh, pcl)` - - `point_face(mesh, pcl)`: Computes the squared distance of each point p in pcl - to the closest triangular face in mesh and averages across all points in pcl - `face_point(mesh, pcl)`: Computes the squared distance of each triangular face in - mesh to the closest point in pcl and averages across all faces in mesh. - - The above distance functions are applied for all `(mesh, pcl)` pairs in the batch - and then averaged across the batch. - - Args: - meshes: A Meshes data structure containing N meshes - pcls: A Pointclouds data structure containing N pointclouds - min_triangle_area: (float, defaulted) Triangles of area less than this - will be treated as points/lines. - - Returns: - loss: The `point_face(mesh, pcl) + face_point(mesh, pcl)` distance - between all `(mesh, pcl)` in a batch averaged across the batch. 
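The face-based counterpart can be exercised the same way; again a sketch with placeholder inputs, where `min_triangle_area` mirrors the module default of 5e-3:

```python
import torch
from pytorch3d.loss import point_mesh_face_distance
from pytorch3d.structures import Pointclouds
from pytorch3d.utils import ico_sphere

meshes = ico_sphere(level=1)
pcls = Pointclouds(points=[torch.rand(128, 3)])
# Triangles with area below min_triangle_area are treated as points/lines.
loss = point_mesh_face_distance(meshes, pcls, min_triangle_area=5e-3)
```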
- """ - - if len(meshes) != len(pcls): - raise ValueError("meshes and pointclouds must be equal sized batches") - N = len(meshes) - - # packed representation for pointclouds - points = pcls.points_packed() # (P, 3) - points_first_idx = pcls.cloud_to_packed_first_idx() - max_points = pcls.num_points_per_cloud().max().item() - - # packed representation for faces - verts_packed = meshes.verts_packed() - faces_packed = meshes.faces_packed() - tris = verts_packed[faces_packed] # (T, 3, 3) - tris_first_idx = meshes.mesh_to_faces_packed_first_idx() - max_tris = meshes.num_faces_per_mesh().max().item() - - # point to face distance: shape (P,) - point_to_face = point_face_distance( - points, points_first_idx, tris, tris_first_idx, max_points, min_triangle_area - ) - - # weight each example by the inverse of number of points in the example - point_to_cloud_idx = pcls.packed_to_cloud_idx() # (sum(P_i),) - num_points_per_cloud = pcls.num_points_per_cloud() # (N,) - weights_p = num_points_per_cloud.gather(0, point_to_cloud_idx) - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - weights_p = 1.0 / weights_p.float() - point_to_face = point_to_face * weights_p - point_dist = point_to_face.sum() / N - - # face to point distance: shape (T,) - face_to_point = face_point_distance( - points, points_first_idx, tris, tris_first_idx, max_tris, min_triangle_area - ) - - # weight each example by the inverse of number of faces in the example - tri_to_mesh_idx = meshes.faces_packed_to_mesh_idx() # (sum(T_n),) - num_tris_per_mesh = meshes.num_faces_per_mesh() # (N, ) - weights_t = num_tris_per_mesh.gather(0, tri_to_mesh_idx) - weights_t = 1.0 / weights_t.float() - face_to_point = face_to_point * weights_t - face_dist = face_to_point.sum() / N - - return point_dist + face_dist diff --git a/pytorch3d/pytorch3d/ops/__init__.py b/pytorch3d/pytorch3d/ops/__init__.py deleted file mode 100644 index 9e561ccdc414e9f5f0428cbabe0e325c70f0a85b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from .ball_query import ball_query -from .cameras_alignment import corresponding_cameras_alignment -from .cubify import cubify -from .graph_conv import GraphConv -from .interp_face_attrs import interpolate_face_attributes -from .iou_box3d import box3d_overlap -from .knn import knn_gather, knn_points -from .laplacian_matrices import cot_laplacian, laplacian, norm_laplacian -from .mesh_face_areas_normals import mesh_face_areas_normals -from .mesh_filtering import taubin_smoothing -from .packed_to_padded import packed_to_padded, padded_to_packed -from .perspective_n_points import efficient_pnp -from .points_alignment import corresponding_points_alignment, iterative_closest_point -from .points_normals import ( - estimate_pointcloud_local_coord_frames, - estimate_pointcloud_normals, -) -from .points_to_volumes import ( - add_pointclouds_to_volumes, - add_points_features_to_volume_densities_features, -) -from .sample_farthest_points import sample_farthest_points -from .sample_points_from_meshes import sample_points_from_meshes -from .subdivide_meshes import SubdivideMeshes -from .utils import ( - convert_pointclouds_to_tensor, - eyes, - get_point_covariances, - is_pointclouds, - wmean, -) -from .vert_align import vert_align - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/ops/ball_query.py b/pytorch3d/pytorch3d/ops/ball_query.py deleted file mode 100644 index af271a40a1626ad8e0fe0340e56800d8b91d3778..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/ball_query.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Union - -import torch -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from .knn import _KNN -from .utils import masked_gather - - -class _ball_query(Function): - """ - Torch autograd Function wrapper for Ball Query C++/CUDA implementations. - """ - - @staticmethod - def forward(ctx, p1, p2, lengths1, lengths2, K, radius): - """ - Arguments defintions the same as in the ball_query function - """ - idx, dists = _C.ball_query(p1, p2, lengths1, lengths2, K, radius) - ctx.save_for_backward(p1, p2, lengths1, lengths2, idx) - ctx.mark_non_differentiable(idx) - return dists, idx - - @staticmethod - @once_differentiable - def backward(ctx, grad_dists, grad_idx): - p1, p2, lengths1, lengths2, idx = ctx.saved_tensors - # TODO(gkioxari) Change cast to floats once we add support for doubles. - if not (grad_dists.dtype == torch.float32): - grad_dists = grad_dists.float() - if not (p1.dtype == torch.float32): - p1 = p1.float() - if not (p2.dtype == torch.float32): - p2 = p2.float() - - # Reuse the KNN backward function - # by default, norm is 2 - grad_p1, grad_p2 = _C.knn_points_backward( - p1, p2, lengths1, lengths2, idx, 2, grad_dists - ) - return grad_p1, grad_p2, None, None, None, None - - -def ball_query( - p1: torch.Tensor, - p2: torch.Tensor, - lengths1: Union[torch.Tensor, None] = None, - lengths2: Union[torch.Tensor, None] = None, - K: int = 500, - radius: float = 0.2, - return_nn: bool = True, -): - """ - Ball Query is an alternative to KNN. It can be - used to find all points in p2 that are within a specified radius - to the query point in p1 (with an upper limit of K neighbors). 
- - The neighbors returned are not necessarily the *nearest* to the - point in p1, just the first K values in p2 which are within the - specified radius. - - This method is faster than kNN when there are large numbers of points - in p2 and the ordering of neighbors is not important compared to the - distance being within the radius threshold. - - "Ball query’s local neighborhood guarantees a fixed region scale thus - making local region features more generalizable across space, which is - preferred for tasks requiring local pattern recognition - (e.g. semantic point labeling)" [1]. - - [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning - on Point Sets in a Metric Space", NeurIPS 2017. - - Args: - p1: Tensor of shape (N, P1, D) giving a batch of N point clouds, each - containing up to P1 points of dimension D. These represent the centers of - the ball queries. - p2: Tensor of shape (N, P2, D) giving a batch of N point clouds, each - containing up to P2 points of dimension D. - lengths1: LongTensor of shape (N,) of values in the range [0, P1], giving the - length of each pointcloud in p1. Or None to indicate that every cloud has - length P1. - lengths2: LongTensor of shape (N,) of values in the range [0, P2], giving the - length of each pointcloud in p2. Or None to indicate that every cloud has - length P2. - K: Integer giving the upper bound on the number of samples to take - within the radius. - radius: the radius around each point within which the neighbors need to be located. - return_nn: If set to True returns the K neighbor points in p2 for each point in p1. - - Returns: - dists: Tensor of shape (N, P1, K) giving the squared distances to - the neighbors. This is padded with zeros both where a cloud in p2 - has fewer than K points and where a cloud in p1 has fewer than P1 points - and also if there are fewer than K points which satisfy the radius threshold. - - idx: LongTensor of shape (N, P1, K) giving the indices of the - K neighbors in p2 for points in p1. - Concretely, if `p1_idx[n, i, k] = j` then `p2[n, j]` is the k-th - neighbor to `p1[n, i]` in `p2[n]`. This is padded with -1 both where a cloud - in p2 has fewer than K points and where a cloud in p1 has fewer than P1 - points and also if there are fewer than K points which satisfy the radius threshold. - - nn: Tensor of shape (N, P1, K, D) giving the K neighbors in p2 for - each point in p1. Concretely, `p2_nn[n, i, k]` gives the k-th neighbor - for `p1[n, i]`. Returned if `return_nn` is True. The output is a tensor - of shape (N, P1, K, D). 
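A minimal usage sketch of `ball_query` as exported from `pytorch3d.ops`; the batch sizes, K, and radius below are illustrative values only:

```python
import torch
from pytorch3d.ops import ball_query

p1 = torch.rand(2, 100, 3)   # query centers: N=2 clouds with up to 100 points each
p2 = torch.rand(2, 500, 3)   # points searched for neighbors
res = ball_query(p1, p2, K=16, radius=0.1, return_nn=True)
# res.dists: (2, 100, 16) squared distances, zero-padded past the valid neighbors
# res.idx:   (2, 100, 16) indices into p2, padded with -1 where fewer than K
#            points fall inside the radius
# res.knn:   (2, 100, 16, 3) gathered neighbor coordinates
```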
- - """ - if p1.shape[0] != p2.shape[0]: - raise ValueError("pts1 and pts2 must have the same batch dimension.") - if p1.shape[2] != p2.shape[2]: - raise ValueError("pts1 and pts2 must have the same point dimension.") - - p1 = p1.contiguous() - p2 = p2.contiguous() - P1 = p1.shape[1] - P2 = p2.shape[1] - N = p1.shape[0] - - if lengths1 is None: - lengths1 = torch.full((N,), P1, dtype=torch.int64, device=p1.device) - if lengths2 is None: - lengths2 = torch.full((N,), P2, dtype=torch.int64, device=p1.device) - - dists, idx = _ball_query.apply(p1, p2, lengths1, lengths2, K, radius) - - # Gather the neighbors if needed - points_nn = masked_gather(p2, idx) if return_nn else None - - return _KNN(dists=dists, idx=idx, knn=points_nn) diff --git a/pytorch3d/pytorch3d/ops/cameras_alignment.py b/pytorch3d/pytorch3d/ops/cameras_alignment.py deleted file mode 100644 index 12412c6761fc8814cea507ae82eaa8565900d960..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/cameras_alignment.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import TYPE_CHECKING - -import torch - -from .. import ops - - -if TYPE_CHECKING: - from pytorch3d.renderer.cameras import CamerasBase - - -def corresponding_cameras_alignment( - cameras_src: "CamerasBase", - cameras_tgt: "CamerasBase", - estimate_scale: bool = True, - mode: str = "extrinsics", - eps: float = 1e-9, -) -> "CamerasBase": # pragma: no cover - """ - .. warning:: - The `corresponding_cameras_alignment` API is experimental - and subject to change! - - Estimates a single similarity transformation between two sets of cameras - `cameras_src` and `cameras_tgt` and returns an aligned version of - `cameras_src`. - - Given source cameras [(R_1, T_1), (R_2, T_2), ..., (R_N, T_N)] and - target cameras [(R_1', T_1'), (R_2', T_2'), ..., (R_N', T_N')], - where (R_i, T_i) is a 2-tuple of the camera rotation and translation matrix - respectively, the algorithm finds a global rotation, translation and scale - (R_A, T_A, s_A) which aligns all source cameras with the target cameras - such that the following holds: - - Under the change of coordinates using a similarity transform - (R_A, T_A, s_A) a 3D point X' is mapped to X with: :: - - X = (X' R_A + T_A) / s_A - - Then, for all cameras `i`, we assume that the following holds: :: - - X R_i + T_i = s' (X' R_i' + T_i'), - - i.e. an adjusted point X' is mapped by a camera (R_i', T_i') - to the same point as imaged from camera (R_i, T_i) after resolving - the scale ambiguity with a global scalar factor s'. - - Substituting for X above gives rise to the following: :: - - (X' R_A + T_A) / s_A R_i + T_i = s' (X' R_i' + T_i') // Β· s_A - (X' R_A + T_A) R_i + T_i s_A = (s' s_A) (X' R_i' + T_i') - s' := 1 / s_A # without loss of generality - (X' R_A + T_A) R_i + T_i s_A = X' R_i' + T_i' - X' R_A R_i + T_A R_i + T_i s_A = X' R_i' + T_i' - ^^^^^^^ ^^^^^^^^^^^^^^^^^ - ~= R_i' ~= T_i' - - i.e. after estimating R_A, T_A, s_A, the aligned source cameras have - extrinsics: :: - - cameras_src_align = (R_A R_i, T_A R_i + T_i s_A) ~= (R_i', T_i') - - We support two ways `R_A, T_A, s_A` can be estimated: - 1) `mode=='centers'` - Estimates the similarity alignment between camera centers using - Umeyama's algorithm (see `pytorch3d.ops.corresponding_points_alignment` - for details) and transforms camera extrinsics accordingly. 
- - 2) `mode=='extrinsics'` - Defines the alignment problem as a system - of the following equations: :: - - for all i: - [ R_A 0 ] x [ R_i 0 ] = [ R_i' 0 ] - [ T_A^T 1 ] [ (s_A T_i^T) 1 ] [ T_i' 1 ] - - `R_A, T_A` and `s_A` are then obtained by solving the - system in the least squares sense. - - The estimated camera transformation is a true similarity transform, i.e. - it cannot be a reflection. - - Args: - cameras_src: `N` cameras to be aligned. - cameras_tgt: `N` target cameras. - estimate_scale: Controls whether the alignment transform is rigid - (`estimate_scale=False`), or a similarity (`estimate_scale=True`). - `s_A` is set to `1` if `estimate_scale==False`. - mode: Controls the alignment algorithm. - Can be one either `'centers'` or `'extrinsics'`. Please refer to the - description above for details. - eps: A scalar for clamping to avoid dividing by zero. - Active when `estimate_scale==True`. - - Returns: - cameras_src_aligned: `cameras_src` after applying the alignment transform. - """ - - if cameras_src.R.shape[0] != cameras_tgt.R.shape[0]: - raise ValueError( - "cameras_src and cameras_tgt have to contain the same number of cameras!" - ) - - if mode == "centers": - align_fun = _align_camera_centers - elif mode == "extrinsics": - align_fun = _align_camera_extrinsics - else: - raise ValueError("mode has to be one of (centers, extrinsics)") - - align_t_R, align_t_T, align_t_s = align_fun( - cameras_src, cameras_tgt, estimate_scale=estimate_scale, eps=eps - ) - - # create a new cameras object and set the R and T accordingly - cameras_src_aligned = cameras_src.clone() - cameras_src_aligned.R = torch.bmm(align_t_R.expand_as(cameras_src.R), cameras_src.R) - cameras_src_aligned.T = ( - torch.bmm( - align_t_T[:, None].repeat(cameras_src.R.shape[0], 1, 1), - cameras_src.R, - )[:, 0] - + cameras_src.T * align_t_s - ) - - return cameras_src_aligned - - -def _align_camera_centers( - cameras_src: "CamerasBase", - cameras_tgt: "CamerasBase", - estimate_scale: bool = True, - eps: float = 1e-9, -): # pragma: no cover - """ - Use Umeyama's algorithm to align the camera centers. - """ - centers_src = cameras_src.get_camera_center() - centers_tgt = cameras_tgt.get_camera_center() - align_t = ops.corresponding_points_alignment( - centers_src[None], - centers_tgt[None], - estimate_scale=estimate_scale, - allow_reflection=False, - eps=eps, - ) - # the camera transform is the inverse of the estimated transform between centers - align_t_R = align_t.R.permute(0, 2, 1) - align_t_T = -(torch.bmm(align_t.T[:, None], align_t_R))[:, 0] - align_t_s = align_t.s[0] - - return align_t_R, align_t_T, align_t_s - - -def _align_camera_extrinsics( - cameras_src: "CamerasBase", - cameras_tgt: "CamerasBase", - estimate_scale: bool = True, - eps: float = 1e-9, -): # pragma: no cover - """ - Get the global rotation R_A with svd of cov(RR^T): - ``` - R_A R_i = R_i' for all i - R_A [R_1 R_2 ... R_N] = [R_1' R_2' ... R_N'] - U, _, V = svd([R_1 R_2 ... R_N]^T [R_1' R_2' ... R_N']) - R_A = (U V^T)^T - ``` - """ - RRcov = torch.bmm(cameras_src.R, cameras_tgt.R.transpose(2, 1)).mean(0) - U, _, V = torch.svd(RRcov) - align_t_R = V @ U.t() - - """ - The translation + scale `T_A` and `s_A` is computed by finding - a translation and scaling that aligns two tensors `A, B` - defined as follows: - ``` - T_A R_i + s_A T_i = T_i' ; for all i // Β· R_i^T - s_A T_i R_i^T + T_A = T_i' R_i^T ; for all i - ^^^^^^^^^ ^^^^^^^^^^ - A_i B_i - - A_i := T_i R_i^T - A = [A_1 A_2 ... A_N] - B_i := T_i' R_i^T - B = [B_1 B_2 ... 
B_N] - ``` - The scale s_A can be retrieved by matching the correlations of - the points sets A and B: - ``` - s_A = (A-mean(A))*(B-mean(B)).sum() / ((A-mean(A))**2).sum() - ``` - The translation `T_A` is then defined as: - ``` - T_A = mean(B) - mean(A) * s_A - ``` - """ - A = torch.bmm(cameras_src.R, cameras_src.T[:, :, None])[:, :, 0] - B = torch.bmm(cameras_src.R, cameras_tgt.T[:, :, None])[:, :, 0] - Amu = A.mean(0, keepdim=True) - Bmu = B.mean(0, keepdim=True) - if estimate_scale and A.shape[0] > 1: - # get the scaling component by matching covariances - # of centered A and centered B - Ac = A - Amu - Bc = B - Bmu - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - align_t_s = (Ac * Bc).mean() / (Ac**2).mean().clamp(eps) - else: - # set the scale to identity - align_t_s = 1.0 - # get the translation as the difference between the means of A and B - align_t_T = Bmu - align_t_s * Amu - - return align_t_R, align_t_T, align_t_s diff --git a/pytorch3d/pytorch3d/ops/cubify.py b/pytorch3d/pytorch3d/ops/cubify.py deleted file mode 100644 index 364e6226d2a5ce6d2ae3739039c705c264a7270f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/cubify.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import Optional - -import torch -import torch.nn.functional as F - -from pytorch3d.common.compat import meshgrid_ij - -from pytorch3d.structures import Meshes - - -def unravel_index(idx, dims) -> torch.Tensor: - r""" - Equivalent to np.unravel_index - Args: - idx: A LongTensor whose elements are indices into the - flattened version of an array of dimensions dims. - dims: The shape of the array to be indexed. - Implemented only for dims=(N, H, W, D) - """ - if len(dims) != 4: - raise ValueError("Expects a 4-element list.") - N, H, W, D = dims - n = idx // (H * W * D) - h = (idx - n * H * W * D) // (W * D) - w = (idx - n * H * W * D - h * W * D) // D - d = idx - n * H * W * D - h * W * D - w * D - return torch.stack((n, h, w, d), dim=1) - - -def ravel_index(idx, dims) -> torch.Tensor: - """ - Computes the linear index in an array of shape dims. - It performs the reverse functionality of unravel_index - Args: - idx: A LongTensor of shape (N, 3). Each row corresponds to indices into an - array of dimensions dims. - dims: The shape of the array to be indexed. - Implemented only for dims=(H, W, D) - """ - if len(dims) != 3: - raise ValueError("Expects a 3-element list") - if idx.shape[1] != 3: - raise ValueError("Expects an index tensor of shape Nx3") - H, W, D = dims - linind = idx[:, 0] * W * D + idx[:, 1] * D + idx[:, 2] - return linind - - -@torch.no_grad() -def cubify( - voxels: torch.Tensor, - thresh: float, - *, - feats: Optional[torch.Tensor] = None, - device=None, - align: str = "topleft" -) -> Meshes: - r""" - Converts a voxel to a mesh by replacing each occupied voxel with a cube - consisting of 12 faces and 8 vertices. Shared vertices are merged, and - internal faces are removed. - Args: - voxels: A FloatTensor of shape (N, D, H, W) containing occupancy probabilities. - thresh: A scalar threshold. If a voxel occupancy is larger than - thresh, the voxel is considered occupied. - feats: A FloatTensor of shape (N, K, D, H, W) containing the color information - of each voxel. K is the number of channels. 
This is supported only when - align == "center" - device: The device of the output meshes - align: Defines the alignment of the mesh vertices and the grid locations. - Has to be one of {"topleft", "corner", "center"}. See below for explanation. - Default is "topleft". - Returns: - meshes: A Meshes object of the corresponding meshes. - - - The alignment between the vertices of the cubified mesh and the voxel locations (or pixels) - is defined by the choice of `align`. We support three modes, as shown below for a 2x2 grid: - - X---X---- X-------X --------- - | | | | | | | X | X | - X---X---- --------- --------- - | | | | | | | X | X | - --------- X-------X --------- - - topleft corner center - - In the figure, X denote the grid locations and the squares represent the added cuboids. - When `align="topleft"`, then the top left corner of each cuboid corresponds to the - pixel coordinate of the input grid. - When `align="corner"`, then the corners of the output mesh span the whole grid. - When `align="center"`, then the grid locations form the center of the cuboids. - """ - - if device is None: - device = voxels.device - - if align not in ["topleft", "corner", "center"]: - raise ValueError("Align mode must be one of (topleft, corner, center).") - - if len(voxels) == 0: - return Meshes(verts=[], faces=[]) - - N, D, H, W = voxels.size() - # vertices corresponding to a unit cube: 8x3 - cube_verts = torch.tensor( - [ - [0, 0, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 1], - [1, 0, 0], - [1, 0, 1], - [1, 1, 0], - [1, 1, 1], - ], - dtype=torch.int64, - device=device, - ) - - # faces corresponding to a unit cube: 12x3 - cube_faces = torch.tensor( - [ - [0, 1, 2], - [1, 3, 2], # left face: 0, 1 - [2, 3, 6], - [3, 7, 6], # bottom face: 2, 3 - [0, 2, 6], - [0, 6, 4], # front face: 4, 5 - [0, 5, 1], - [0, 4, 5], # up face: 6, 7 - [6, 7, 5], - [6, 5, 4], # right face: 8, 9 - [1, 7, 3], - [1, 5, 7], # back face: 10, 11 - ], - dtype=torch.int64, - device=device, - ) - - wx = torch.tensor([0.5, 0.5], device=device).view(1, 1, 1, 1, 2) - wy = torch.tensor([0.5, 0.5], device=device).view(1, 1, 1, 2, 1) - wz = torch.tensor([0.5, 0.5], device=device).view(1, 1, 2, 1, 1) - - voxelt = voxels.ge(thresh).float() - # N x 1 x D x H x W - voxelt = voxelt.view(N, 1, D, H, W) - - # N x 1 x (D-1) x (H-1) x (W-1) - voxelt_x = F.conv3d(voxelt, wx).gt(0.5).float() - voxelt_y = F.conv3d(voxelt, wy).gt(0.5).float() - voxelt_z = F.conv3d(voxelt, wz).gt(0.5).float() - - # 12 x N x 1 x D x H x W - faces_idx = torch.ones((cube_faces.size(0), N, 1, D, H, W), device=device) - - # add left face - faces_idx[0, :, :, :, :, 1:] = 1 - voxelt_x - faces_idx[1, :, :, :, :, 1:] = 1 - voxelt_x - # add bottom face - faces_idx[2, :, :, :, :-1, :] = 1 - voxelt_y - faces_idx[3, :, :, :, :-1, :] = 1 - voxelt_y - # add front face - faces_idx[4, :, :, 1:, :, :] = 1 - voxelt_z - faces_idx[5, :, :, 1:, :, :] = 1 - voxelt_z - # add up face - faces_idx[6, :, :, :, 1:, :] = 1 - voxelt_y - faces_idx[7, :, :, :, 1:, :] = 1 - voxelt_y - # add right face - faces_idx[8, :, :, :, :, :-1] = 1 - voxelt_x - faces_idx[9, :, :, :, :, :-1] = 1 - voxelt_x - # add back face - faces_idx[10, :, :, :-1, :, :] = 1 - voxelt_z - faces_idx[11, :, :, :-1, :, :] = 1 - voxelt_z - - faces_idx *= voxelt - - # N x H x W x D x 12 - faces_idx = faces_idx.permute(1, 2, 4, 5, 3, 0).squeeze(1) - # (NHWD) x 12 - faces_idx = faces_idx.contiguous() - faces_idx = faces_idx.view(-1, cube_faces.size(0)) - - # boolean to linear index - # NF x 2 - linind = torch.nonzero(faces_idx, as_tuple=False) - 
- # NF x 4 - nyxz = unravel_index(linind[:, 0], (N, H, W, D)) - - # NF x 3: faces - faces = torch.index_select(cube_faces, 0, linind[:, 1]) - - grid_faces = [] - for d in range(cube_faces.size(1)): - # NF x 3 - xyz = torch.index_select(cube_verts, 0, faces[:, d]) - permute_idx = torch.tensor([1, 0, 2], device=device) - yxz = torch.index_select(xyz, 1, permute_idx) - yxz += nyxz[:, 1:] - # NF x 1 - temp = ravel_index(yxz, (H + 1, W + 1, D + 1)) - grid_faces.append(temp) - # NF x 3 - grid_faces = torch.stack(grid_faces, dim=1) - - y, x, z = meshgrid_ij(torch.arange(H + 1), torch.arange(W + 1), torch.arange(D + 1)) - y = y.to(device=device, dtype=torch.float32) - x = x.to(device=device, dtype=torch.float32) - z = z.to(device=device, dtype=torch.float32) - - if align == "center": - x = x - 0.5 - y = y - 0.5 - z = z - 0.5 - - margin = 0.0 if align == "corner" else 1.0 - y = y * 2.0 / (H - margin) - 1.0 - x = x * 2.0 / (W - margin) - 1.0 - z = z * 2.0 / (D - margin) - 1.0 - - # ((H+1)(W+1)(D+1)) x 3 - grid_verts = torch.stack((x, y, z), dim=3).view(-1, 3) - - if len(nyxz) == 0: - verts_list = [torch.tensor([], dtype=torch.float32, device=device)] * N - faces_list = [torch.tensor([], dtype=torch.int64, device=device)] * N - return Meshes(verts=verts_list, faces=faces_list) - - num_verts = grid_verts.size(0) - grid_faces += nyxz[:, 0].view(-1, 1) * num_verts - idleverts = torch.ones(num_verts * N, dtype=torch.uint8, device=device) - - indices = grid_faces.flatten() - if device.type == "cpu": - indices = torch.unique(indices) - idleverts.scatter_(0, indices, 0) - grid_faces -= nyxz[:, 0].view(-1, 1) * num_verts - split_size = torch.bincount(nyxz[:, 0], minlength=N) - faces_list = list(torch.split(grid_faces, split_size.tolist(), 0)) - - idleverts = idleverts.view(N, num_verts) - idlenum = idleverts.cumsum(1) - - verts_list = [ - grid_verts.index_select(0, (idleverts[n] == 0).nonzero(as_tuple=False)[:, 0]) - for n in range(N) - ] - - textures_list = None - if feats is not None and align == "center": - # We return a TexturesAtlas containing one color for each face - # N x K x D x H x W -> N x H x W x D x K - feats = feats.permute(0, 3, 4, 2, 1) - - # (NHWD) x K - feats = feats.reshape(-1, feats.size(4)) - feats = torch.index_select(feats, 0, linind[:, 0]) - feats = feats.reshape(-1, 1, 1, feats.size(1)) - feats_list = list(torch.split(feats, split_size.tolist(), 0)) - from pytorch3d.renderer.mesh.textures import TexturesAtlas - - textures_list = TexturesAtlas(feats_list) - - faces_list = [nface - idlenum[n][nface] for n, nface in enumerate(faces_list)] - return Meshes(verts=verts_list, faces=faces_list, textures=textures_list) diff --git a/pytorch3d/pytorch3d/ops/graph_conv.py b/pytorch3d/pytorch3d/ops/graph_conv.py deleted file mode 100644 index 1d7a6186b0acedd0cf28d6b7c9723e357acca357..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/graph_conv.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
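For the `cubify` converter deleted above, a minimal usage sketch; the occupancy grid is a toy example and the threshold and alignment values are illustrative:

```python
import torch
from pytorch3d.ops import cubify

# Toy occupancy grid: a batch of 2 volumes of size 16^3, roughly 30% occupied.
voxels = (torch.rand(2, 16, 16, 16) > 0.7).float()
meshes = cubify(voxels, thresh=0.5, align="center")  # Meshes object with 2 elements
```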
- - -import torch -import torch.nn as nn -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -class GraphConv(nn.Module): - """A single graph convolution layer.""" - - def __init__( - self, - input_dim: int, - output_dim: int, - init: str = "normal", - directed: bool = False, - ) -> None: - """ - Args: - input_dim: Number of input features per vertex. - output_dim: Number of output features per vertex. - init: Weight initialization method. Can be one of ['zero', 'normal']. - directed: Bool indicating if edges in the graph are directed. - """ - super().__init__() - self.input_dim = input_dim - self.output_dim = output_dim - self.directed = directed - self.w0 = nn.Linear(input_dim, output_dim) - self.w1 = nn.Linear(input_dim, output_dim) - - if init == "normal": - nn.init.normal_(self.w0.weight, mean=0, std=0.01) - nn.init.normal_(self.w1.weight, mean=0, std=0.01) - self.w0.bias.data.zero_() - self.w1.bias.data.zero_() - elif init == "zero": - self.w0.weight.data.zero_() - self.w1.weight.data.zero_() - else: - raise ValueError('Invalid GraphConv initialization "%s"' % init) - - def forward(self, verts, edges): - """ - Args: - verts: FloatTensor of shape (V, input_dim) where V is the number of - vertices and input_dim is the number of input features - per vertex. input_dim has to match the input_dim specified - in __init__. - edges: LongTensor of shape (E, 2) where E is the number of edges - where each edge has the indices of the two vertices which - form the edge. - - Returns: - out: FloatTensor of shape (V, output_dim) where output_dim is the - number of output features per vertex. - """ - if verts.is_cuda != edges.is_cuda: - raise ValueError("verts and edges tensors must be on the same device.") - if verts.shape[0] == 0: - # empty graph. - return verts.new_zeros((0, self.output_dim)) * verts.sum() - - verts_w0 = self.w0(verts) # (V, output_dim) - verts_w1 = self.w1(verts) # (V, output_dim) - - if torch.cuda.is_available() and verts.is_cuda and edges.is_cuda: - neighbor_sums = gather_scatter(verts_w1, edges, self.directed) - else: - neighbor_sums = gather_scatter_python( - verts_w1, edges, self.directed - ) # (V, output_dim) - - # Add neighbor features to each vertex's features. - out = verts_w0 + neighbor_sums - return out - - def __repr__(self): - Din, Dout, directed = self.input_dim, self.output_dim, self.directed - return "GraphConv(%d -> %d, directed=%r)" % (Din, Dout, directed) - - -def gather_scatter_python(input, edges, directed: bool = False): - """ - Python implementation of gather_scatter for aggregating features of - neighbor nodes in a graph. - - Given a directed graph: v0 -> v1 -> v2 the updated feature for v1 depends - on v2 in order to be consistent with Morris et al. AAAI 2019 - (https://arxiv.org/abs/1810.02244). This only affects - directed graphs; for undirected graphs v1 will depend on both v0 and v2, - no matter which way the edges are physically stored. - - Args: - input: Tensor of shape (num_vertices, input_dim). - edges: Tensor of edge indices of shape (num_edges, 2). - directed: bool indicating if edges are directed. - - Returns: - output: Tensor of same shape as input. 
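A minimal usage sketch of `GraphConv` and the neighbor aggregation described above; the feature sizes and the tiny edge list are illustrative:

```python
import torch
from pytorch3d.ops import GraphConv

verts = torch.rand(4, 8)                        # 4 vertices with 8 input features each
edges = torch.tensor([[0, 1], [1, 2], [2, 3]])  # (E, 2) vertex index pairs, undirected
conv = GraphConv(input_dim=8, output_dim=16)
out = conv(verts, edges)                        # (4, 16): w0(v) plus summed w1 over neighbors
```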
- """ - if not (input.dim() == 2): - raise ValueError("input can only have 2 dimensions.") - if not (edges.dim() == 2): - raise ValueError("edges can only have 2 dimensions.") - if not (edges.shape[1] == 2): - raise ValueError("edges must be of shape (num_edges, 2).") - - num_vertices, input_feature_dim = input.shape - num_edges = edges.shape[0] - output = torch.zeros_like(input) - idx0 = edges[:, 0].view(num_edges, 1).expand(num_edges, input_feature_dim) - idx1 = edges[:, 1].view(num_edges, 1).expand(num_edges, input_feature_dim) - - output = output.scatter_add(0, idx0, input.gather(0, idx1)) - if not directed: - output = output.scatter_add(0, idx1, input.gather(0, idx0)) - return output - - -class GatherScatter(Function): - """ - Torch autograd Function wrapper for gather_scatter C++/CUDA implementations. - """ - - @staticmethod - def forward(ctx, input, edges, directed=False): - """ - Args: - ctx: Context object used to calculate gradients. - input: Tensor of shape (num_vertices, input_dim) - edges: Tensor of edge indices of shape (num_edges, 2) - directed: Bool indicating if edges are directed. - - Returns: - output: Tensor of same shape as input. - """ - if not (input.dim() == 2): - raise ValueError("input can only have 2 dimensions.") - if not (edges.dim() == 2): - raise ValueError("edges can only have 2 dimensions.") - if not (edges.shape[1] == 2): - raise ValueError("edges must be of shape (num_edges, 2).") - if not (input.dtype == torch.float32): - raise ValueError("input has to be of type torch.float32.") - - ctx.directed = directed - input, edges = input.contiguous(), edges.contiguous() - ctx.save_for_backward(edges) - backward = False - output = _C.gather_scatter(input, edges, directed, backward) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - grad_output = grad_output.contiguous() - edges = ctx.saved_tensors[0] - directed = ctx.directed - backward = True - grad_input = _C.gather_scatter(grad_output, edges, directed, backward) - grad_edges = None - grad_directed = None - return grad_input, grad_edges, grad_directed - - -gather_scatter = GatherScatter.apply diff --git a/pytorch3d/pytorch3d/ops/interp_face_attrs.py b/pytorch3d/pytorch3d/ops/interp_face_attrs.py deleted file mode 100644 index 705fc152eddf96771391ad481dd8709cb9ba792b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/interp_face_attrs.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -def interpolate_face_attributes( - pix_to_face: torch.Tensor, - barycentric_coords: torch.Tensor, - face_attributes: torch.Tensor, -) -> torch.Tensor: - """ - Interpolate arbitrary face attributes using the barycentric coordinates - for each pixel in the rasterized output. - - Args: - pix_to_face: LongTensor of shape (...) specifying the indices - of the faces (in the packed representation) which overlap each - pixel in the image. A value < 0 indicates that the pixel does not - overlap any face and should be skipped. - barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying - the barycentric coordinates of each pixel - relative to the faces (in the packed - representation) which overlap the pixel. 
- face_attributes: packed attributes of shape (total_faces, 3, D), - specifying the value of the attribute for each - vertex in the face. - - Returns: - pixel_vals: tensor of shape (N, H, W, K, D) giving the interpolated - value of the face attribute for each pixel. - """ - # Check shapes - F, FV, D = face_attributes.shape - if FV != 3: - raise ValueError("Faces can only have three vertices; got %r" % FV) - N, H, W, K, _ = barycentric_coords.shape - if pix_to_face.shape != (N, H, W, K): - msg = "pix_to_face must have shape (batch_size, H, W, K); got %r" - raise ValueError(msg % (pix_to_face.shape,)) - - # On CPU use the python version - # TODO: Implement a C++ version of this function - if not pix_to_face.is_cuda: - args = (pix_to_face, barycentric_coords, face_attributes) - return interpolate_face_attributes_python(*args) - - # Otherwise flatten and call the custom autograd function - N, H, W, K = pix_to_face.shape - pix_to_face = pix_to_face.view(-1) - barycentric_coords = barycentric_coords.view(N * H * W * K, 3) - args = (pix_to_face, barycentric_coords, face_attributes) - out = _InterpFaceAttrs.apply(*args) - out = out.view(N, H, W, K, -1) - return out - - -class _InterpFaceAttrs(Function): - @staticmethod - def forward(ctx, pix_to_face, barycentric_coords, face_attrs): - args = (pix_to_face, barycentric_coords, face_attrs) - ctx.save_for_backward(*args) - return _C.interp_face_attrs_forward(*args) - - @staticmethod - @once_differentiable - def backward(ctx, grad_pix_attrs): - args = ctx.saved_tensors - args = args + (grad_pix_attrs,) - grads = _C.interp_face_attrs_backward(*args) - grad_pix_to_face = None - grad_barycentric_coords = grads[0] - grad_face_attrs = grads[1] - return grad_pix_to_face, grad_barycentric_coords, grad_face_attrs - - -def interpolate_face_attributes_python( - pix_to_face: torch.Tensor, - barycentric_coords: torch.Tensor, - face_attributes: torch.Tensor, -) -> torch.Tensor: - F, FV, D = face_attributes.shape - N, H, W, K, _ = barycentric_coords.shape - - # Replace empty pixels in pix_to_face with 0 in order to interpolate. - mask = pix_to_face < 0 - pix_to_face = pix_to_face.clone() - pix_to_face[mask] = 0 - idx = pix_to_face.view(N * H * W * K, 1, 1).expand(N * H * W * K, 3, D) - pixel_face_vals = face_attributes.gather(0, idx).view(N, H, W, K, 3, D) - pixel_vals = (barycentric_coords[..., None] * pixel_face_vals).sum(dim=-2) - pixel_vals[mask] = 0 # Replace masked values in output. - return pixel_vals diff --git a/pytorch3d/pytorch3d/ops/iou_box3d.py b/pytorch3d/pytorch3d/ops/iou_box3d.py deleted file mode 100644 index dbc6358bb41e412978a968964981354cac86f40f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/iou_box3d.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Tuple - -import torch -import torch.nn.functional as F -from pytorch3d import _C -from torch.autograd import Function - - -# -------------------------------------------------- # -# CONSTANTS # -# -------------------------------------------------- # -""" -_box_planes and _box_triangles define the 4- and 3-connectivity -of the 8 box corners. 
-_box_planes gives the quad faces of the 3D box -_box_triangles gives the triangle faces of the 3D box -""" -_box_planes = [ - [0, 1, 2, 3], - [3, 2, 6, 7], - [0, 1, 5, 4], - [0, 3, 7, 4], - [1, 2, 6, 5], - [4, 5, 6, 7], -] -_box_triangles = [ - [0, 1, 2], - [0, 3, 2], - [4, 5, 6], - [4, 6, 7], - [1, 5, 6], - [1, 6, 2], - [0, 4, 7], - [0, 7, 3], - [3, 2, 6], - [3, 6, 7], - [0, 1, 5], - [0, 4, 5], -] - - -def _check_coplanar(boxes: torch.Tensor, eps: float = 1e-4) -> None: - faces = torch.tensor(_box_planes, dtype=torch.int64, device=boxes.device) - verts = boxes.index_select(index=faces.view(-1), dim=1) - B = boxes.shape[0] - P, V = faces.shape - # (B, P, 4, 3) -> (B, P, 3) - v0, v1, v2, v3 = verts.reshape(B, P, V, 3).unbind(2) - - # Compute the normal - e0 = F.normalize(v1 - v0, dim=-1) - e1 = F.normalize(v2 - v0, dim=-1) - normal = F.normalize(torch.cross(e0, e1, dim=-1), dim=-1) - - # Check the fourth vertex is also on the same plane - mat1 = (v3 - v0).view(B, 1, -1) # (B, 1, P*3) - mat2 = normal.view(B, -1, 1) # (B, P*3, 1) - if not (mat1.bmm(mat2).abs() < eps).all().item(): - msg = "Plane vertices are not coplanar" - raise ValueError(msg) - - return - - -def _check_nonzero(boxes: torch.Tensor, eps: float = 1e-4) -> None: - """ - Checks that the sides of the box have a non zero area - """ - faces = torch.tensor(_box_triangles, dtype=torch.int64, device=boxes.device) - verts = boxes.index_select(index=faces.view(-1), dim=1) - B = boxes.shape[0] - T, V = faces.shape - # (B, T, 3, 3) -> (B, T, 3) - v0, v1, v2 = verts.reshape(B, T, V, 3).unbind(2) - - normals = torch.cross(v1 - v0, v2 - v0, dim=-1) # (B, T, 3) - face_areas = normals.norm(dim=-1) / 2 - - if (face_areas < eps).any().item(): - msg = "Planes have zero areas" - raise ValueError(msg) - - return - - -class _box3d_overlap(Function): - """ - Torch autograd Function wrapper for box3d_overlap C++/CUDA implementations. - Backward is not supported. - """ - - @staticmethod - def forward(ctx, boxes1, boxes2): - """ - Arguments defintions the same as in the box3d_overlap function - """ - vol, iou = _C.iou_box3d(boxes1, boxes2) - return vol, iou - - @staticmethod - def backward(ctx, grad_vol, grad_iou): - raise ValueError("box3d_overlap backward is not supported") - - -def box3d_overlap( - boxes1: torch.Tensor, boxes2: torch.Tensor, eps: float = 1e-4 -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Computes the intersection of 3D boxes1 and boxes2. - - Inputs boxes1, boxes2 are tensors of shape (B, 8, 3) - (where B doesn't have to be the same for boxes1 and boxes2), - containing the 8 corners of the boxes, as follows: - - (4) +---------+. (5) - | ` . | ` . - | (0) +---+-----+ (1) - | | | | - (7) +-----+---+. (6)| - ` . | ` . | - (3) ` +---------+ (2) - - - NOTE: Throughout this implementation, we assume that boxes - are defined by their 8 corners exactly in the order specified in the - diagram above for the function to give correct results. In addition - the vertices on each plane must be coplanar. 
- As an alternative to the diagram, this is a unit bounding - box which has the correct vertex ordering: - - box_corner_vertices = [ - [0, 0, 0], - [1, 0, 0], - [1, 1, 0], - [0, 1, 0], - [0, 0, 1], - [1, 0, 1], - [1, 1, 1], - [0, 1, 1], - ] - - Args: - boxes1: tensor of shape (N, 8, 3) of the coordinates of the 1st boxes - boxes2: tensor of shape (M, 8, 3) of the coordinates of the 2nd boxes - Returns: - vol: (N, M) tensor of the volume of the intersecting convex shapes - iou: (N, M) tensor of the intersection over union which is - defined as: `iou = vol / (vol1 + vol2 - vol)` - """ - if not all((8, 3) == box.shape[1:] for box in [boxes1, boxes2]): - raise ValueError("Each box in the batch must be of shape (8, 3)") - - _check_coplanar(boxes1, eps) - _check_coplanar(boxes2, eps) - _check_nonzero(boxes1, eps) - _check_nonzero(boxes2, eps) - - vol, iou = _box3d_overlap.apply(boxes1, boxes2) - - return vol, iou diff --git a/pytorch3d/pytorch3d/ops/knn.py b/pytorch3d/pytorch3d/ops/knn.py deleted file mode 100644 index 2b31c5cb1551b58209634cc87e80f0cbf51fc642..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/knn.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from collections import namedtuple -from typing import Union - -import torch -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -_KNN = namedtuple("KNN", "dists idx knn") - - -class _knn_points(Function): - """ - Torch autograd Function wrapper for KNN C++/CUDA implementations. - """ - - @staticmethod - # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently. - def forward( - ctx, - p1, - p2, - lengths1, - lengths2, - K, - version, - norm: int = 2, - return_sorted: bool = True, - ): - """ - K-Nearest neighbors on point clouds. - - Args: - p1: Tensor of shape (N, P1, D) giving a batch of N point clouds, each - containing up to P1 points of dimension D. - p2: Tensor of shape (N, P2, D) giving a batch of N point clouds, each - containing up to P2 points of dimension D. - lengths1: LongTensor of shape (N,) of values in the range [0, P1], giving the - length of each pointcloud in p1. Or None to indicate that every cloud has - length P1. - lengths2: LongTensor of shape (N,) of values in the range [0, P2], giving the - length of each pointcloud in p2. Or None to indicate that every cloud has - length P2. - K: Integer giving the number of nearest neighbors to return. - version: Which KNN implementation to use in the backend. If version=-1, - the correct implementation is selected based on the shapes of the inputs. - norm: (int) indicating the norm. Only supports 1 (for L1) and 2 (for L2). - return_sorted: (bool) whether to return the nearest neighbors sorted in - ascending order of distance. - - Returns: - p1_dists: Tensor of shape (N, P1, K) giving the squared distances to - the nearest neighbors. This is padded with zeros both where a cloud in p2 - has fewer than K points and where a cloud in p1 has fewer than P1 points. - - p1_idx: LongTensor of shape (N, P1, K) giving the indices of the - K nearest neighbors from points in p1 to points in p2. - Concretely, if `p1_idx[n, i, k] = j` then `p2[n, j]` is the k-th nearest - neighbors to `p1[n, i]` in `p2[n]`. 
This is padded with zeros both where a cloud - in p2 has fewer than K points and where a cloud in p1 has fewer than P1 points. - """ - if not ((norm == 1) or (norm == 2)): - raise ValueError("Support for 1 or 2 norm.") - - idx, dists = _C.knn_points_idx(p1, p2, lengths1, lengths2, norm, K, version) - - # sort KNN in ascending order if K > 1 - if K > 1 and return_sorted: - if lengths2.min() < K: - P1 = p1.shape[1] - mask = lengths2[:, None] <= torch.arange(K, device=dists.device)[None] - # mask has shape [N, K], true where dists irrelevant - mask = mask[:, None].expand(-1, P1, -1) - # mask has shape [N, P1, K], true where dists irrelevant - dists[mask] = float("inf") - dists, sort_idx = dists.sort(dim=2) - dists[mask] = 0 - else: - dists, sort_idx = dists.sort(dim=2) - idx = idx.gather(2, sort_idx) - - ctx.save_for_backward(p1, p2, lengths1, lengths2, idx) - ctx.mark_non_differentiable(idx) - ctx.norm = norm - return dists, idx - - @staticmethod - @once_differentiable - def backward(ctx, grad_dists, grad_idx): - p1, p2, lengths1, lengths2, idx = ctx.saved_tensors - norm = ctx.norm - # TODO(gkioxari) Change cast to floats once we add support for doubles. - if not (grad_dists.dtype == torch.float32): - grad_dists = grad_dists.float() - if not (p1.dtype == torch.float32): - p1 = p1.float() - if not (p2.dtype == torch.float32): - p2 = p2.float() - grad_p1, grad_p2 = _C.knn_points_backward( - p1, p2, lengths1, lengths2, idx, norm, grad_dists - ) - return grad_p1, grad_p2, None, None, None, None, None, None - - -def knn_points( - p1: torch.Tensor, - p2: torch.Tensor, - lengths1: Union[torch.Tensor, None] = None, - lengths2: Union[torch.Tensor, None] = None, - norm: int = 2, - K: int = 1, - version: int = -1, - return_nn: bool = False, - return_sorted: bool = True, -) -> _KNN: - """ - K-Nearest neighbors on point clouds. - - Args: - p1: Tensor of shape (N, P1, D) giving a batch of N point clouds, each - containing up to P1 points of dimension D. - p2: Tensor of shape (N, P2, D) giving a batch of N point clouds, each - containing up to P2 points of dimension D. - lengths1: LongTensor of shape (N,) of values in the range [0, P1], giving the - length of each pointcloud in p1. Or None to indicate that every cloud has - length P1. - lengths2: LongTensor of shape (N,) of values in the range [0, P2], giving the - length of each pointcloud in p2. Or None to indicate that every cloud has - length P2. - norm: Integer indicating the norm of the distance. Supports only 1 for L1, 2 for L2. - K: Integer giving the number of nearest neighbors to return. - version: Which KNN implementation to use in the backend. If version=-1, - the correct implementation is selected based on the shapes of the inputs. - return_nn: If set to True returns the K nearest neighbors in p2 for each point in p1. - return_sorted: (bool) whether to return the nearest neighbors sorted in - ascending order of distance. - - Returns: - dists: Tensor of shape (N, P1, K) giving the squared distances to - the nearest neighbors. This is padded with zeros both where a cloud in p2 - has fewer than K points and where a cloud in p1 has fewer than P1 points. - - idx: LongTensor of shape (N, P1, K) giving the indices of the - K nearest neighbors from points in p1 to points in p2. - Concretely, if `p1_idx[n, i, k] = j` then `p2[n, j]` is the k-th nearest - neighbors to `p1[n, i]` in `p2[n]`. This is padded with zeros both where a cloud - in p2 has fewer than K points and where a cloud in p1 has fewer than P1 - points. 
- - nn: Tensor of shape (N, P1, K, D) giving the K nearest neighbors in p2 for - each point in p1. Concretely, `p2_nn[n, i, k]` gives the k-th nearest neighbor - for `p1[n, i]`. Returned if `return_nn` is True. - The nearest neighbors are collected using `knn_gather` - - .. code-block:: - - p2_nn = knn_gather(p2, p1_idx, lengths2) - - which is a helper function that allows indexing any tensor of shape (N, P2, U) with - the indices `p1_idx` returned by `knn_points`. The output is a tensor - of shape (N, P1, K, U). - - """ - if p1.shape[0] != p2.shape[0]: - raise ValueError("pts1 and pts2 must have the same batch dimension.") - if p1.shape[2] != p2.shape[2]: - raise ValueError("pts1 and pts2 must have the same point dimension.") - - p1 = p1.contiguous() - p2 = p2.contiguous() - - P1 = p1.shape[1] - P2 = p2.shape[1] - - if lengths1 is None: - lengths1 = torch.full((p1.shape[0],), P1, dtype=torch.int64, device=p1.device) - if lengths2 is None: - lengths2 = torch.full((p1.shape[0],), P2, dtype=torch.int64, device=p1.device) - - p1_dists, p1_idx = _knn_points.apply( - p1, p2, lengths1, lengths2, K, version, norm, return_sorted - ) - - p2_nn = None - if return_nn: - p2_nn = knn_gather(p2, p1_idx, lengths2) - - return _KNN(dists=p1_dists, idx=p1_idx, knn=p2_nn if return_nn else None) - - -def knn_gather( - x: torch.Tensor, idx: torch.Tensor, lengths: Union[torch.Tensor, None] = None -): - """ - A helper function for knn that allows indexing a tensor x with the indices `idx` - returned by `knn_points`. - - For example, if `dists, idx = knn_points(p, x, lengths_p, lengths, K)` - where p is a tensor of shape (N, L, D) and x a tensor of shape (N, M, D), - then one can compute the K nearest neighbors of p with `p_nn = knn_gather(x, idx, lengths)`. - It can also be applied for any tensor x of shape (N, M, U) where U != D. - - Args: - x: Tensor of shape (N, M, U) containing U-dimensional features to - be gathered. - idx: LongTensor of shape (N, L, K) giving the indices returned by `knn_points`. - lengths: LongTensor of shape (N,) of values in the range [0, M], giving the - length of each example in the batch in x. Or None to indicate that every - example has length M. - Returns: - x_out: Tensor of shape (N, L, K, U) resulting from gathering the elements of x - with idx, s.t. `x_out[n, l, k] = x[n, idx[n, l, k]]`. - If `k > lengths[n]` then `x_out[n, l, k]` is filled with 0.0. 
- """ - N, M, U = x.shape - _N, L, K = idx.shape - - if N != _N: - raise ValueError("x and idx must have same batch dimension.") - - if lengths is None: - lengths = torch.full((x.shape[0],), M, dtype=torch.int64, device=x.device) - - idx_expanded = idx[:, :, :, None].expand(-1, -1, -1, U) - # idx_expanded has shape [N, L, K, U] - - x_out = x[:, :, None].expand(-1, -1, K, -1).gather(1, idx_expanded) - # p2_nn has shape [N, L, K, U] - - needs_mask = lengths.min() < K - if needs_mask: - # mask has shape [N, K], true where idx is irrelevant because - # there is less number of points in p2 than K - mask = lengths[:, None] <= torch.arange(K, device=x.device)[None] - - # expand mask to shape [N, L, K, U] - mask = mask[:, None].expand(-1, L, -1) - mask = mask[:, :, :, None].expand(-1, -1, -1, U) - x_out[mask] = 0.0 - - return x_out diff --git a/pytorch3d/pytorch3d/ops/laplacian_matrices.py b/pytorch3d/pytorch3d/ops/laplacian_matrices.py deleted file mode 100644 index 542fbebf52e909a399a38e3d5f3659186095094e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/laplacian_matrices.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Tuple - -import torch - - -# ------------------------ Laplacian Matrices ------------------------ # -# This file contains implementations of differentiable laplacian matrices. -# These include -# 1) Standard Laplacian matrix -# 2) Cotangent Laplacian matrix -# 3) Norm Laplacian matrix -# -------------------------------------------------------------------- # - - -def laplacian(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor: - """ - Computes the laplacian matrix. - The definition of the laplacian is - L[i, j] = -1 , if i == j - L[i, j] = 1 / deg(i) , if (i, j) is an edge - L[i, j] = 0 , otherwise - where deg(i) is the degree of the i-th vertex in the graph. - - Args: - verts: tensor of shape (V, 3) containing the vertices of the graph - edges: tensor of shape (E, 2) containing the vertex indices of each edge - Returns: - L: Sparse FloatTensor of shape (V, V) - """ - V = verts.shape[0] - - e0, e1 = edges.unbind(1) - - idx01 = torch.stack([e0, e1], dim=1) # (E, 2) - idx10 = torch.stack([e1, e0], dim=1) # (E, 2) - idx = torch.cat([idx01, idx10], dim=0).t() # (2, 2*E) - - # First, we construct the adjacency matrix, - # i.e. A[i, j] = 1 if (i,j) is an edge, or - # A[e0, e1] = 1 & A[e1, e0] = 1 - ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device) - # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. - A = torch.sparse.FloatTensor(idx, ones, (V, V)) - - # the sum of i-th row of A gives the degree of the i-th vertex - deg = torch.sparse.sum(A, dim=1).to_dense() - - # We construct the Laplacian matrix by adding the non diagonal values - # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge - deg0 = deg[e0] - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0) - deg1 = deg[e1] - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1) - val = torch.cat([deg0, deg1]) - # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. - L = torch.sparse.FloatTensor(idx, val, (V, V)) - - # Then we add the diagonal values L[i, i] = -1. 
- idx = torch.arange(V, device=verts.device) - idx = torch.stack([idx, idx], dim=0) - ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device) - # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. - L -= torch.sparse.FloatTensor(idx, ones, (V, V)) - - return L - - -def cot_laplacian( - verts: torch.Tensor, faces: torch.Tensor, eps: float = 1e-12 -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Returns the Laplacian matrix with cotangent weights and the inverse of the - face areas. - - Args: - verts: tensor of shape (V, 3) containing the vertices of the graph - faces: tensor of shape (F, 3) containing the vertex indices of each face - Returns: - 2-element tuple containing - - **L**: Sparse FloatTensor of shape (V,V) for the Laplacian matrix. - Here, L[i, j] = cot a_ij + cot b_ij iff (i, j) is an edge in meshes. - See the description above for more clarity. - - **inv_areas**: FloatTensor of shape (V,) containing the inverse of sum of - face areas containing each vertex - """ - V, F = verts.shape[0], faces.shape[0] - - face_verts = verts[faces] - v0, v1, v2 = face_verts[:, 0], face_verts[:, 1], face_verts[:, 2] - - # Side lengths of each triangle, of shape (sum(F_n),) - # A is the side opposite v1, B is opposite v2, and C is opposite v3 - A = (v1 - v2).norm(dim=1) - B = (v0 - v2).norm(dim=1) - C = (v0 - v1).norm(dim=1) - - # Area of each triangle (with Heron's formula); shape is (sum(F_n),) - s = 0.5 * (A + B + C) - # note that the area can be negative (close to 0) causing nans after sqrt() - # we clip it to a small positive value - # pyre-fixme[16]: `float` has no attribute `clamp_`. - area = (s * (s - A) * (s - B) * (s - C)).clamp_(min=eps).sqrt() - - # Compute cotangents of angles, of shape (sum(F_n), 3) - A2, B2, C2 = A * A, B * B, C * C - cota = (B2 + C2 - A2) / area - cotb = (A2 + C2 - B2) / area - cotc = (A2 + B2 - C2) / area - cot = torch.stack([cota, cotb, cotc], dim=1) - cot /= 4.0 - - # Construct a sparse matrix by basically doing: - # L[v1, v2] = cota - # L[v2, v0] = cotb - # L[v0, v1] = cotc - ii = faces[:, [1, 2, 0]] - jj = faces[:, [2, 0, 1]] - idx = torch.stack([ii, jj], dim=0).view(2, F * 3) - # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. - L = torch.sparse.FloatTensor(idx, cot.view(-1), (V, V)) - - # Make it symmetric; this means we are also setting - # L[v2, v1] = cota - # L[v0, v2] = cotb - # L[v1, v0] = cotc - L += L.t() - - # For each vertex, compute the sum of areas for triangles containing it. - idx = faces.view(-1) - inv_areas = torch.zeros(V, dtype=torch.float32, device=verts.device) - val = torch.stack([area] * 3, dim=1).view(-1) - inv_areas.scatter_add_(0, idx, val) - idx = inv_areas > 0 - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - inv_areas[idx] = 1.0 / inv_areas[idx] - inv_areas = inv_areas.view(-1, 1) - - return L, inv_areas - - -def norm_laplacian( - verts: torch.Tensor, edges: torch.Tensor, eps: float = 1e-12 -) -> torch.Tensor: - """ - Norm laplacian computes a variant of the laplacian matrix which weights each - affinity with the normalized distance of the neighboring nodes. - More concretely, - L[i, j] = 1. 
/ wij where wij = ||vi - vj|| if (vi, vj) are neighboring nodes - - Args: - verts: tensor of shape (V, 3) containing the vertices of the graph - edges: tensor of shape (E, 2) containing the vertex indices of each edge - Returns: - L: Sparse FloatTensor of shape (V, V) - """ - edge_verts = verts[edges] # (E, 2, 3) - v0, v1 = edge_verts[:, 0], edge_verts[:, 1] - - # Side lengths of each edge, of shape (E,) - w01 = 1.0 / ((v0 - v1).norm(dim=1) + eps) - - # Construct a sparse matrix by basically doing: - # L[v0, v1] = w01 - # L[v1, v0] = w01 - e01 = edges.t() # (2, E) - - V = verts.shape[0] - # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`. - L = torch.sparse.FloatTensor(e01, w01, (V, V)) - L = L + L.t() - - return L diff --git a/pytorch3d/pytorch3d/ops/marching_cubes.py b/pytorch3d/pytorch3d/ops/marching_cubes.py deleted file mode 100644 index 8ae3d16e98178f11faf86a7d2ee883daefdbf5a9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/marching_cubes.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Optional, Tuple - -import torch -from pytorch3d import _C -from pytorch3d.ops.marching_cubes_data import EDGE_TO_VERTICES, FACE_TABLE, INDEX -from pytorch3d.transforms import Translate -from torch.autograd import Function - - -EPS = 0.00001 - - -class Cube: - def __init__( - self, - bfl_v: Tuple[int, int, int], - volume: torch.Tensor, - isolevel: float, - ) -> None: - """ - Initializes a cube given the bottom front left vertex coordinate - and computes the cube configuration given vertex values and isolevel. 
- - Edge and vertex convention: - - v4_______e4____________v5 - /| /| - / | / | - e7/ | e5/ | - /___|______e6_________/ | - v7| | |v6 |e9 - | | | | - | |e8 |e10| - e11| | | | - | |______e0_________|___| - | / v0(bfl_v) | |v1 - | / | / - | /e3 | /e1 - |/_____________________|/ - v3 e2 v2 - - Args: - bfl_vertex: a tuple of size 3 corresponding to the bottom front left vertex - of the cube in (x, y, z) format - volume: the 3D scalar data - isolevel: the isosurface value used as a threshold for determining whether a point - is inside/outside the volume - """ - x, y, z = bfl_v - self.x, self.y, self.z = x, y, z - self.bfl_v = bfl_v - self.verts = [ - [x + (v & 1), y + (v >> 1 & 1), z + (v >> 2 & 1)] for v in range(8) - ] # vertex position (x, y, z) for v0-v1-v4-v5-v3-v2-v7-v6 - - # Calculates cube configuration index given values of the cube vertices - self.cube_index = 0 - for i in range(8): - v = self.verts[INDEX[i]] - value = volume[v[2]][v[1]][v[0]] - if value < isolevel: - self.cube_index |= 1 << i - - def get_vpair_from_edge(self, edge: int, W: int, H: int) -> Tuple[int, int]: - """ - Get a tuple of global vertex ID from a local edge ID - Global vertex ID is calculated as (x + dx) + (y + dy) * W + (z + dz) * W * H - - Args: - edge: local edge ID in the cube - bfl_vertex: bottom-front-left coordinate of the cube - - Returns: - a pair of global vertex ID - """ - v1, v2 = EDGE_TO_VERTICES[edge] # two end-points on the edge - v1_id = self.verts[v1][0] + self.verts[v1][1] * W + self.verts[v1][2] * W * H - v2_id = self.verts[v2][0] + self.verts[v2][1] * W + self.verts[v2][2] * W * H - return (v1_id, v2_id) - - def vert_interp( - self, - isolevel: float, - edge: int, - vol: torch.Tensor, - ) -> List: - """ - Linearly interpolate a vertex where an isosurface cuts an edge - between the two endpoint vertices, based on their values - - Args: - isolevel: the isosurface value to use as the threshold to determine - whether points are within a volume. - edge: edge (ID) to interpolate - cube: current cube vertices - vol: 3D scalar field - - Returns: - interpolated vertex: position of the interpolated vertex on the edge - """ - v1, v2 = EDGE_TO_VERTICES[edge] - p1, p2 = self.verts[v1], self.verts[v2] - val1, val2 = ( - vol[p1[2]][p1[1]][p1[0]], - vol[p2[2]][p2[1]][p2[0]], - ) - point = None - if abs(isolevel - val1) < EPS: - point = p1 - elif abs(isolevel - val2) < EPS: - point = p2 - elif abs(val1 - val2) < EPS: - point = p1 - - if point is None: - mu = (isolevel - val1) / (val2 - val1) - x1, y1, z1 = p1 - x2, y2, z2 = p2 - x = x1 + mu * (x2 - x1) - y = y1 + mu * (y2 - y1) - z = z1 + mu * (z2 - z1) - else: - x, y, z = point - return [x, y, z] - - -def marching_cubes_naive( - vol_batch: torch.Tensor, - isolevel: Optional[float] = None, - return_local_coords: bool = True, -) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: - """ - Runs the classic marching cubes algorithm, iterating over - the coordinates of the volume and using a given isolevel - for determining intersected edges of cubes. - Returns vertices and faces of the obtained mesh. - This operation is non-differentiable. - - Args: - vol_batch: a Tensor of size (N, D, H, W) corresponding to - a batch of 3D scalar fields - isolevel: the isosurface value to use as the threshold to determine - whether points are within a volume. If None, then the average of the - maximum and minimum value of the scalar field will be used. - return_local_coords: bool. If True the output vertices will be in local coordinates in - the range [-1, 1] x [-1, 1] x [-1, 1]. 
If False they will be in the range - [0, W-1] x [0, H-1] x [0, D-1] - Returns: - verts: [{V_0}, {V_1}, ...] List of N sets of vertices of shape (|V_i|, 3) in FloatTensor - faces: [{F_0}, {F_1}, ...] List of N sets of faces of shape (|F_i|, 3) in LongTensors - """ - batched_verts, batched_faces = [], [] - D, H, W = vol_batch.shape[1:] - - # each edge is represented with its two endpoints (represented with global id) - for i in range(len(vol_batch)): - vol = vol_batch[i] - thresh = ((vol.max() + vol.min()) / 2).item() if isolevel is None else isolevel - vpair_to_edge = {} # maps from tuple of edge endpoints to edge_id - edge_id_to_v = {} # maps from edge ID to vertex position - uniq_edge_id = {} # unique edge IDs - verts = [] # store vertex positions - faces = [] # store face indices - # enumerate each cell in the 3d grid - for z in range(0, D - 1): - for y in range(0, H - 1): - for x in range(0, W - 1): - cube = Cube((x, y, z), vol, thresh) - edge_indices = FACE_TABLE[cube.cube_index] - # cube is entirely in/out of the surface - if len(edge_indices) == 0: - continue - - # gather mesh vertices/faces by processing each cube - interp_points = [[0.0, 0.0, 0.0]] * 12 - # triangle vertex IDs and positions - tri = [] - ps = [] - for i, edge in enumerate(edge_indices): - interp_points[edge] = cube.vert_interp(thresh, edge, vol) - - # Bind interpolated vertex with a global edge_id, which - # is represented by a pair of vertex ids (v1_id, v2_id) - # corresponding to a local edge. - (v1_id, v2_id) = cube.get_vpair_from_edge(edge, W, H) - edge_id = vpair_to_edge.setdefault( - (v1_id, v2_id), len(vpair_to_edge) - ) - tri.append(edge_id) - ps.append(interp_points[edge]) - # when the isolevel are the same as the edge endpoints, the interploated - # vertices can share the same values, and lead to degenerate triangles. - if ( - (i + 1) % 3 == 0 - and ps[0] != ps[1] - and ps[1] != ps[2] - and ps[2] != ps[0] - ): - for j, edge_id in enumerate(tri): - edge_id_to_v[edge_id] = ps[j] - if edge_id not in uniq_edge_id: - uniq_edge_id[edge_id] = len(verts) - verts.append(edge_id_to_v[edge_id]) - faces.append([uniq_edge_id[tri[j]] for j in range(3)]) - tri = [] - ps = [] - - if len(faces) > 0 and len(verts) > 0: - verts = torch.tensor(verts, dtype=vol.dtype) - # Convert from world coordinates ([0, D-1], [0, H-1], [0, W-1]) to - # local coordinates in the range [-1, 1] - if return_local_coords: - verts = ( - Translate(x=+1.0, y=+1.0, z=+1.0, device=vol_batch.device) - .scale((vol_batch.new_tensor([W, H, D])[None] - 1) * 0.5) - .inverse() - ).transform_points(verts[None])[0] - batched_verts.append(verts) - batched_faces.append(torch.tensor(faces, dtype=torch.int64)) - else: - batched_verts.append([]) - batched_faces.append([]) - return batched_verts, batched_faces - - -######################################## -# Marching Cubes Implementation in C++/Cuda -######################################## -class _marching_cubes(Function): - """ - Torch Function wrapper for marching_cubes implementation. - This function is not differentiable. An autograd wrapper is used - to ensure an error if user tries to get gradients. 
- """ - - @staticmethod - def forward(ctx, vol, isolevel): - verts, faces, ids = _C.marching_cubes(vol, isolevel) - return verts, faces, ids - - @staticmethod - def backward(ctx, grad_verts, grad_faces): - raise ValueError("marching_cubes backward is not supported") - - -def marching_cubes( - vol_batch: torch.Tensor, - isolevel: Optional[float] = None, - return_local_coords: bool = True, -) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: - """ - Run marching cubes over a volume scalar field with a designated isolevel. - Returns vertices and faces of the obtained mesh. - This operation is non-differentiable. - - Args: - vol_batch: a Tensor of size (N, D, H, W) corresponding to - a batch of 3D scalar fields - isolevel: float used as threshold to determine if a point is inside/outside - the volume. If None, then the average of the maximum and minimum value - of the scalar field is used. - return_local_coords: bool. If True the output vertices will be in local coordinates in - the range [-1, 1] x [-1, 1] x [-1, 1]. If False they will be in the range - [0, W-1] x [0, H-1] x [0, D-1] - - Returns: - verts: [{V_0}, {V_1}, ...] List of N sets of vertices of shape (|V_i|, 3) in FloatTensor - faces: [{F_0}, {F_1}, ...] List of N sets of faces of shape (|F_i|, 3) in LongTensors - """ - batched_verts, batched_faces = [], [] - D, H, W = vol_batch.shape[1:] - for i in range(len(vol_batch)): - vol = vol_batch[i] - thresh = ((vol.max() + vol.min()) / 2).item() if isolevel is None else isolevel - verts, faces, ids = _marching_cubes.apply(vol, thresh) - if len(faces) > 0 and len(verts) > 0: - # Convert from world coordinates ([0, D-1], [0, H-1], [0, W-1]) to - # local coordinates in the range [-1, 1] - if return_local_coords: - verts = ( - Translate(x=+1.0, y=+1.0, z=+1.0, device=vol.device) - .scale((vol.new_tensor([W, H, D])[None] - 1) * 0.5) - .inverse() - ).transform_points(verts[None])[0] - # deduplication for cuda - if vol.is_cuda: - unique_ids, inverse_idx = torch.unique(ids, return_inverse=True) - verts_ = verts.new_zeros(unique_ids.shape[0], 3) - verts_[inverse_idx] = verts - verts = verts_ - faces = inverse_idx[faces] - batched_verts.append(verts) - batched_faces.append(faces) - else: - batched_verts.append([]) - batched_faces.append([]) - return batched_verts, batched_faces diff --git a/pytorch3d/pytorch3d/ops/marching_cubes_data.py b/pytorch3d/pytorch3d/ops/marching_cubes_data.py deleted file mode 100644 index 802f67da265cca2c1081fdb6e6bd770efd3fdbec..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/marching_cubes_data.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# Maps each edge (by index) to the corresponding cube vertices -EDGE_TO_VERTICES = [ - [0, 1], - [1, 5], - [4, 5], - [0, 4], - [2, 3], - [3, 7], - [6, 7], - [2, 6], - [0, 2], - [1, 3], - [5, 7], - [4, 6], -] - -# A list of lists mapping a cube_index (a given configuration) -# to a list of faces corresponding to that configuration. Each face is represented -# by 3 consecutive numbers. A configuration will at most have 5 faces. 
-# -# Table taken from http://paulbourke.net/geometry/polygonise/ -FACE_TABLE = [ - [], - [0, 8, 3], - [0, 1, 9], - [1, 8, 3, 9, 8, 1], - [1, 2, 10], - [0, 8, 3, 1, 2, 10], - [9, 2, 10, 0, 2, 9], - [2, 8, 3, 2, 10, 8, 10, 9, 8], - [3, 11, 2], - [0, 11, 2, 8, 11, 0], - [1, 9, 0, 2, 3, 11], - [1, 11, 2, 1, 9, 11, 9, 8, 11], - [3, 10, 1, 11, 10, 3], - [0, 10, 1, 0, 8, 10, 8, 11, 10], - [3, 9, 0, 3, 11, 9, 11, 10, 9], - [9, 8, 10, 10, 8, 11], - [4, 7, 8], - [4, 3, 0, 7, 3, 4], - [0, 1, 9, 8, 4, 7], - [4, 1, 9, 4, 7, 1, 7, 3, 1], - [1, 2, 10, 8, 4, 7], - [3, 4, 7, 3, 0, 4, 1, 2, 10], - [9, 2, 10, 9, 0, 2, 8, 4, 7], - [2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4], - [8, 4, 7, 3, 11, 2], - [11, 4, 7, 11, 2, 4, 2, 0, 4], - [9, 0, 1, 8, 4, 7, 2, 3, 11], - [4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1], - [3, 10, 1, 3, 11, 10, 7, 8, 4], - [1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4], - [4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3], - [4, 7, 11, 4, 11, 9, 9, 11, 10], - [9, 5, 4], - [9, 5, 4, 0, 8, 3], - [0, 5, 4, 1, 5, 0], - [8, 5, 4, 8, 3, 5, 3, 1, 5], - [1, 2, 10, 9, 5, 4], - [3, 0, 8, 1, 2, 10, 4, 9, 5], - [5, 2, 10, 5, 4, 2, 4, 0, 2], - [2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8], - [9, 5, 4, 2, 3, 11], - [0, 11, 2, 0, 8, 11, 4, 9, 5], - [0, 5, 4, 0, 1, 5, 2, 3, 11], - [2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5], - [10, 3, 11, 10, 1, 3, 9, 5, 4], - [4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10], - [5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3], - [5, 4, 8, 5, 8, 10, 10, 8, 11], - [9, 7, 8, 5, 7, 9], - [9, 3, 0, 9, 5, 3, 5, 7, 3], - [0, 7, 8, 0, 1, 7, 1, 5, 7], - [1, 5, 3, 3, 5, 7], - [9, 7, 8, 9, 5, 7, 10, 1, 2], - [10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3], - [8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2], - [2, 10, 5, 2, 5, 3, 3, 5, 7], - [7, 9, 5, 7, 8, 9, 3, 11, 2], - [9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11], - [2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7], - [11, 2, 1, 11, 1, 7, 7, 1, 5], - [9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11], - [5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0], - [11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0], - [11, 10, 5, 7, 11, 5], - [10, 6, 5], - [0, 8, 3, 5, 10, 6], - [9, 0, 1, 5, 10, 6], - [1, 8, 3, 1, 9, 8, 5, 10, 6], - [1, 6, 5, 2, 6, 1], - [1, 6, 5, 1, 2, 6, 3, 0, 8], - [9, 6, 5, 9, 0, 6, 0, 2, 6], - [5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8], - [2, 3, 11, 10, 6, 5], - [11, 0, 8, 11, 2, 0, 10, 6, 5], - [0, 1, 9, 2, 3, 11, 5, 10, 6], - [5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11], - [6, 3, 11, 6, 5, 3, 5, 1, 3], - [0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6], - [3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9], - [6, 5, 9, 6, 9, 11, 11, 9, 8], - [5, 10, 6, 4, 7, 8], - [4, 3, 0, 4, 7, 3, 6, 5, 10], - [1, 9, 0, 5, 10, 6, 8, 4, 7], - [10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4], - [6, 1, 2, 6, 5, 1, 4, 7, 8], - [1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7], - [8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6], - [7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9], - [3, 11, 2, 7, 8, 4, 10, 6, 5], - [5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11], - [0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6], - [9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6], - [8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6], - [5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11], - [0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7], - [6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9], - [10, 4, 9, 6, 4, 10], - [4, 10, 6, 4, 9, 10, 0, 8, 3], - [10, 0, 1, 10, 6, 0, 6, 4, 0], - [8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10], - [1, 4, 9, 1, 2, 4, 2, 6, 4], - [3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4], - [0, 2, 4, 4, 2, 6], - [8, 3, 2, 8, 2, 4, 4, 2, 6], - [10, 4, 9, 10, 6, 4, 11, 2, 3], - [0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6], - [3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10], - [6, 4, 1, 6, 
1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1], - [9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3], - [8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1], - [3, 11, 6, 3, 6, 0, 0, 6, 4], - [6, 4, 8, 11, 6, 8], - [7, 10, 6, 7, 8, 10, 8, 9, 10], - [0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10], - [10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0], - [10, 6, 7, 10, 7, 1, 1, 7, 3], - [1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7], - [2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9], - [7, 8, 0, 7, 0, 6, 6, 0, 2], - [7, 3, 2, 6, 7, 2], - [2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7], - [2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7], - [1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11], - [11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1], - [8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6], - [0, 9, 1, 11, 6, 7], - [7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0], - [7, 11, 6], - [7, 6, 11], - [3, 0, 8, 11, 7, 6], - [0, 1, 9, 11, 7, 6], - [8, 1, 9, 8, 3, 1, 11, 7, 6], - [10, 1, 2, 6, 11, 7], - [1, 2, 10, 3, 0, 8, 6, 11, 7], - [2, 9, 0, 2, 10, 9, 6, 11, 7], - [6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8], - [7, 2, 3, 6, 2, 7], - [7, 0, 8, 7, 6, 0, 6, 2, 0], - [2, 7, 6, 2, 3, 7, 0, 1, 9], - [1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6], - [10, 7, 6, 10, 1, 7, 1, 3, 7], - [10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8], - [0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7], - [7, 6, 10, 7, 10, 8, 8, 10, 9], - [6, 8, 4, 11, 8, 6], - [3, 6, 11, 3, 0, 6, 0, 4, 6], - [8, 6, 11, 8, 4, 6, 9, 0, 1], - [9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6], - [6, 8, 4, 6, 11, 8, 2, 10, 1], - [1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6], - [4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9], - [10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3], - [8, 2, 3, 8, 4, 2, 4, 6, 2], - [0, 4, 2, 4, 6, 2], - [1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8], - [1, 9, 4, 1, 4, 2, 2, 4, 6], - [8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1], - [10, 1, 0, 10, 0, 6, 6, 0, 4], - [4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3], - [10, 9, 4, 6, 10, 4], - [4, 9, 5, 7, 6, 11], - [0, 8, 3, 4, 9, 5, 11, 7, 6], - [5, 0, 1, 5, 4, 0, 7, 6, 11], - [11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5], - [9, 5, 4, 10, 1, 2, 7, 6, 11], - [6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5], - [7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2], - [3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6], - [7, 2, 3, 7, 6, 2, 5, 4, 9], - [9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7], - [3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0], - [6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8], - [9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7], - [1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4], - [4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10], - [7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10], - [6, 9, 5, 6, 11, 9, 11, 8, 9], - [3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5], - [0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11], - [6, 11, 3, 6, 3, 5, 5, 3, 1], - [1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6], - [0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10], - [11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5], - [6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3], - [5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2], - [9, 5, 6, 9, 6, 0, 0, 6, 2], - [1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8], - [1, 5, 6, 2, 1, 6], - [1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6], - [10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0], - [0, 3, 8, 5, 6, 10], - [10, 5, 6], - [11, 5, 10, 7, 5, 11], - [11, 5, 10, 11, 7, 5, 8, 3, 0], - [5, 11, 7, 5, 10, 11, 1, 9, 0], - [10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1], - [11, 1, 2, 11, 7, 1, 7, 5, 1], - [0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11], - [9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7], - [7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2], - [2, 5, 10, 2, 3, 5, 3, 7, 5], - [8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5], - [9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2], - [9, 8, 
2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2], - [1, 3, 5, 3, 7, 5], - [0, 8, 7, 0, 7, 1, 1, 7, 5], - [9, 0, 3, 9, 3, 5, 5, 3, 7], - [9, 8, 7, 5, 9, 7], - [5, 8, 4, 5, 10, 8, 10, 11, 8], - [5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0], - [0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5], - [10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4], - [2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8], - [0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11], - [0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5], - [9, 4, 5, 2, 11, 3], - [2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4], - [5, 10, 2, 5, 2, 4, 4, 2, 0], - [3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9], - [5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2], - [8, 4, 5, 8, 5, 3, 3, 5, 1], - [0, 4, 5, 1, 0, 5], - [8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5], - [9, 4, 5], - [4, 11, 7, 4, 9, 11, 9, 10, 11], - [0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11], - [1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11], - [3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4], - [4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2], - [9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3], - [11, 7, 4, 11, 4, 2, 2, 4, 0], - [11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4], - [2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9], - [9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7], - [3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10], - [1, 10, 2, 8, 7, 4], - [4, 9, 1, 4, 1, 7, 7, 1, 3], - [4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1], - [4, 0, 3, 7, 4, 3], - [4, 8, 7], - [9, 10, 8, 10, 11, 8], - [3, 0, 9, 3, 9, 11, 11, 9, 10], - [0, 1, 10, 0, 10, 8, 8, 10, 11], - [3, 1, 10, 11, 3, 10], - [1, 2, 11, 1, 11, 9, 9, 11, 8], - [3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9], - [0, 2, 11, 8, 0, 11], - [3, 2, 11], - [2, 3, 8, 2, 8, 10, 10, 8, 9], - [9, 10, 2, 0, 9, 2], - [2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8], - [1, 10, 2], - [1, 3, 8, 9, 1, 8], - [0, 9, 1], - [0, 3, 8], - [], -] - -# mapping from 0-7 to v0-v7 in cube.vertices -INDEX = [0, 1, 5, 4, 2, 3, 7, 6] diff --git a/pytorch3d/pytorch3d/ops/mesh_face_areas_normals.py b/pytorch3d/pytorch3d/ops/mesh_face_areas_normals.py deleted file mode 100644 index f41ff1dcbe1370c59009f0c1426ab18ad50d2b12..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/mesh_face_areas_normals.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -class _MeshFaceAreasNormals(Function): - """ - Torch autograd Function wrapper for face areas & normals C++/CUDA implementations. - """ - - @staticmethod - def forward(ctx, verts, faces): - """ - Args: - ctx: Context object used to calculate gradients. - verts: FloatTensor of shape (V, 3), representing the packed - batch verts tensor. 
- faces: LongTensor of shape (F, 3), representing the packed - batch faces tensor - Returns: - areas: FloatTensor of shape (F,) with the areas of each face - normals: FloatTensor of shape (F,3) with the normals of each face - """ - if not (verts.dim() == 2): - raise ValueError("verts need to be of shape Vx3.") - if not (verts.shape[1] == 3): - raise ValueError("verts need to be of shape Vx3.") - if not (faces.dim() == 2): - raise ValueError("faces need to be of shape Fx3.") - if not (faces.shape[1] == 3): - raise ValueError("faces need to be of shape Fx3.") - if not (faces.dtype == torch.int64): - raise ValueError("faces need to be of type torch.int64.") - # TODO(gkioxari) Change cast to floats once we add support for doubles. - if not (verts.dtype == torch.float32): - verts = verts.float() - - ctx.save_for_backward(verts, faces) - areas, normals = _C.face_areas_normals_forward(verts, faces) - return areas, normals - - @staticmethod - @once_differentiable - def backward(ctx, grad_areas, grad_normals): - grad_areas = grad_areas.contiguous() - grad_normals = grad_normals.contiguous() - verts, faces = ctx.saved_tensors - # TODO(gkioxari) Change cast to floats once we add support for doubles. - if not (grad_areas.dtype == torch.float32): - grad_areas = grad_areas.float() - if not (grad_normals.dtype == torch.float32): - grad_normals = grad_normals.float() - grad_verts = _C.face_areas_normals_backward( - grad_areas, grad_normals, verts, faces - ) - return grad_verts, None - - -mesh_face_areas_normals = _MeshFaceAreasNormals.apply diff --git a/pytorch3d/pytorch3d/ops/mesh_filtering.py b/pytorch3d/pytorch3d/ops/mesh_filtering.py deleted file mode 100644 index 90cf1211c52d06776d6ad2e8804645e363da57e9..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/mesh_filtering.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d.ops import norm_laplacian -from pytorch3d.structures import Meshes, utils as struct_utils - - -# ------------------------ Mesh Smoothing ------------------------ # -# This file contains differentiable operators to filter meshes -# The ops include -# 1) Taubin Smoothing -# TODO(gkioxari) add more! :) -# ---------------------------------------------------------------- # - - -# ----------------------- Taubin Smoothing ----------------------- # - - -def taubin_smoothing( - meshes: Meshes, lambd: float = 0.53, mu: float = -0.53, num_iter: int = 10 -) -> Meshes: - """ - Taubin smoothing [1] is an iterative smoothing operator for meshes. - At each iteration - verts := (1 - Ξ») * verts + Ξ» * L * verts - verts := (1 - ΞΌ) * verts + ΞΌ * L * verts - - This function returns a new mesh with smoothed vertices. 
- Args: - meshes: Meshes input to be smoothed - lambd, mu: float parameters for Taubin smoothing, - lambd > 0, mu < 0 - num_iter: number of iterations to execute smoothing - Returns: - mesh: Smoothed input Meshes - - [1] Curve and Surface Smoothing without Shrinkage, - Gabriel Taubin, ICCV 1997 - """ - verts = meshes.verts_packed() # V x 3 - edges = meshes.edges_packed() # E x 3 - - for _ in range(num_iter): - L = norm_laplacian(verts, edges) - total_weight = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1) - verts = (1 - lambd) * verts + lambd * torch.mm(L, verts) / total_weight - - L = norm_laplacian(verts, edges) - total_weight = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1) - verts = (1 - mu) * verts + mu * torch.mm(L, verts) / total_weight - - verts_list = struct_utils.packed_to_list( - verts, meshes.num_verts_per_mesh().tolist() - ) - mesh = Meshes(verts=list(verts_list), faces=meshes.faces_list()) - return mesh diff --git a/pytorch3d/pytorch3d/ops/packed_to_padded.py b/pytorch3d/pytorch3d/ops/packed_to_padded.py deleted file mode 100644 index 5072e6245ea01003c1e30a218b7f56097dc0dbca..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/packed_to_padded.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -class _PackedToPadded(Function): - """ - Torch autograd Function wrapper for packed_to_padded C++/CUDA implementations. - """ - - @staticmethod - def forward(ctx, inputs, first_idxs, max_size): - """ - Args: - ctx: Context object used to calculate gradients. - inputs: FloatTensor of shape (F, D), representing the packed batch tensor. - e.g. areas for faces in a batch of meshes. - first_idxs: LongTensor of shape (N,) where N is the number of - elements in the batch and `first_idxs[i] = f` - means that the inputs for batch element i begin at `inputs[f]`. - max_size: Max length of an element in the batch. - - Returns: - inputs_padded: FloatTensor of shape (N, max_size, D) where max_size is max - of `sizes`. The values for batch element i which start at - `inputs[first_idxs[i]]` will be copied to `inputs_padded[i, :]`, - with zeros padding out the extra inputs. 
- """ - if not (inputs.dim() == 2): - raise ValueError("input can only be 2-dimensional.") - if not (first_idxs.dim() == 1): - raise ValueError("first_idxs can only be 1-dimensional.") - if not (inputs.dtype == torch.float32): - raise ValueError("input has to be of type torch.float32.") - if not (first_idxs.dtype == torch.int64): - raise ValueError("first_idxs has to be of type torch.int64.") - if not isinstance(max_size, int): - raise ValueError("max_size has to be int.") - - ctx.save_for_backward(first_idxs) - ctx.num_inputs = int(inputs.shape[0]) - inputs, first_idxs = inputs.contiguous(), first_idxs.contiguous() - inputs_padded = _C.packed_to_padded(inputs, first_idxs, max_size) - return inputs_padded - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - grad_output = grad_output.contiguous() - first_idxs = ctx.saved_tensors[0] - num_inputs = ctx.num_inputs - grad_input = _C.padded_to_packed(grad_output, first_idxs, num_inputs) - return grad_input, None, None - - -def packed_to_padded( - inputs: torch.Tensor, first_idxs: torch.LongTensor, max_size: int -) -> torch.Tensor: - """ - Torch wrapper that handles allowed input shapes. See description below. - - Args: - inputs: FloatTensor of shape (F,) or (F, ...), representing the packed - batch tensor, e.g. areas for faces in a batch of meshes. - first_idxs: LongTensor of shape (N,) where N is the number of - elements in the batch and `first_idxs[i] = f` - means that the inputs for batch element i begin at `inputs[f]`. - max_size: Max length of an element in the batch. - - Returns: - inputs_padded: FloatTensor of shape (N, max_size) or (N, max_size, ...) - where max_size is max of `sizes`. The values for batch element i - which start at `inputs[first_idxs[i]]` will be copied to - `inputs_padded[i, :]`, with zeros padding out the extra inputs. - - To handle the allowed input shapes, we convert the inputs tensor of shape - (F,) to (F, 1). We reshape the output back to (N, max_size) from - (N, max_size, 1). - """ - # if inputs is of shape (F,), reshape into (F, 1) - input_shape = inputs.shape - n_dims = inputs.dim() - if n_dims == 1: - inputs = inputs.unsqueeze(1) - else: - inputs = inputs.reshape(input_shape[0], -1) - inputs_padded = _PackedToPadded.apply(inputs, first_idxs, max_size) - # if flat is True, reshape output to (N, max_size) from (N, max_size, 1) - # else reshape output to (N, max_size, ...) - if n_dims == 1: - return inputs_padded.squeeze(2) - if n_dims == 2: - return inputs_padded - return inputs_padded.view(*inputs_padded.shape[:2], *input_shape[1:]) - - -class _PaddedToPacked(Function): - """ - Torch autograd Function wrapper for padded_to_packed C++/CUDA implementations. - """ - - @staticmethod - def forward(ctx, inputs, first_idxs, num_inputs): - """ - Args: - ctx: Context object used to calculate gradients. - inputs: FloatTensor of shape (N, max_size, D), representing - the padded tensor, e.g. areas for faces in a batch of meshes. - first_idxs: LongTensor of shape (N,) where N is the number of - elements in the batch and `first_idxs[i] = f` - means that the inputs for batch element i begin at `inputs_packed[f]`. - num_inputs: Number of packed entries (= F) - - Returns: - inputs_packed: FloatTensor of shape (F, D) where - `inputs_packed[first_idx[i]:] = inputs[i, :]`. 
- """ - if not (inputs.dim() == 3): - raise ValueError("input can only be 3-dimensional.") - if not (first_idxs.dim() == 1): - raise ValueError("first_idxs can only be 1-dimensional.") - if not (inputs.dtype == torch.float32): - raise ValueError("input has to be of type torch.float32.") - if not (first_idxs.dtype == torch.int64): - raise ValueError("first_idxs has to be of type torch.int64.") - if not isinstance(num_inputs, int): - raise ValueError("max_size has to be int.") - - ctx.save_for_backward(first_idxs) - ctx.max_size = inputs.shape[1] - inputs, first_idxs = inputs.contiguous(), first_idxs.contiguous() - inputs_packed = _C.padded_to_packed(inputs, first_idxs, num_inputs) - return inputs_packed - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - grad_output = grad_output.contiguous() - first_idxs = ctx.saved_tensors[0] - max_size = ctx.max_size - grad_input = _C.packed_to_padded(grad_output, first_idxs, max_size) - return grad_input, None, None - - -def padded_to_packed( - inputs: torch.Tensor, - first_idxs: torch.LongTensor, - num_inputs: int, - max_size_dim: int = 1, -) -> torch.Tensor: - """ - Torch wrapper that handles allowed input shapes. See description below. - - Args: - inputs: FloatTensor of shape (N, ..., max_size) or (N, ..., max_size, ...), - representing the padded tensor, e.g. areas for faces in a batch of - meshes, where max_size occurs on max_size_dim-th position. - first_idxs: LongTensor of shape (N,) where N is the number of - elements in the batch and `first_idxs[i] = f` - means that the inputs for batch element i begin at `inputs_packed[f]`. - num_inputs: Number of packed entries (= F) - max_size_dim: the dimension to be packed - - Returns: - inputs_packed: FloatTensor of shape (F,) or (F, ...) where - `inputs_packed[first_idx[i]:first_idx[i+1]] = inputs[i, ..., :delta[i]]`, - where `delta[i] = first_idx[i+1] - first_idx[i]`. - - To handle the allowed input shapes, we convert the inputs tensor of shape - (N, max_size) to (N, max_size, 1). We reshape the output back to (F,) from - (F, 1). - """ - n_dims = inputs.dim() - # move the variable dim to position 1 - inputs = inputs.movedim(max_size_dim, 1) - - # if inputs is of shape (N, max_size), reshape into (N, max_size, 1)) - input_shape = inputs.shape - if n_dims == 2: - inputs = inputs.unsqueeze(2) - else: - inputs = inputs.reshape(*input_shape[:2], -1) - inputs_packed = _PaddedToPacked.apply(inputs, first_idxs, num_inputs) - # if input is flat, reshape output to (F,) from (F, 1) - # else reshape output to (F, ...) - if n_dims == 2: - return inputs_packed.squeeze(1) - - return inputs_packed.view(-1, *input_shape[2:]) diff --git a/pytorch3d/pytorch3d/ops/perspective_n_points.py b/pytorch3d/pytorch3d/ops/perspective_n_points.py deleted file mode 100644 index c6b7d6816660b190e87cd16459abae4501f54558..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/perspective_n_points.py +++ /dev/null @@ -1,410 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -""" -This file contains Efficient PnP algorithm for Perspective-n-Points problem. -It finds a camera position (defined by rotation `R` and translation `T`) that -minimizes re-projection error between the given 3D points `x` and -the corresponding uncalibrated 2D points `y`. 
-""" - -import warnings -from typing import NamedTuple, Optional - -import torch -import torch.nn.functional as F -from pytorch3d.ops import points_alignment, utils as oputil - - -class EpnpSolution(NamedTuple): - x_cam: torch.Tensor - R: torch.Tensor - T: torch.Tensor - err_2d: torch.Tensor - err_3d: torch.Tensor - - -def _define_control_points(x, weight, storage_opts=None): - """ - Returns control points that define barycentric coordinates - Args: - x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`. - weight: Batch of non-negative weights of - shape `(minibatch, num_point)`. `None` means equal weights. - storage_opts: dict of keyword arguments to the tensor constructor. - """ - storage_opts = storage_opts or {} - x_mean = oputil.wmean(x, weight) - c_world = F.pad(torch.eye(3, **storage_opts), (0, 0, 0, 1), value=0.0).expand_as( - x[:, :4, :] - ) - return c_world + x_mean - - -def _compute_alphas(x, c_world): - """ - Computes barycentric coordinates of x in the frame c_world. - Args: - x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`. - c_world: control points in world coordinates. - """ - x = F.pad(x, (0, 1), value=1.0) - c = F.pad(c_world, (0, 1), value=1.0) - return torch.matmul(x, torch.inverse(c)) # B x N x 4 - - -def _build_M(y, alphas, weight): - """Returns the matrix defining the reprojection equations. - Args: - y: projected points in camera coordinates of size B x N x 2 - alphas: barycentric coordinates of size B x N x 4 - weight: Batch of non-negative weights of - shape `(minibatch, num_point)`. `None` means equal weights. - """ - bs, n, _ = y.size() - - # prepend t with the column of v's - def prepad(t, v): - return F.pad(t, (1, 0), value=v) - - if weight is not None: - # weight the alphas in order to get a correctly weighted version of M - alphas = alphas * weight[:, :, None] - - # outer left-multiply by alphas - def lm_alphas(t): - return torch.matmul(alphas[..., None], t).reshape(bs, n, 12) - - M = torch.cat( - ( - lm_alphas( - prepad(prepad(-y[:, :, 0, None, None], 0.0), 1.0) - ), # u constraints - lm_alphas( - prepad(prepad(-y[:, :, 1, None, None], 1.0), 0.0) - ), # v constraints - ), - dim=-1, - ).reshape(bs, -1, 12) - - return M - - -def _null_space(m, kernel_dim): - """Finds the null space (kernel) basis of the matrix - Args: - m: the batch of input matrices, B x N x 12 - kernel_dim: number of dimensions to approximate the kernel - Returns: - * a batch of null space basis vectors - of size B x 4 x 3 x kernel_dim - * a batch of spectral values where near-0s correspond to actual - kernel vectors, of size B x kernel_dim - """ - mTm = torch.bmm(m.transpose(1, 2), m) - s, v = torch.linalg.eigh(mTm) - return v[:, :, :kernel_dim].reshape(-1, 4, 3, kernel_dim), s[:, :kernel_dim] - - -def _reproj_error(y_hat, y, weight, eps=1e-9): - """Projects estimated 3D points and computes the reprojection error - Args: - y_hat: a batch of predicted 2D points in homogeneous coordinates - y: a batch of ground-truth 2D points - weight: Batch of non-negative weights of - shape `(minibatch, num_point)`. `None` means equal weights. - Returns: - Optionally weighted RMSE of difference between y and y_hat. - """ - y_hat = y_hat / torch.clamp(y_hat[..., 2:], eps) - dist = ((y - y_hat[..., :2]) ** 2).sum(dim=-1, keepdim=True) ** 0.5 - return oputil.wmean(dist, weight)[:, 0, 0] - - -def _algebraic_error(x_w_rotated, x_cam, weight): - """Computes the residual of Umeyama in 3D. - Args: - x_w_rotated: The given 3D points rotated with the predicted camera. 
- x_cam: the lifted 2D points y - weight: Batch of non-negative weights of - shape `(minibatch, num_point)`. `None` means equal weights. - Returns: - Optionally weighted MSE of difference between x_w_rotated and x_cam. - """ - dist = ((x_w_rotated - x_cam) ** 2).sum(dim=-1, keepdim=True) - return oputil.wmean(dist, weight)[:, 0, 0] - - -def _compute_norm_sign_scaling_factor(c_cam, alphas, x_world, y, weight, eps=1e-9): - """Given a solution, adjusts the scale and flip - Args: - c_cam: control points in camera coordinates - alphas: barycentric coordinates of the points - x_world: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`. - y: Batch of 2-dimensional points of shape `(minibatch, num_points, 2)`. - weights: Batch of non-negative weights of - shape `(minibatch, num_point)`. `None` means equal weights. - eps: epsilon to threshold negative `z` values - """ - # position of reference points in camera coordinates - x_cam = torch.matmul(alphas, c_cam) - - x_cam = x_cam * (1.0 - 2.0 * (oputil.wmean(x_cam[..., 2:], weight) < 0).float()) - if torch.any(x_cam[..., 2:] < -eps): - neg_rate = oputil.wmean((x_cam[..., 2:] < 0).float(), weight, dim=(0, 1)).item() - warnings.warn("\nEPnP: %2.2f%% points have z<0." % (neg_rate * 100.0)) - - R, T, s = points_alignment.corresponding_points_alignment( - x_world, x_cam, weight, estimate_scale=True - ) - s = s.clamp(eps) - x_cam = x_cam / s[:, None, None] - T = T / s[:, None] - x_w_rotated = torch.matmul(x_world, R) + T[:, None, :] - err_2d = _reproj_error(x_w_rotated, y, weight) - err_3d = _algebraic_error(x_w_rotated, x_cam, weight) - - return EpnpSolution(x_cam, R, T, err_2d, err_3d) - - -def _gen_pairs(input, dim=-2, reducer=lambda a, b: ((a - b) ** 2).sum(dim=-1)): - """Generates all pairs of different rows and then applies the reducer - Args: - input: a tensor - dim: a dimension to generate pairs across - reducer: a function of generated pair of rows to apply (beyond just concat) - Returns: - for default args, for A x B x C input, will output A x (B choose 2) - """ - n = input.size()[dim] - range = torch.arange(n) - idx = torch.combinations(range).to(input).long() - left = input.index_select(dim, idx[:, 0]) - right = input.index_select(dim, idx[:, 1]) - return reducer(left, right) - - -def _kernel_vec_distances(v): - """Computes the coefficients for linearization of the quadratic system - to match all pairwise distances between 4 control points (dim=1). - The last dimension corresponds to the coefficients for quadratic terms - Bij = Bi * Bj, where Bi and Bj correspond to kernel vectors. - Arg: - v: tensor of B x 4 x 3 x D, where D is dim(kernel), usually 4 - Returns: - a tensor of B x 6 x [(D choose 2) + D]; - for D=4, the last dim means [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34]. - """ - dv = _gen_pairs(v, dim=-3, reducer=lambda a, b: a - b) # B x 6 x 3 x D - - # we should take dot-product of all (i,j), i < j, with coeff 2 - rows_2ij = 2.0 * _gen_pairs(dv, dim=-1, reducer=lambda a, b: (a * b).sum(dim=-2)) - # this should produce B x 6 x (D choose 2) tensor - - # we should take dot-product of all (i,i) - rows_ii = (dv**2).sum(dim=-2) - # this should produce B x 6 x D tensor - - return torch.cat((rows_ii, rows_2ij), dim=-1) - - -def _solve_lstsq_subcols(rhs, lhs, lhs_col_idx): - """Solves an over-determined linear system for selected LHS columns. - A batched version of `torch.lstsq`. 
- Args: - rhs: right-hand side vectors - lhs: left-hand side matrices - lhs_col_idx: a slice of columns in lhs - Returns: - a least-squares solution for lhs * X = rhs - """ - lhs = lhs.index_select(-1, torch.tensor(lhs_col_idx, device=lhs.device).long()) - return torch.matmul(torch.pinverse(lhs), rhs[:, :, None]) - - -def _binary_sign(t): - return (t >= 0).to(t) * 2.0 - 1.0 - - -def _find_null_space_coords_1(kernel_dsts, cw_dst, eps=1e-9): - """Solves case 1 from the paper [1]; solve for 4 coefficients: - [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34] - ^ ^ ^ ^ - Args: - kernel_dsts: distances between kernel vectors - cw_dst: distances between control points - Returns: - coefficients to weight kernel vectors - [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009). - EPnP: An Accurate O(n) solution to the PnP problem. - International Journal of Computer Vision. - https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/ - """ - beta = _solve_lstsq_subcols(cw_dst, kernel_dsts, [0, 4, 5, 6]) - - beta = beta * _binary_sign(beta[:, :1, :]) - return beta / torch.clamp(beta[:, :1, :] ** 0.5, eps) - - -def _find_null_space_coords_2(kernel_dsts, cw_dst): - """Solves case 2 from the paper; solve for 3 coefficients: - [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34] - ^ ^ ^ - Args: - kernel_dsts: distances between kernel vectors - cw_dst: distances between control points - Returns: - coefficients to weight kernel vectors - [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009). - EPnP: An Accurate O(n) solution to the PnP problem. - International Journal of Computer Vision. - https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/ - """ - beta = _solve_lstsq_subcols(cw_dst, kernel_dsts, [0, 4, 1]) - - coord_0 = (beta[:, :1, :].abs() ** 0.5) * _binary_sign(beta[:, 1:2, :]) - coord_1 = (beta[:, 2:3, :].abs() ** 0.5) * ( - (beta[:, :1, :] >= 0) == (beta[:, 2:3, :] >= 0) - ).float() - - return torch.cat((coord_0, coord_1, torch.zeros_like(beta[:, :2, :])), dim=1) - - -def _find_null_space_coords_3(kernel_dsts, cw_dst, eps=1e-9): - """Solves case 3 from the paper; solve for 5 coefficients: - [B11 B22 B33 B44 B12 B13 B14 B23 B24 B34] - ^ ^ ^ ^ ^ - Args: - kernel_dsts: distances between kernel vectors - cw_dst: distances between control points - Returns: - coefficients to weight kernel vectors - [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009). - EPnP: An Accurate O(n) solution to the PnP problem. - International Journal of Computer Vision. - https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/ - """ - beta = _solve_lstsq_subcols(cw_dst, kernel_dsts, [0, 4, 1, 5, 7]) - - coord_0 = (beta[:, :1, :].abs() ** 0.5) * _binary_sign(beta[:, 1:2, :]) - coord_1 = (beta[:, 2:3, :].abs() ** 0.5) * ( - (beta[:, :1, :] >= 0) == (beta[:, 2:3, :] >= 0) - ).float() - coord_2 = beta[:, 3:4, :] / torch.clamp(coord_0[:, :1, :], eps) - - return torch.cat( - (coord_0, coord_1, coord_2, torch.zeros_like(beta[:, :1, :])), dim=1 - ) - - -def efficient_pnp( - x: torch.Tensor, - y: torch.Tensor, - weights: Optional[torch.Tensor] = None, - skip_quadratic_eq: bool = False, -) -> EpnpSolution: - """ - Implements Efficient PnP algorithm [1] for Perspective-n-Points problem: - finds a camera position (defined by rotation `R` and translation `T`) that - minimizes re-projection error between the given 3D points `x` and - the corresponding uncalibrated 2D points `y`, i.e. 
solves - - `y[i] = Proj(x[i] R[i] + T[i])` - - in the least-squares sense, where `i` are indices within the batch, and - `Proj` is the perspective projection operator: `Proj([x y z]) = [x/z y/z]`. - In the noise-less case, 4 points are enough to find the solution as long - as they are not co-planar. - - Args: - x: Batch of 3-dimensional points of shape `(minibatch, num_points, 3)`. - y: Batch of 2-dimensional points of shape `(minibatch, num_points, 2)`. - weights: Batch of non-negative weights of - shape `(minibatch, num_point)`. `None` means equal weights. - skip_quadratic_eq: If True, assumes the solution space for the - linear system is one-dimensional, i.e. takes the scaled eigenvector - that corresponds to the smallest eigenvalue as a solution. - If False, finds the candidate coordinates in the potentially - 4D null space by approximately solving the systems of quadratic - equations. The best candidate is chosen by examining the 2D - re-projection error. While this option finds a better solution, - especially when the number of points is small or perspective - distortions are low (the points are far away), it may be more - difficult to back-propagate through. - - Returns: - `EpnpSolution` namedtuple containing elements: - **x_cam**: Batch of transformed points `x` that is used to find - the camera parameters, of shape `(minibatch, num_points, 3)`. - In the general (noisy) case, they are not exactly equal to - `x[i] R[i] + T[i]` but are some affine transform of `x[i]`s. - **R**: Batch of rotation matrices of shape `(minibatch, 3, 3)`. - **T**: Batch of translation vectors of shape `(minibatch, 3)`. - **err_2d**: Batch of mean 2D re-projection errors of shape - `(minibatch,)`. Specifically, if `yhat` is the re-projection for - the `i`-th batch element, it returns `sum_j norm(yhat_j - y_j)` - where `j` iterates over points and `norm` denotes the L2 norm. - **err_3d**: Batch of mean algebraic errors of shape `(minibatch,)`. - Specifically, those are squared distances between `x_world` and - estimated points on the rays defined by `y`. - - [1] Moreno-Noguer, F., Lepetit, V., & Fua, P. (2009). - EPnP: An Accurate O(n) solution to the PnP problem. - International Journal of Computer Vision. - https://www.epfl.ch/labs/cvlab/software/multi-view-stereo/epnp/ - """ - # define control points in a world coordinate system (centered on the 3d - # points centroid); 4 x 3 - # TODO: more stable when initialised with the center and eigenvectors! 
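# A minimal usage sketch of the `efficient_pnp` solver documented above, assuming
# the usual `pytorch3d.ops` re-export of this module; shapes follow the Args section.
# This is an illustration only, not part of the deleted file.
import torch
from pytorch3d.ops import efficient_pnp

torch.manual_seed(0)
B, N = 2, 10
# ground-truth camera: random rotations (det fixed to +1) and translations
Q, _ = torch.linalg.qr(torch.randn(B, 3, 3))
R_gt = Q * torch.sign(torch.linalg.det(Q))[:, None, None]
T_gt = torch.randn(B, 3)
# sample camera-frame points with positive depth, then lift them back to world coords
x_cam = torch.rand(B, N, 3) + torch.tensor([-0.5, -0.5, 3.0])
x_world = torch.matmul(x_cam - T_gt[:, None, :], R_gt.transpose(1, 2))
y = x_cam[..., :2] / x_cam[..., 2:]          # Proj([x y z]) = [x/z y/z]
sol = efficient_pnp(x_world, y)
# sol.R / sol.T recover the camera up to noise; sol.err_2d is the mean 2D reprojection error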
- c_world = _define_control_points( - x.detach(), weights, storage_opts={"dtype": x.dtype, "device": x.device} - ) - - # find the linear combination of the control points to represent the 3d points - alphas = _compute_alphas(x, c_world) - - M = _build_M(y, alphas, weights) - - # Compute kernel M - kernel, spectrum = _null_space(M, 4) - - c_world_distances = _gen_pairs(c_world) - kernel_dsts = _kernel_vec_distances(kernel) - - betas = ( - [] - if skip_quadratic_eq - else [ - fnsc(kernel_dsts, c_world_distances) - for fnsc in [ - _find_null_space_coords_1, - _find_null_space_coords_2, - _find_null_space_coords_3, - ] - ] - ) - - c_cam_variants = [kernel] + [ - torch.matmul(kernel, beta[:, None, :, :]) for beta in betas - ] - - solutions = [ - _compute_norm_sign_scaling_factor(c_cam[..., 0], alphas, x, y, weights) - for c_cam in c_cam_variants - ] - - sol_zipped = EpnpSolution(*(torch.stack(list(col)) for col in zip(*solutions))) - best = torch.argmin(sol_zipped.err_2d, dim=0) - - def gather1d(source, idx): - # reduces the dim=1 by picking the slices in a 1D tensor idx - # in other words, it is batched index_select. - return source.gather( - 0, - idx.reshape(1, -1, *([1] * (len(source.shape) - 2))).expand_as(source[:1]), - )[0] - - return EpnpSolution(*[gather1d(sol_col, best) for sol_col in sol_zipped]) diff --git a/pytorch3d/pytorch3d/ops/points_alignment.py b/pytorch3d/pytorch3d/ops/points_alignment.py deleted file mode 100644 index 1b22b3cccc18350f7cdd2c49f8c345032e2f616d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/points_alignment.py +++ /dev/null @@ -1,389 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from typing import List, NamedTuple, Optional, TYPE_CHECKING, Union - -import torch -from pytorch3d.ops import knn_points -from pytorch3d.structures import utils as strutil - -from . import utils as oputil - - -if TYPE_CHECKING: - from pytorch3d.structures.pointclouds import Pointclouds - - -# named tuples for inputs/outputs -class SimilarityTransform(NamedTuple): - R: torch.Tensor - T: torch.Tensor - s: torch.Tensor - - -class ICPSolution(NamedTuple): - converged: bool - rmse: Union[torch.Tensor, None] - Xt: torch.Tensor - RTs: SimilarityTransform - t_history: List[SimilarityTransform] - - -def iterative_closest_point( - X: Union[torch.Tensor, "Pointclouds"], - Y: Union[torch.Tensor, "Pointclouds"], - init_transform: Optional[SimilarityTransform] = None, - max_iterations: int = 100, - relative_rmse_thr: float = 1e-6, - estimate_scale: bool = False, - allow_reflection: bool = False, - verbose: bool = False, -) -> ICPSolution: - """ - Executes the iterative closest point (ICP) algorithm [1, 2] in order to find - a similarity transformation (rotation `R`, translation `T`, and - optionally scale `s`) between two given differently-sized sets of - `d`-dimensional points `X` and `Y`, such that: - - `s[i] X[i] R[i] + T[i] = Y[NN[i]]`, - - for all batch indices `i` in the least squares sense. Here, Y[NN[i]] stands - for the indices of nearest neighbors from `Y` to each point in `X`. - Note, however, that the solution is only a local optimum. - - Args: - **X**: Batch of `d`-dimensional points - of shape `(minibatch, num_points_X, d)` or a `Pointclouds` object. - **Y**: Batch of `d`-dimensional points - of shape `(minibatch, num_points_Y, d)` or a `Pointclouds` object. 
- **init_transform**: A named-tuple `SimilarityTransform` of tensors - `R`, `T, `s`, where `R` is a batch of orthonormal matrices of - shape `(minibatch, d, d)`, `T` is a batch of translations - of shape `(minibatch, d)` and `s` is a batch of scaling factors - of shape `(minibatch,)`. - **max_iterations**: The maximum number of ICP iterations. - **relative_rmse_thr**: A threshold on the relative root mean squared error - used to terminate the algorithm. - **estimate_scale**: If `True`, also estimates a scaling component `s` - of the transformation. Otherwise assumes the identity - scale and returns a tensor of ones. - **allow_reflection**: If `True`, allows the algorithm to return `R` - which is orthonormal but has determinant==-1. - **verbose**: If `True`, prints status messages during each ICP iteration. - - Returns: - A named tuple `ICPSolution` with the following fields: - **converged**: A boolean flag denoting whether the algorithm converged - successfully (=`True`) or not (=`False`). - **rmse**: Attained root mean squared error after termination of ICP. - **Xt**: The point cloud `X` transformed with the final transformation - (`R`, `T`, `s`). If `X` is a `Pointclouds` object, returns an - instance of `Pointclouds`, otherwise returns `torch.Tensor`. - **RTs**: A named tuple `SimilarityTransform` containing - a batch of similarity transforms with fields: - **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`. - **T**: Batch of translations of shape `(minibatch, d)`. - **s**: batch of scaling factors of shape `(minibatch, )`. - **t_history**: A list of named tuples `SimilarityTransform` - the transformation parameters after each ICP iteration. - - References: - [1] Besl & McKay: A Method for Registration of 3-D Shapes. TPAMI, 1992. - [2] https://en.wikipedia.org/wiki/Iterative_closest_point - """ - - # make sure we convert input Pointclouds structures to - # padded tensors of shape (N, P, 3) - Xt, num_points_X = oputil.convert_pointclouds_to_tensor(X) - Yt, num_points_Y = oputil.convert_pointclouds_to_tensor(Y) - - b, size_X, dim = Xt.shape - - if (Xt.shape[2] != Yt.shape[2]) or (Xt.shape[0] != Yt.shape[0]): - raise ValueError( - "Point sets X and Y have to have the same " - + "number of batches and data dimensions." - ) - - if ((num_points_Y < Yt.shape[1]).any() or (num_points_X < Xt.shape[1]).any()) and ( - num_points_Y != num_points_X - ).any(): - # we have a heterogeneous input (e.g. because X/Y is - # an instance of Pointclouds) - mask_X = ( - torch.arange(size_X, dtype=torch.int64, device=Xt.device)[None] - < num_points_X[:, None] - ).type_as(Xt) - else: - mask_X = Xt.new_ones(b, size_X) - - # clone the initial point cloud - Xt_init = Xt.clone() - - if init_transform is not None: - # parse the initial transform from the input and apply to Xt - try: - R, T, s = init_transform - assert ( - R.shape == torch.Size((b, dim, dim)) - and T.shape == torch.Size((b, dim)) - and s.shape == torch.Size((b,)) - ) - except Exception: - raise ValueError( - "The initial transformation init_transform has to be " - "a named tuple SimilarityTransform with elements (R, T, s). " - "R are dim x dim orthonormal matrices of shape " - "(minibatch, dim, dim), T is a batch of dim-dimensional " - "translations of shape (minibatch, dim) and s is a batch " - "of scalars of shape (minibatch,)." 
- ) from None - # apply the init transform to the input point cloud - Xt = _apply_similarity_transform(Xt, R, T, s) - else: - # initialize the transformation with identity - R = oputil.eyes(dim, b, device=Xt.device, dtype=Xt.dtype) - T = Xt.new_zeros((b, dim)) - s = Xt.new_ones(b) - - prev_rmse = None - rmse = None - iteration = -1 - converged = False - - # initialize the transformation history - t_history = [] - - # the main loop over ICP iterations - for iteration in range(max_iterations): - Xt_nn_points = knn_points( - Xt, Yt, lengths1=num_points_X, lengths2=num_points_Y, K=1, return_nn=True - ).knn[:, :, 0, :] - - # get the alignment of the nearest neighbors from Yt with Xt_init - R, T, s = corresponding_points_alignment( - Xt_init, - Xt_nn_points, - weights=mask_X, - estimate_scale=estimate_scale, - allow_reflection=allow_reflection, - ) - - # apply the estimated similarity transform to Xt_init - Xt = _apply_similarity_transform(Xt_init, R, T, s) - - # add the current transformation to the history - t_history.append(SimilarityTransform(R, T, s)) - - # compute the root mean squared error - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - Xt_sq_diff = ((Xt - Xt_nn_points) ** 2).sum(2) - rmse = oputil.wmean(Xt_sq_diff[:, :, None], mask_X).sqrt()[:, 0, 0] - - # compute the relative rmse - if prev_rmse is None: - relative_rmse = rmse.new_ones(b) - else: - relative_rmse = (prev_rmse - rmse) / prev_rmse - - if verbose: - rmse_msg = ( - f"ICP iteration {iteration}: mean/max rmse = " - + f"{rmse.mean():1.2e}/{rmse.max():1.2e} " - + f"; mean relative rmse = {relative_rmse.mean():1.2e}" - ) - print(rmse_msg) - - # check for convergence - if (relative_rmse <= relative_rmse_thr).all(): - converged = True - break - - # update the previous rmse - prev_rmse = rmse - - if verbose: - if converged: - print(f"ICP has converged in {iteration + 1} iterations.") - else: - print(f"ICP has not converged in {max_iterations} iterations.") - - if oputil.is_pointclouds(X): - Xt = X.update_padded(Xt) # type: ignore - - return ICPSolution(converged, rmse, Xt, SimilarityTransform(R, T, s), t_history) - - -# threshold for checking that point crosscorelation -# is full rank in corresponding_points_alignment -AMBIGUOUS_ROT_SINGULAR_THR = 1e-15 - - -def corresponding_points_alignment( - X: Union[torch.Tensor, "Pointclouds"], - Y: Union[torch.Tensor, "Pointclouds"], - weights: Union[torch.Tensor, List[torch.Tensor], None] = None, - estimate_scale: bool = False, - allow_reflection: bool = False, - eps: float = 1e-9, -) -> SimilarityTransform: - """ - Finds a similarity transformation (rotation `R`, translation `T` - and optionally scale `s`) between two given sets of corresponding - `d`-dimensional points `X` and `Y` such that: - - `s[i] X[i] R[i] + T[i] = Y[i]`, - - for all batch indexes `i` in the least squares sense. - - The algorithm is also known as Umeyama [1]. - - Args: - **X**: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)` - or a `Pointclouds` object. - **Y**: Batch of `d`-dimensional points of shape `(minibatch, num_point, d)` - or a `Pointclouds` object. - **weights**: Batch of non-negative weights of - shape `(minibatch, num_point)` or list of `minibatch` 1-dimensional - tensors that may have different shapes; in that case, the length of - i-th tensor should be equal to the number of points in X_i and Y_i. - Passing `None` means uniform weights. - **estimate_scale**: If `True`, also estimates a scaling component `s` - of the transformation. 
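# A short sketch of calling `iterative_closest_point` as documented above (it uses
# `corresponding_points_alignment` internally); assumes the usual `pytorch3d.ops`
# re-export and an available compiled `knn_points` op. Illustration only.
import math
import torch
from pytorch3d.ops import iterative_closest_point

torch.manual_seed(0)
X = torch.randn(2, 256, 3)                                  # source clouds
c, s = math.cos(0.3), math.sin(0.3)
R_gt = torch.tensor([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]]).repeat(2, 1, 1)
T_gt = torch.tensor([0.5, -0.2, 0.1]).expand(2, 3)
Y = torch.bmm(X, R_gt) + T_gt[:, None, :]                   # target = transformed source
sol = iterative_closest_point(X, Y, max_iterations=50)
# sol is an ICPSolution: sol.converged, sol.rmse, sol.Xt, sol.RTs.(R, T, s), sol.t_history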
Otherwise assumes an identity - scale and returns a tensor of ones. - **allow_reflection**: If `True`, allows the algorithm to return `R` - which is orthonormal but has determinant==-1. - **eps**: A scalar for clamping to avoid dividing by zero. Active for the - code that estimates the output scale `s`. - - Returns: - 3-element named tuple `SimilarityTransform` containing - - **R**: Batch of orthonormal matrices of shape `(minibatch, d, d)`. - - **T**: Batch of translations of shape `(minibatch, d)`. - - **s**: batch of scaling factors of shape `(minibatch, )`. - - References: - [1] Shinji Umeyama: Least-Suqares Estimation of - Transformation Parameters Between Two Point Patterns - """ - - # make sure we convert input Pointclouds structures to tensors - Xt, num_points = oputil.convert_pointclouds_to_tensor(X) - Yt, num_points_Y = oputil.convert_pointclouds_to_tensor(Y) - - if (Xt.shape != Yt.shape) or (num_points != num_points_Y).any(): - raise ValueError( - "Point sets X and Y have to have the same \ - number of batches, points and dimensions." - ) - if weights is not None: - if isinstance(weights, list): - if any(np != w.shape[0] for np, w in zip(num_points, weights)): - raise ValueError( - "number of weights should equal to the " - + "number of points in the point cloud." - ) - weights = [w[..., None] for w in weights] - weights = strutil.list_to_padded(weights)[..., 0] - - if Xt.shape[:2] != weights.shape: - raise ValueError("weights should have the same first two dimensions as X.") - - b, n, dim = Xt.shape - - if (num_points < Xt.shape[1]).any() or (num_points < Yt.shape[1]).any(): - # in case we got Pointclouds as input, mask the unused entries in Xc, Yc - mask = ( - torch.arange(n, dtype=torch.int64, device=Xt.device)[None] - < num_points[:, None] - ).type_as(Xt) - weights = mask if weights is None else mask * weights.type_as(Xt) - - # compute the centroids of the point sets - Xmu = oputil.wmean(Xt, weight=weights, eps=eps) - Ymu = oputil.wmean(Yt, weight=weights, eps=eps) - - # mean-center the point sets - Xc = Xt - Xmu - Yc = Yt - Ymu - - total_weight = torch.clamp(num_points, 1) - # special handling for heterogeneous point clouds and/or input weights - if weights is not None: - Xc *= weights[:, :, None] - Yc *= weights[:, :, None] - total_weight = torch.clamp(weights.sum(1), eps) - - if (num_points < (dim + 1)).any(): - warnings.warn( - "The size of one of the point clouds is <= dim+1. " - + "corresponding_points_alignment cannot return a unique rotation." - ) - - # compute the covariance XYcov between the point sets Xc, Yc - XYcov = torch.bmm(Xc.transpose(2, 1), Yc) - XYcov = XYcov / total_weight[:, None, None] - - # decompose the covariance matrix XYcov - U, S, V = torch.svd(XYcov) - - # catch ambiguous rotation by checking the magnitude of singular values - if (S.abs() <= AMBIGUOUS_ROT_SINGULAR_THR).any() and not ( - num_points < (dim + 1) - ).any(): - warnings.warn( - "Excessively low rank of " - + "cross-correlation between aligned point clouds. " - + "corresponding_points_alignment cannot return a unique rotation." - ) - - # identity matrix used for fixing reflections - E = torch.eye(dim, dtype=XYcov.dtype, device=XYcov.device)[None].repeat(b, 1, 1) - - if not allow_reflection: - # reflection test: - # checks whether the estimated rotation has det==1, - # if not, finds the nearest rotation s.t. 
det==1 by - # flipping the sign of the last singular vector U - R_test = torch.bmm(U, V.transpose(2, 1)) - E[:, -1, -1] = torch.det(R_test) - - # find the rotation matrix by composing U and V again - R = torch.bmm(torch.bmm(U, E), V.transpose(2, 1)) - - if estimate_scale: - # estimate the scaling component of the transformation - trace_ES = (torch.diagonal(E, dim1=1, dim2=2) * S).sum(1) - Xcov = (Xc * Xc).sum((1, 2)) / total_weight - - # the scaling component - s = trace_ES / torch.clamp(Xcov, eps) - - # translation component - T = Ymu[:, 0, :] - s[:, None] * torch.bmm(Xmu, R)[:, 0, :] - else: - # translation component - T = Ymu[:, 0, :] - torch.bmm(Xmu, R)[:, 0, :] - - # unit scaling since we do not estimate scale - s = T.new_ones(b) - - return SimilarityTransform(R, T, s) - - -def _apply_similarity_transform( - X: torch.Tensor, R: torch.Tensor, T: torch.Tensor, s: torch.Tensor -) -> torch.Tensor: - """ - Applies a similarity transformation parametrized with a batch of orthonormal - matrices `R` of shape `(minibatch, d, d)`, a batch of translations `T` - of shape `(minibatch, d)` and a batch of scaling factors `s` - of shape `(minibatch,)` to a given `d`-dimensional cloud `X` - of shape `(minibatch, num_points, d)` - """ - X = s[:, None, None] * torch.bmm(X, R) + T[:, None, :] - return X diff --git a/pytorch3d/pytorch3d/ops/points_normals.py b/pytorch3d/pytorch3d/ops/points_normals.py deleted file mode 100644 index 63aeefbd82814f4233b35ccab1c1e1c7cc828d66..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/points_normals.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Tuple, TYPE_CHECKING, Union - -import torch -from pytorch3d.common.workaround import symeig3x3 - -from .utils import convert_pointclouds_to_tensor, get_point_covariances - - -if TYPE_CHECKING: - from ..structures import Pointclouds - - -def estimate_pointcloud_normals( - pointclouds: Union[torch.Tensor, "Pointclouds"], - neighborhood_size: int = 50, - disambiguate_directions: bool = True, - *, - use_symeig_workaround: bool = True, -) -> torch.Tensor: - """ - Estimates the normals of a batch of `pointclouds`. - - The function uses `estimate_pointcloud_local_coord_frames` to estimate - the normals. Please refer to that function for more detailed information. - - Args: - **pointclouds**: Batch of 3-dimensional points of shape - `(minibatch, num_point, 3)` or a `Pointclouds` object. - **neighborhood_size**: The size of the neighborhood used to estimate the - geometry around each point. - **disambiguate_directions**: If `True`, uses the algorithm from [1] to - ensure sign consistency of the normals of neighboring points. - **use_symeig_workaround**: If `True`, uses a custom eigenvalue - calculation. - - Returns: - **normals**: A tensor of normals for each input point - of shape `(minibatch, num_point, 3)`. - If `pointclouds` are of `Pointclouds` class, returns a padded tensor. - - References: - [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for - Local Surface Description, ECCV 2010. 
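# A hedged sketch of the normal estimation described above, assuming the usual
# `pytorch3d.ops` re-export and an available compiled knn op; on a nearly planar
# patch the estimated normals should come out close to +/- (0, 0, 1).
import torch
from pytorch3d.ops import estimate_pointcloud_normals

torch.manual_seed(0)
pts = torch.rand(1, 2000, 3)
pts[..., 2] *= 0.01                                  # squash into a thin slab
normals = estimate_pointcloud_normals(pts, neighborhood_size=30)
print(normals.shape)                                 # (1, 2000, 3)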
- """ - - curvatures, local_coord_frames = estimate_pointcloud_local_coord_frames( - pointclouds, - neighborhood_size=neighborhood_size, - disambiguate_directions=disambiguate_directions, - use_symeig_workaround=use_symeig_workaround, - ) - - # the normals correspond to the first vector of each local coord frame - normals = local_coord_frames[:, :, :, 0] - - return normals - - -def estimate_pointcloud_local_coord_frames( - pointclouds: Union[torch.Tensor, "Pointclouds"], - neighborhood_size: int = 50, - disambiguate_directions: bool = True, - *, - use_symeig_workaround: bool = True, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Estimates the principal directions of curvature (which includes normals) - of a batch of `pointclouds`. - - The algorithm first finds `neighborhood_size` nearest neighbors for each - point of the point clouds, followed by obtaining principal vectors of - covariance matrices of each of the point neighborhoods. - The main principal vector corresponds to the normals, while the - other 2 are the direction of the highest curvature and the 2nd highest - curvature. - - Note that each principal direction is given up to a sign. Hence, - the function implements `disambiguate_directions` switch that allows - to ensure consistency of the sign of neighboring normals. The implementation - follows the sign disabiguation from SHOT descriptors [1]. - - The algorithm also returns the curvature values themselves. - These are the eigenvalues of the estimated covariance matrices - of each point neighborhood. - - Args: - **pointclouds**: Batch of 3-dimensional points of shape - `(minibatch, num_point, 3)` or a `Pointclouds` object. - **neighborhood_size**: The size of the neighborhood used to estimate the - geometry around each point. - **disambiguate_directions**: If `True`, uses the algorithm from [1] to - ensure sign consistency of the normals of neighboring points. - **use_symeig_workaround**: If `True`, uses a custom eigenvalue - calculation. - - Returns: - **curvatures**: The three principal curvatures of each point - of shape `(minibatch, num_point, 3)`. - If `pointclouds` are of `Pointclouds` class, returns a padded tensor. - **local_coord_frames**: The three principal directions of the curvature - around each point of shape `(minibatch, num_point, 3, 3)`. - The principal directions are stored in columns of the output. - E.g. `local_coord_frames[i, j, :, 0]` is the normal of - `j`-th point in the `i`-th pointcloud. - If `pointclouds` are of `Pointclouds` class, returns a padded tensor. - - References: - [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for - Local Surface Description, ECCV 2010. - """ - - points_padded, num_points = convert_pointclouds_to_tensor(pointclouds) - - ba, N, dim = points_padded.shape - if dim != 3: - raise ValueError( - "The pointclouds argument has to be of shape (minibatch, N, 3)" - ) - - if (num_points <= neighborhood_size).any(): - raise ValueError( - "The neighborhood_size argument has to be" - + " >= size of each of the point clouds." 
- ) - - # undo global mean for stability - # TODO: replace with tutil.wmean once landed - pcl_mean = points_padded.sum(1) / num_points[:, None] - points_centered = points_padded - pcl_mean[:, None, :] - - # get the per-point covariance and nearest neighbors used to compute it - cov, knns = get_point_covariances(points_centered, num_points, neighborhood_size) - - # get the local coord frames as principal directions of - # the per-point covariance - # this is done with torch.symeig / torch.linalg.eigh, which returns the - # eigenvectors (=principal directions) in an ascending order of their - # corresponding eigenvalues, and the smallest eigenvalue's eigenvector - # corresponds to the normal direction; or with a custom equivalent. - if use_symeig_workaround: - curvatures, local_coord_frames = symeig3x3(cov, eigenvectors=True) - else: - curvatures, local_coord_frames = torch.linalg.eigh(cov) - - # disambiguate the directions of individual principal vectors - if disambiguate_directions: - # disambiguate normal - n = _disambiguate_vector_directions( - points_centered, knns, local_coord_frames[:, :, :, 0] - ) - # disambiguate the main curvature - z = _disambiguate_vector_directions( - points_centered, knns, local_coord_frames[:, :, :, 2] - ) - # the secondary curvature is just a cross between n and z - y = torch.cross(n, z, dim=2) - # cat to form the set of principal directions - local_coord_frames = torch.stack((n, y, z), dim=3) - - return curvatures, local_coord_frames - - -def _disambiguate_vector_directions(pcl, knns, vecs: torch.Tensor) -> torch.Tensor: - """ - Disambiguates normal directions according to [1]. - - References: - [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for - Local Surface Description, ECCV 2010. - """ - # parse out K from the shape of knns - K = knns.shape[2] - # the difference between the mean of each neighborhood and - # each element of the neighborhood - df = knns - pcl[:, :, None] - # projection of the difference on the principal direction - proj = (vecs[:, :, None] * df).sum(3) - # check how many projections are positive - n_pos = (proj > 0).type_as(knns).sum(2, keepdim=True) - # flip the principal directions where number of positive correlations - flip = (n_pos < (0.5 * K)).type_as(knns) - vecs = (1.0 - 2.0 * flip) * vecs - return vecs diff --git a/pytorch3d/pytorch3d/ops/points_to_volumes.py b/pytorch3d/pytorch3d/ops/points_to_volumes.py deleted file mode 100644 index f319d90aeaf2ff029df575451005ccf76d44b505..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/points_to_volumes.py +++ /dev/null @@ -1,762 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional, Tuple, TYPE_CHECKING - -import torch -from pytorch3d import _C -from torch.autograd import Function -from torch.autograd.function import once_differentiable - - -if TYPE_CHECKING: - from ..structures import Pointclouds, Volumes - - -class _points_to_volumes_function(Function): - """ - For each point in a pointcloud, add point_weight to the - corresponding volume density and point_weight times its features - to the corresponding volume features. - - This function does not require any contiguity internally and therefore - doesn't need to make copies of its inputs, which is useful when GPU memory - is at a premium. 
(An implementation requiring contiguous inputs might be faster - though). The volumes are modified in place. - - This function is differentiable with respect to - points_features, volume_densities and volume_features. - If splat is True then it is also differentiable with respect to - points_3d. - - It may be useful to think about this function as a sort of opposite to - torch.nn.functional.grid_sample with 5D inputs. - - Args: - points_3d: Batch of 3D point cloud coordinates of shape - `(minibatch, N, 3)` where N is the number of points - in each point cloud. Coordinates have to be specified in the - local volume coordinates (ranging in [-1, 1]). - points_features: Features of shape `(minibatch, N, feature_dim)` - corresponding to the points of the input point cloud `points_3d`. - volume_features: Batch of input feature volumes - of shape `(minibatch, feature_dim, D, H, W)` - volume_densities: Batch of input feature volume densities - of shape `(minibatch, 1, D, H, W)`. Each voxel should - contain a non-negative number corresponding to its - opaqueness (the higher, the less transparent). - - grid_sizes: `LongTensor` of shape (minibatch, 3) representing the - spatial resolutions of each of the the non-flattened `volumes` - tensors. Note that the following has to hold: - `torch.prod(grid_sizes, dim=1)==N_voxels`. - - point_weight: A scalar controlling how much weight a single point has. - - mask: A binary mask of shape `(minibatch, N)` determining - which 3D points are going to be converted to the resulting - volume. Set to `None` if all points are valid. - - align_corners: as for grid_sample. - - splat: if true, trilinear interpolation. If false all the weight goes in - the nearest voxel. - - Returns: - volume_densities and volume_features, which have been modified in place. - """ - - @staticmethod - # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently. - def forward( - ctx, - points_3d: torch.Tensor, - points_features: torch.Tensor, - volume_densities: torch.Tensor, - volume_features: torch.Tensor, - grid_sizes: torch.LongTensor, - point_weight: float, - mask: torch.Tensor, - align_corners: bool, - splat: bool, - ): - - ctx.mark_dirty(volume_densities, volume_features) - - N, P, D = points_3d.shape - if D != 3: - raise ValueError("points_3d must be 3D") - if points_3d.dtype != torch.float32: - raise ValueError("points_3d must be float32") - if points_features.dtype != torch.float32: - raise ValueError("points_features must be float32") - N1, P1, C = points_features.shape - if N1 != N or P1 != P: - raise ValueError("Bad points_features shape") - if volume_densities.dtype != torch.float32: - raise ValueError("volume_densities must be float32") - N2, one, D, H, W = volume_densities.shape - if N2 != N or one != 1: - raise ValueError("Bad volume_densities shape") - if volume_features.dtype != torch.float32: - raise ValueError("volume_features must be float32") - N3, C1, D1, H1, W1 = volume_features.shape - if N3 != N or C1 != C or D1 != D or H1 != H or W1 != W: - raise ValueError("Bad volume_features shape") - if grid_sizes.dtype != torch.int64: - raise ValueError("grid_sizes must be int64") - N4, D1 = grid_sizes.shape - if N4 != N or D1 != 3: - raise ValueError("Bad grid_sizes.shape") - if mask.dtype != torch.float32: - raise ValueError("mask must be float32") - N5, P2 = mask.shape - if N5 != N or P2 != P: - raise ValueError("Bad mask shape") - - # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`. 
- _C.points_to_volumes_forward( - points_3d, - points_features, - volume_densities, - volume_features, - grid_sizes, - mask, - point_weight, - align_corners, - splat, - ) - if splat: - ctx.save_for_backward(points_3d, points_features, grid_sizes, mask) - else: - ctx.save_for_backward(points_3d, grid_sizes, mask) - ctx.point_weight = point_weight - ctx.splat = splat - ctx.align_corners = align_corners - return volume_densities, volume_features - - @staticmethod - @once_differentiable - def backward(ctx, grad_volume_densities, grad_volume_features): - splat = ctx.splat - N, C = grad_volume_features.shape[:2] - if splat: - points_3d, points_features, grid_sizes, mask = ctx.saved_tensors - P = points_3d.shape[1] - grad_points_3d = torch.zeros_like(points_3d) - else: - points_3d, grid_sizes, mask = ctx.saved_tensors - P = points_3d.shape[1] - ones = points_3d.new_zeros(1, 1, 1) - # There is no gradient. Just need something to let its accessors exist. - grad_points_3d = ones.expand_as(points_3d) - # points_features not needed. Just need something to let its accessors exist. - points_features = ones.expand(N, P, C) - grad_points_features = points_3d.new_zeros(N, P, C) - _C.points_to_volumes_backward( - points_3d, - points_features, - grid_sizes, - mask, - ctx.point_weight, - ctx.align_corners, - splat, - grad_volume_densities, - grad_volume_features, - grad_points_3d, - grad_points_features, - ) - - return ( - (grad_points_3d if splat else None), - grad_points_features, - grad_volume_densities, - grad_volume_features, - None, - None, - None, - None, - None, - ) - - -_points_to_volumes = _points_to_volumes_function.apply - - -def add_pointclouds_to_volumes( - pointclouds: "Pointclouds", - initial_volumes: "Volumes", - mode: str = "trilinear", - min_weight: float = 1e-4, - rescale_features: bool = True, - _python: bool = False, -) -> "Volumes": - """ - Add a batch of point clouds represented with a `Pointclouds` structure - `pointclouds` to a batch of existing volumes represented with a - `Volumes` structure `initial_volumes`. - - More specifically, the method casts a set of weighted votes (the weights are - determined based on `mode="trilinear"|"nearest"`) into the pre-initialized - `features` and `densities` fields of `initial_volumes`. - - The method returns an updated `Volumes` object that contains a copy - of `initial_volumes` with its `features` and `densities` updated with the - result of the pointcloud addition. - - Example:: - - # init a random point cloud - pointclouds = Pointclouds( - points=torch.randn(4, 100, 3), features=torch.rand(4, 100, 5) - ) - # init an empty volume centered around [0.5, 0.5, 0.5] in world coordinates - # with a voxel size of 1.0. - initial_volumes = Volumes( - features = torch.zeros(4, 5, 25, 25, 25), - densities = torch.zeros(4, 1, 25, 25, 25), - volume_translation = [-0.5, -0.5, -0.5], - voxel_size = 1.0, - ) - # add the pointcloud to the 'initial_volumes' buffer using - # trilinear splatting - updated_volumes = add_pointclouds_to_volumes( - pointclouds=pointclouds, - initial_volumes=initial_volumes, - mode="trilinear", - ) - - Args: - pointclouds: Batch of 3D pointclouds represented with a `Pointclouds` - structure. Note that `pointclouds.features` have to be defined. - initial_volumes: Batch of initial `Volumes` with pre-initialized 1-dimensional - densities which contain non-negative numbers corresponding to the - opaqueness of each voxel (the higher, the less transparent). - mode: The mode of the conversion of individual points into the volume. 
- Set either to `nearest` or `trilinear`: - `nearest`: Each 3D point is first rounded to the volumetric - lattice. Each voxel is then labeled with the average - over features that fall into the given voxel. - The gradients of nearest neighbor conversion w.r.t. the - 3D locations of the points in `pointclouds` are *not* defined. - `trilinear`: Each 3D point casts 8 weighted votes to the 8-neighborhood - of its floating point coordinate. The weights are - determined using a trilinear interpolation scheme. - Trilinear splatting is fully differentiable w.r.t. all input arguments. - min_weight: A scalar controlling the lowest possible total per-voxel - weight used to normalize the features accumulated in a voxel. - Only active for `mode==trilinear`. - rescale_features: If False, output features are just the sum of input and - added points. If True, they are averaged. In both cases, - output densities are just summed without rescaling, so - you may need to rescale them afterwards. - _python: Set to True to use a pure Python implementation, e.g. for test - purposes, which requires more memory and may be slower. - - Returns: - updated_volumes: Output `Volumes` structure containing the conversion result. - """ - - if len(initial_volumes) != len(pointclouds): - raise ValueError( - "'initial_volumes' and 'pointclouds' have to have the same batch size." - ) - - # obtain the features and densities - pcl_feats = pointclouds.features_padded() - pcl_3d = pointclouds.points_padded() - - if pcl_feats is None: - raise ValueError("'pointclouds' have to have their 'features' defined.") - - # obtain the conversion mask - n_per_pcl = pointclouds.num_points_per_cloud().type_as(pcl_feats) - # pyre-fixme[6]: For 1st param expected `Union[bool, float, int]` but got `Tensor`. - mask = torch.arange(n_per_pcl.max(), dtype=pcl_feats.dtype, device=pcl_feats.device) - mask = (mask[None, :] < n_per_pcl[:, None]).type_as(mask) - - # convert to the coord frame of the volume - pcl_3d_local = initial_volumes.world_to_local_coords(pcl_3d) - - features_new, densities_new = add_points_features_to_volume_densities_features( - points_3d=pcl_3d_local, - points_features=pcl_feats, - volume_features=initial_volumes.features(), - volume_densities=initial_volumes.densities(), - min_weight=min_weight, - grid_sizes=initial_volumes.get_grid_sizes(), - mask=mask, - mode=mode, - rescale_features=rescale_features, - align_corners=initial_volumes.get_align_corners(), - _python=_python, - ) - - return initial_volumes.update_padded( - new_densities=densities_new, new_features=features_new - ) - - -def add_points_features_to_volume_densities_features( - points_3d: torch.Tensor, - points_features: torch.Tensor, - volume_densities: torch.Tensor, - volume_features: Optional[torch.Tensor], - mode: str = "trilinear", - min_weight: float = 1e-4, - mask: Optional[torch.Tensor] = None, - grid_sizes: Optional[torch.LongTensor] = None, - rescale_features: bool = True, - _python: bool = False, - align_corners: bool = True, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Convert a batch of point clouds represented with tensors of per-point - 3d coordinates and their features to a batch of volumes represented - with tensors of densities and features. - - Args: - points_3d: Batch of 3D point cloud coordinates of shape - `(minibatch, N, 3)` where N is the number of points - in each point cloud. Coordinates have to be specified in the - local volume coordinates (ranging in [-1, 1]). 
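# A minimal tensor-level sketch of the conversion documented here, complementing the
# `Pointclouds`/`Volumes` example above; assumes the usual `pytorch3d.ops` re-export
# and the compiled `_C` extension. Illustration only.
import torch
from pytorch3d.ops import add_points_features_to_volume_densities_features

torch.manual_seed(0)
B, N, C, D = 2, 500, 4, 16
points = torch.rand(B, N, 3) * 2.0 - 1.0             # local volume coords in [-1, 1]
feats = torch.rand(B, N, C)
densities = torch.zeros(B, 1, D, D, D)
features = torch.zeros(B, C, D, D, D)
features, densities = add_points_features_to_volume_densities_features(
    points, feats, densities, features, mode="trilinear"
)
# features: (B, C, D, D, D) per-voxel averaged features; densities: summed splat weights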
- points_features: Features of shape `(minibatch, N, feature_dim)` corresponding - to the points of the input point clouds `pointcloud`. - volume_densities: Batch of input feature volume densities of shape - `(minibatch, 1, D, H, W)`. Each voxel should - contain a non-negative number corresponding to its - opaqueness (the higher, the less transparent). - volume_features: Batch of input feature volumes of shape - `(minibatch, feature_dim, D, H, W)` - If set to `None`, the `volume_features` will be automatically - instantiated with a correct size and filled with 0s. - mode: The mode of the conversion of individual points into the volume. - Set either to `nearest` or `trilinear`: - `nearest`: Each 3D point is first rounded to the volumetric - lattice. Each voxel is then labeled with the average - over features that fall into the given voxel. - The gradients of nearest neighbor rounding w.r.t. the - input point locations `points_3d` are *not* defined. - `trilinear`: Each 3D point casts 8 weighted votes to the 8-neighborhood - of its floating point coordinate. The weights are - determined using a trilinear interpolation scheme. - Trilinear splatting is fully differentiable w.r.t. all input arguments. - min_weight: A scalar controlling the lowest possible total per-voxel - weight used to normalize the features accumulated in a voxel. - Only active for `mode==trilinear`. - mask: A binary mask of shape `(minibatch, N)` determining which 3D points - are going to be converted to the resulting volume. - Set to `None` if all points are valid. - grid_sizes: `LongTensor` of shape (minibatch, 3) representing the - spatial resolutions of each of the the non-flattened `volumes` tensors, - or None to indicate the whole volume is used for every batch element. - rescale_features: If False, output features are just the sum of input and - added points. If True, they are averaged. In both cases, - output densities are just summed without rescaling, so - you may need to rescale them afterwards. - _python: Set to True to use a pure Python implementation. - align_corners: as for grid_sample. - Returns: - volume_features: Output volume of shape `(minibatch, feature_dim, D, H, W)` - volume_densities: Occupancy volume of shape `(minibatch, 1, D, H, W)` - containing the total amount of votes cast to each of the voxels. - """ - - # number of points in the point cloud, its dim and batch size - ba, n_points, feature_dim = points_features.shape - ba_volume, density_dim = volume_densities.shape[:2] - - if density_dim != 1: - raise ValueError("Only one-dimensional densities are allowed.") - - # init the volumetric grid sizes if uninitialized - if grid_sizes is None: - # grid sizes shape (minibatch, 3) - grid_sizes = ( - torch.LongTensor(list(volume_densities.shape[2:])) - .to(volume_densities.device) - .expand(volume_densities.shape[0], 3) - ) - - if _python: - return _add_points_features_to_volume_densities_features_python( - points_3d=points_3d, - points_features=points_features, - volume_densities=volume_densities, - volume_features=volume_features, - mode=mode, - min_weight=min_weight, - mask=mask, - # pyre-fixme[6]: For 8th param expected `LongTensor` but got `Tensor`. 
- grid_sizes=grid_sizes, - ) - - if mode == "trilinear": - splat = True - elif mode == "nearest": - splat = False - else: - raise ValueError('No such interpolation mode "%s"' % mode) - - if mask is None: - mask = points_3d.new_ones(1).expand(points_3d.shape[:2]) - - volume_densities, volume_features = _points_to_volumes( - points_3d, - points_features, - volume_densities, - volume_features, - grid_sizes, - 1.0, # point_weight - mask, - align_corners, # align_corners - splat, - ) - - if rescale_features: - # divide each feature by the total weight of the votes - if splat: - volume_features = volume_features / volume_densities.clamp(min_weight) - else: - volume_features = volume_features / volume_densities.clamp(1.0) - - return volume_features, volume_densities - - -def _add_points_features_to_volume_densities_features_python( - *, - points_3d: torch.Tensor, - points_features: torch.Tensor, - volume_densities: torch.Tensor, - volume_features: Optional[torch.Tensor], - mode: str, - min_weight: float, - mask: Optional[torch.Tensor], - grid_sizes: torch.LongTensor, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Python implementation for add_points_features_to_volume_densities_features. - - Returns: - volume_features: Output volume of shape `(minibatch, feature_dim, D, H, W)` - volume_densities: Occupancy volume of shape `(minibatch, 1, D, H, W)` - containing the total amount of votes cast to each of the voxels. - """ - ba, n_points, feature_dim = points_features.shape - - # flatten densities and features - v_shape = volume_densities.shape[2:] - volume_densities_flatten = volume_densities.view(ba, -1, 1) - n_voxels = volume_densities_flatten.shape[1] - - if volume_features is None: - # initialize features if not passed in - volume_features_flatten = volume_densities.new_zeros(ba, feature_dim, n_voxels) - else: - # otherwise just flatten - volume_features_flatten = volume_features.view(ba, feature_dim, n_voxels) - - if mode == "trilinear": # do the splatting (trilinear interp) - volume_features, volume_densities = _splat_points_to_volumes( - points_3d, - points_features, - volume_densities_flatten, - volume_features_flatten, - grid_sizes, - mask=mask, - min_weight=min_weight, - ) - elif mode == "nearest": # nearest neighbor interp - volume_features, volume_densities = _round_points_to_volumes( - points_3d, - points_features, - volume_densities_flatten, - volume_features_flatten, - grid_sizes, - mask=mask, - ) - else: - raise ValueError('No such interpolation mode "%s"' % mode) - - # reshape into the volume shape - volume_features = volume_features.view(ba, feature_dim, *v_shape) - volume_densities = volume_densities.view(ba, 1, *v_shape) - return volume_features, volume_densities - - -def _check_points_to_volumes_inputs( - points_3d: torch.Tensor, - points_features: torch.Tensor, - volume_densities: torch.Tensor, - volume_features: torch.Tensor, - grid_sizes: torch.LongTensor, - mask: Optional[torch.Tensor] = None, -) -> None: - - max_grid_size = grid_sizes.max(dim=0).values - if torch.prod(max_grid_size) > volume_densities.shape[1]: - raise ValueError( - "One of the grid sizes corresponds to a larger number" - + " of elements than the number of elements in volume_densities." 
- ) - - _, n_voxels, density_dim = volume_densities.shape - - if density_dim != 1: - raise ValueError("Only one-dimensional densities are allowed.") - - ba, n_points, feature_dim = points_features.shape - - if volume_features.shape[1] != feature_dim: - raise ValueError( - "volume_features have a different number of channels" - + " than points_features." - ) - - if volume_features.shape[2] != n_voxels: - raise ValueError( - "volume_features have a different number of elements" - + " than volume_densities." - ) - - -def _splat_points_to_volumes( - points_3d: torch.Tensor, - points_features: torch.Tensor, - volume_densities: torch.Tensor, - volume_features: torch.Tensor, - grid_sizes: torch.LongTensor, - min_weight: float = 1e-4, - mask: Optional[torch.Tensor] = None, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Convert a batch of point clouds to a batch of volumes using trilinear - splatting into a volume. - - Args: - points_3d: Batch of 3D point cloud coordinates of shape - `(minibatch, N, 3)` where N is the number of points - in each point cloud. Coordinates have to be specified in the - local volume coordinates (ranging in [-1, 1]). - points_features: Features of shape `(minibatch, N, feature_dim)` - corresponding to the points of the input point cloud `points_3d`. - volume_features: Batch of input *flattened* feature volumes - of shape `(minibatch, feature_dim, N_voxels)` - volume_densities: Batch of input *flattened* feature volume densities - of shape `(minibatch, N_voxels, 1)`. Each voxel should - contain a non-negative number corresponding to its - opaqueness (the higher, the less transparent). - grid_sizes: `LongTensor` of shape (minibatch, 3) representing the - spatial resolutions of each of the the non-flattened `volumes` tensors. - Note that the following has to hold: - `torch.prod(grid_sizes, dim=1)==N_voxels` - min_weight: A scalar controlling the lowest possible total per-voxel - weight used to normalize the features accumulated in a voxel. - mask: A binary mask of shape `(minibatch, N)` determining which 3D points - are going to be converted to the resulting volume. - Set to `None` if all points are valid. - Returns: - volume_features: Output volume of shape `(minibatch, D, N_voxels)`. - volume_densities: Occupancy volume of shape `(minibatch, 1, N_voxels)` - containing the total amount of votes cast to each of the voxels. 
- """ - - _check_points_to_volumes_inputs( - points_3d, - points_features, - volume_densities, - volume_features, - grid_sizes, - mask=mask, - ) - - _, n_voxels, density_dim = volume_densities.shape - ba, n_points, feature_dim = points_features.shape - - # minibatch x n_points x feature_dim -> minibatch x feature_dim x n_points - points_features = points_features.permute(0, 2, 1).contiguous() - - # XYZ = the upper-left volume index of the 8-neighborhood of every point - # grid_sizes is of the form (minibatch, depth-height-width) - grid_sizes_xyz = grid_sizes[:, [2, 1, 0]] - - # Convert from points_3d in the range [-1, 1] to - # indices in the volume grid in the range [0, grid_sizes_xyz-1] - points_3d_indices = ((points_3d + 1) * 0.5) * ( - grid_sizes_xyz[:, None].type_as(points_3d) - 1 - ) - XYZ = points_3d_indices.floor().long() - rXYZ = points_3d_indices - XYZ.type_as(points_3d) # remainder of floor - - # split into separate coordinate vectors - X, Y, Z = XYZ.split(1, dim=2) - # rX = remainder after floor = 1-"the weight of each vote into - # the X coordinate of the 8-neighborhood" - rX, rY, rZ = rXYZ.split(1, dim=2) - - # get random indices for the purpose of adding out-of-bounds values - rand_idx = X.new_zeros(X.shape).random_(0, n_voxels) - - # iterate over the x, y, z indices of the 8-neighborhood (xdiff, ydiff, zdiff) - for xdiff in (0, 1): - X_ = X + xdiff - wX = (1 - xdiff) + (2 * xdiff - 1) * rX - for ydiff in (0, 1): - Y_ = Y + ydiff - wY = (1 - ydiff) + (2 * ydiff - 1) * rY - for zdiff in (0, 1): - Z_ = Z + zdiff - wZ = (1 - zdiff) + (2 * zdiff - 1) * rZ - - # weight of each vote into the given cell of 8-neighborhood - w = wX * wY * wZ - - # valid - binary indicators of votes that fall into the volume - valid = ( - (0 <= X_) - * (X_ < grid_sizes_xyz[:, None, 0:1]) - * (0 <= Y_) - * (Y_ < grid_sizes_xyz[:, None, 1:2]) - * (0 <= Z_) - * (Z_ < grid_sizes_xyz[:, None, 2:3]) - ).long() - - # linearized indices into the volume - idx = (Z_ * grid_sizes[:, None, 1:2] + Y_) * grid_sizes[ - :, None, 2:3 - ] + X_ - - # out-of-bounds features added to a random voxel idx with weight=0. - idx_valid = idx * valid + rand_idx * (1 - valid) - w_valid = w * valid.type_as(w) - if mask is not None: - w_valid = w_valid * mask.type_as(w)[:, :, None] - - # scatter add casts the votes into the weight accumulator - # and the feature accumulator - volume_densities.scatter_add_(1, idx_valid, w_valid) - - # reshape idx_valid -> (minibatch, feature_dim, n_points) - idx_valid = idx_valid.view(ba, 1, n_points).expand_as(points_features) - w_valid = w_valid.view(ba, 1, n_points) - - # volume_features of shape (minibatch, feature_dim, n_voxels) - volume_features.scatter_add_(2, idx_valid, w_valid * points_features) - - # divide each feature by the total weight of the votes - volume_features = volume_features / volume_densities.view(ba, 1, n_voxels).clamp( - min_weight - ) - - return volume_features, volume_densities - - -def _round_points_to_volumes( - points_3d: torch.Tensor, - points_features: torch.Tensor, - volume_densities: torch.Tensor, - volume_features: torch.Tensor, - grid_sizes: torch.LongTensor, - mask: Optional[torch.Tensor] = None, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Convert a batch of point clouds to a batch of volumes using rounding to the - nearest integer coordinate of the volume. Features that fall into the same - voxel are averaged. - - Args: - points_3d: Batch of 3D point cloud coordinates of shape - `(minibatch, N, 3)` where N is the number of points - in each point cloud. 
Coordinates have to be specified in the - local volume coordinates (ranging in [-1, 1]). - points_features: Features of shape `(minibatch, N, feature_dim)` - corresponding to the points of the input point cloud `points_3d`. - volume_features: Batch of input *flattened* feature volumes - of shape `(minibatch, feature_dim, N_voxels)` - volume_densities: Batch of input *flattened* feature volume densities - of shape `(minibatch, 1, N_voxels)`. Each voxel should - contain a non-negative number corresponding to its - opaqueness (the higher, the less transparent). - grid_sizes: `LongTensor` of shape (minibatch, 3) representing the - spatial resolutions of each of the the non-flattened `volumes` tensors. - Note that the following has to hold: - `torch.prod(grid_sizes, dim=1)==N_voxels` - mask: A binary mask of shape `(minibatch, N)` determining which 3D points - are going to be converted to the resulting volume. - Set to `None` if all points are valid. - Returns: - volume_features: Output volume of shape `(minibatch, D, N_voxels)`. - volume_densities: Occupancy volume of shape `(minibatch, 1, N_voxels)` - containing the total amount of votes cast to each of the voxels. - """ - - _check_points_to_volumes_inputs( - points_3d, - points_features, - volume_densities, - volume_features, - grid_sizes, - mask=mask, - ) - - _, n_voxels, density_dim = volume_densities.shape - ba, n_points, feature_dim = points_features.shape - - # minibatch x n_points x feature_dim-> minibatch x feature_dim x n_points - points_features = points_features.permute(0, 2, 1).contiguous() - - # round the coordinates to nearest integer - # grid_sizes is of the form (minibatch, depth-height-width) - grid_sizes_xyz = grid_sizes[:, [2, 1, 0]] - XYZ = ((points_3d.detach() + 1) * 0.5) * ( - grid_sizes_xyz[:, None].type_as(points_3d) - 1 - ) - XYZ = torch.round(XYZ).long() - - # split into separate coordinate vectors - X, Y, Z = XYZ.split(1, dim=2) - - # valid - binary indicators of votes that fall into the volume - # pyre-fixme[9]: grid_sizes has type `LongTensor`; used as `Tensor`. - grid_sizes = grid_sizes.type_as(XYZ) - valid = ( - (0 <= X) - * (X < grid_sizes_xyz[:, None, 0:1]) - * (0 <= Y) - * (Y < grid_sizes_xyz[:, None, 1:2]) - * (0 <= Z) - * (Z < grid_sizes_xyz[:, None, 2:3]) - ).long() - if mask is not None: - valid = valid * mask[:, :, None].long() - - # get random indices for the purpose of adding out-of-bounds values - rand_idx = valid.new_zeros(X.shape).random_(0, n_voxels) - - # linearized indices into the volume - idx = (Z * grid_sizes[:, None, 1:2] + Y) * grid_sizes[:, None, 2:3] + X - - # out-of-bounds features added to a random voxel idx with weight=0. 
- idx_valid = idx * valid + rand_idx * (1 - valid) - w_valid = valid.type_as(volume_features) - - # scatter add casts the votes into the weight accumulator - # and the feature accumulator - volume_densities.scatter_add_(1, idx_valid, w_valid) - - # reshape idx_valid -> (minibatch, feature_dim, n_points) - idx_valid = idx_valid.view(ba, 1, n_points).expand_as(points_features) - w_valid = w_valid.view(ba, 1, n_points) - - # volume_features of shape (minibatch, feature_dim, n_voxels) - volume_features.scatter_add_(2, idx_valid, w_valid * points_features) - - # divide each feature by the total weight of the votes - volume_features = volume_features / volume_densities.view(ba, 1, n_voxels).clamp( - 1.0 - ) - - return volume_features, volume_densities diff --git a/pytorch3d/pytorch3d/ops/sample_farthest_points.py b/pytorch3d/pytorch3d/ops/sample_farthest_points.py deleted file mode 100644 index a2779e42dded1a4717943fb4dadb17166ea4baa1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/sample_farthest_points.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from random import randint -from typing import List, Optional, Tuple, Union - -import torch -from pytorch3d import _C - -from .utils import masked_gather - - -def sample_farthest_points( - points: torch.Tensor, - lengths: Optional[torch.Tensor] = None, - K: Union[int, List, torch.Tensor] = 50, - random_start_point: bool = False, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Iterative farthest point sampling algorithm [1] to subsample a set of - K points from a given pointcloud. At each iteration, a point is selected - which has the largest nearest neighbor distance to any of the - already selected points. - - Farthest point sampling provides more uniform coverage of the input - point cloud compared to uniform random sampling. - - [1] Charles R. Qi et al, "PointNet++: Deep Hierarchical Feature Learning - on Point Sets in a Metric Space", NeurIPS 2017. - - Args: - points: (N, P, D) array containing the batch of pointclouds - lengths: (N,) number of points in each pointcloud (to support heterogeneous - batches of pointclouds) - K: samples required in each sampled point cloud (this is typically << P). If - K is an int then the same number of samples are selected for each - pointcloud in the batch. If K is a tensor is should be length (N,) - giving the number of samples to select for each element in the batch - random_start_point: bool, if True, a random point is selected as the starting - point for iterative sampling. - - Returns: - selected_points: (N, K, D), array of selected values from points. If the input - K is a tensor, then the shape will be (N, max(K), D), and padded with - 0.0 for batch elements where k_i < max(K). - selected_indices: (N, K) array of selected indices. If the input - K is a tensor, then the shape will be (N, max(K), D), and padded with - -1 for batch elements where k_i < max(K). 
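# A short usage sketch for the farthest point sampling described above, assuming the
# usual `pytorch3d.ops` re-export; the C++/CUDA path needs the compiled extension,
# and `sample_farthest_points_naive` later in this file is the pure-PyTorch fallback.
import torch
from pytorch3d.ops import sample_farthest_points

torch.manual_seed(0)
points = torch.rand(2, 1000, 3)
sampled, idx = sample_farthest_points(points, K=64, random_start_point=True)
print(sampled.shape, idx.shape)                      # (2, 64, 3), (2, 64)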
- """ - N, P, D = points.shape - device = points.device - - # Validate inputs - if lengths is None: - lengths = torch.full((N,), P, dtype=torch.int64, device=device) - else: - if lengths.shape != (N,): - raise ValueError("points and lengths must have same batch dimension.") - if lengths.max() > P: - raise ValueError("A value in lengths was too large.") - - # TODO: support providing K as a ratio of the total number of points instead of as an int - if isinstance(K, int): - K = torch.full((N,), K, dtype=torch.int64, device=device) - elif isinstance(K, list): - K = torch.tensor(K, dtype=torch.int64, device=device) - - if K.shape[0] != N: - raise ValueError("K and points must have the same batch dimension") - - # Check dtypes are correct and convert if necessary - if not (points.dtype == torch.float32): - points = points.to(torch.float32) - if not (lengths.dtype == torch.int64): - lengths = lengths.to(torch.int64) - if not (K.dtype == torch.int64): - K = K.to(torch.int64) - - # Generate the starting indices for sampling - start_idxs = torch.zeros_like(lengths) - if random_start_point: - for n in range(N): - # pyre-fixme[6]: For 1st param expected `int` but got `Tensor`. - start_idxs[n] = torch.randint(high=lengths[n], size=(1,)).item() - - with torch.no_grad(): - # pyre-fixme[16]: `pytorch3d_._C` has no attribute `sample_farthest_points`. - idx = _C.sample_farthest_points(points, lengths, K, start_idxs) - sampled_points = masked_gather(points, idx) - - return sampled_points, idx - - -def sample_farthest_points_naive( - points: torch.Tensor, - lengths: Optional[torch.Tensor] = None, - K: Union[int, List, torch.Tensor] = 50, - random_start_point: bool = False, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Same Args/Returns as sample_farthest_points - """ - N, P, D = points.shape - device = points.device - - # Validate inputs - if lengths is None: - lengths = torch.full((N,), P, dtype=torch.int64, device=device) - else: - if lengths.shape != (N,): - raise ValueError("points and lengths must have same batch dimension.") - if lengths.max() > P: - raise ValueError("Invalid lengths.") - - # TODO: support providing K as a ratio of the total number of points instead of as an int - if isinstance(K, int): - K = torch.full((N,), K, dtype=torch.int64, device=device) - elif isinstance(K, list): - K = torch.tensor(K, dtype=torch.int64, device=device) - - if K.shape[0] != N: - raise ValueError("K and points must have the same batch dimension") - - # Find max value of K - max_K = torch.max(K) - - # List of selected indices from each batch element - all_sampled_indices = [] - - for n in range(N): - # Initialize an array for the sampled indices, shape: (max_K,) - sample_idx_batch = torch.full( - # pyre-fixme[6]: For 1st param expected `Union[List[int], Size, - # typing.Tuple[int, ...]]` but got `Tuple[Tensor]`. - (max_K,), - fill_value=-1, - dtype=torch.int64, - device=device, - ) - - # Initialize closest distances to inf, shape: (P,) - # This will be updated at each iteration to track the closest distance of the - # remaining points to any of the selected points - closest_dists = points.new_full( - # pyre-fixme[6]: For 1st param expected `Union[List[int], Size, - # typing.Tuple[int, ...]]` but got `Tuple[Tensor]`. - (lengths[n],), - float("inf"), - dtype=torch.float32, - ) - - # Select a random point index and save it as the starting point - # pyre-fixme[6]: For 2nd argument expected `int` but got `Tensor`. 
- selected_idx = randint(0, lengths[n] - 1) if random_start_point else 0 - sample_idx_batch[0] = selected_idx - - # If the pointcloud has fewer than K points then only iterate over the min - # pyre-fixme[6]: For 1st param expected `SupportsRichComparisonT` but got - # `Tensor`. - # pyre-fixme[6]: For 2nd param expected `SupportsRichComparisonT` but got - # `Tensor`. - k_n = min(lengths[n], K[n]) - - # Iteratively select points for a maximum of k_n - for i in range(1, k_n): - # Find the distance between the last selected point - # and all the other points. If a point has already been selected - # it's distance will be 0.0 so it will not be selected again as the max. - dist = points[n, selected_idx, :] - points[n, : lengths[n], :] - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and - # `int`. - dist_to_last_selected = (dist**2).sum(-1) # (P - i) - - # If closer than currently saved distance to one of the selected - # points, then updated closest_dists - closest_dists = torch.min(dist_to_last_selected, closest_dists) # (P - i) - - # The aim is to pick the point that has the largest - # nearest neighbour distance to any of the already selected points - selected_idx = torch.argmax(closest_dists) - sample_idx_batch[i] = selected_idx - - # Add the list of points for this batch to the final list - all_sampled_indices.append(sample_idx_batch) - - all_sampled_indices = torch.stack(all_sampled_indices, dim=0) - - # Gather the points - all_sampled_points = masked_gather(points, all_sampled_indices) - - # Return (N, max_K, D) subsampled points and indices - return all_sampled_points, all_sampled_indices diff --git a/pytorch3d/pytorch3d/ops/sample_points_from_meshes.py b/pytorch3d/pytorch3d/ops/sample_points_from_meshes.py deleted file mode 100644 index 2e2d34890ed552d4973250bab5ad7636d9089157..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/sample_points_from_meshes.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -""" -This module implements utility functions for sampling points from -batches of meshes. -""" -import sys -from typing import Tuple, Union - -import torch -from pytorch3d.ops.mesh_face_areas_normals import mesh_face_areas_normals -from pytorch3d.ops.packed_to_padded import packed_to_padded -from pytorch3d.renderer.mesh.rasterizer import Fragments as MeshFragments - - -def sample_points_from_meshes( - meshes, - num_samples: int = 10000, - return_normals: bool = False, - return_textures: bool = False, -) -> Union[ - torch.Tensor, - Tuple[torch.Tensor, torch.Tensor], - Tuple[torch.Tensor, torch.Tensor, torch.Tensor], -]: - """ - Convert a batch of meshes to a batch of pointclouds by uniformly sampling - points on the surface of the mesh with probability proportional to the - face area. - - Args: - meshes: A Meshes object with a batch of N meshes. - num_samples: Integer giving the number of point samples per mesh. - return_normals: If True, return normals for the sampled points. - return_textures: If True, return textures for the sampled points. - - Returns: - 3-element tuple containing - - - **samples**: FloatTensor of shape (N, num_samples, 3) giving the - coordinates of sampled points for each mesh in the batch. For empty - meshes the corresponding row in the samples array will be filled with 0. 
- - **normals**: FloatTensor of shape (N, num_samples, 3) giving a normal vector - to each sampled point. Only returned if return_normals is True. - For empty meshes the corresponding row in the normals array will - be filled with 0. - - **textures**: FloatTensor of shape (N, num_samples, C) giving a C-dimensional - texture vector to each sampled point. Only returned if return_textures is True. - For empty meshes the corresponding row in the textures array will - be filled with 0. - - Note that in a future releases, we will replace the 3-element tuple output - with a `Pointclouds` datastructure, as follows - - .. code-block:: python - - Pointclouds(samples, normals=normals, features=textures) - """ - if meshes.isempty(): - raise ValueError("Meshes are empty.") - - verts = meshes.verts_packed() - if not torch.isfinite(verts).all(): - raise ValueError("Meshes contain nan or inf.") - - if return_textures and meshes.textures is None: - raise ValueError("Meshes do not contain textures.") - - faces = meshes.faces_packed() - mesh_to_face = meshes.mesh_to_faces_packed_first_idx() - num_meshes = len(meshes) - num_valid_meshes = torch.sum(meshes.valid) # Non empty meshes. - - # Initialize samples tensor with fill value 0 for empty meshes. - samples = torch.zeros((num_meshes, num_samples, 3), device=meshes.device) - - # Only compute samples for non empty meshes - with torch.no_grad(): - areas, _ = mesh_face_areas_normals(verts, faces) # Face areas can be zero. - max_faces = meshes.num_faces_per_mesh().max().item() - areas_padded = packed_to_padded( - areas, mesh_to_face[meshes.valid], max_faces - ) # (N, F) - - # TODO (gkioxari) Confirm multinomial bug is not present with real data. - sample_face_idxs = areas_padded.multinomial( - num_samples, replacement=True - ) # (N, num_samples) - sample_face_idxs += mesh_to_face[meshes.valid].view(num_valid_meshes, 1) - - # Get the vertex coordinates of the sampled faces. - face_verts = verts[faces] - v0, v1, v2 = face_verts[:, 0], face_verts[:, 1], face_verts[:, 2] - - # Randomly generate barycentric coords. - w0, w1, w2 = _rand_barycentric_coords( - num_valid_meshes, num_samples, verts.dtype, verts.device - ) - - # Use the barycentric coords to get a point on each sampled face. - a = v0[sample_face_idxs] # (N, num_samples, 3) - b = v1[sample_face_idxs] - c = v2[sample_face_idxs] - samples[meshes.valid] = w0[:, :, None] * a + w1[:, :, None] * b + w2[:, :, None] * c - - if return_normals: - # Initialize normals tensor with fill value 0 for empty meshes. - # Normals for the sampled points are face normals computed from - # the vertices of the face in which the sampled point lies. - normals = torch.zeros((num_meshes, num_samples, 3), device=meshes.device) - vert_normals = (v1 - v0).cross(v2 - v1, dim=1) - vert_normals = vert_normals / vert_normals.norm(dim=1, p=2, keepdim=True).clamp( - min=sys.float_info.epsilon - ) - vert_normals = vert_normals[sample_face_idxs] - normals[meshes.valid] = vert_normals - - if return_textures: - # fragment data are of shape NxHxWxK. Here H=S, W=1 & K=1. 
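# A short usage sketch for sample_points_from_meshes (assuming pytorch3d is installed and
# provides the ico_sphere test mesh in pytorch3d.utils); numbers are illustrative only.
from pytorch3d.ops import sample_points_from_meshes
from pytorch3d.utils import ico_sphere

mesh = ico_sphere(level=2)                         # a unit-sphere Meshes object with one mesh
pts, normals = sample_points_from_meshes(mesh, num_samples=5000, return_normals=True)
# pts: (1, 5000, 3) surface samples, normals: (1, 5000, 3) per-sample face normals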
- pix_to_face = sample_face_idxs.view(len(meshes), num_samples, 1, 1) # NxSx1x1 - bary = torch.stack((w0, w1, w2), dim=2).unsqueeze(2).unsqueeze(2) # NxSx1x1x3 - # zbuf and dists are not used in `sample_textures` so we initialize them with dummy - dummy = torch.zeros( - (len(meshes), num_samples, 1, 1), device=meshes.device, dtype=torch.float32 - ) # NxSx1x1 - fragments = MeshFragments( - pix_to_face=pix_to_face, zbuf=dummy, bary_coords=bary, dists=dummy - ) - textures = meshes.sample_textures(fragments) # NxSx1x1xC - textures = textures[:, :, 0, 0, :] # NxSxC - - # return - # TODO(gkioxari) consider returning a Pointclouds instance [breaking] - if return_normals and return_textures: - # pyre-fixme[61]: `normals` may not be initialized here. - # pyre-fixme[61]: `textures` may not be initialized here. - return samples, normals, textures - if return_normals: # return_textures is False - # pyre-fixme[61]: `normals` may not be initialized here. - return samples, normals - if return_textures: # return_normals is False - # pyre-fixme[61]: `textures` may not be initialized here. - return samples, textures - return samples - - -def _rand_barycentric_coords( - size1, size2, dtype: torch.dtype, device: torch.device -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Helper function to generate random barycentric coordinates which are uniformly - distributed over a triangle. - - Args: - size1, size2: The number of coordinates generated will be size1*size2. - Output tensors will each be of shape (size1, size2). - dtype: Datatype to generate. - device: A torch.device object on which the outputs will be allocated. - - Returns: - w0, w1, w2: Tensors of shape (size1, size2) giving random barycentric - coordinates - """ - uv = torch.rand(2, size1, size2, dtype=dtype, device=device) - u, v = uv[0], uv[1] - u_sqrt = u.sqrt() - w0 = 1.0 - u_sqrt - w1 = u_sqrt * (1.0 - v) - w2 = u_sqrt * v - return w0, w1, w2 diff --git a/pytorch3d/pytorch3d/ops/subdivide_meshes.py b/pytorch3d/pytorch3d/ops/subdivide_meshes.py deleted file mode 100644 index 9a633ae2ca2ae3a0ddd1d3ed32606dc42710f177..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/subdivide_meshes.py +++ /dev/null @@ -1,470 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -import torch.nn as nn -from pytorch3d.structures import Meshes - - -class SubdivideMeshes(nn.Module): - """ - Subdivide a triangle mesh by adding a new vertex at the center of each edge - and dividing each face into four new faces. Vectors of vertex - attributes can also be subdivided by averaging the values of the attributes - at the two vertices which form each edge. This implementation - preserves face orientation - if the vertices of a face are all ordered - counter-clockwise, then the faces in the subdivided meshes will also have - their vertices ordered counter-clockwise. - - If meshes is provided as an input, the initializer performs the relatively - expensive computation of determining the new face indices. This one-time - computation can be reused for all meshes with the same face topology - but different vertex positions. - """ - - def __init__(self, meshes=None) -> None: - """ - Args: - meshes: Meshes object or None. 
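# A quick check (plain torch) that the square-root trick in _rand_barycentric_coords above
# yields valid barycentric weights: w0 + w1 + w2 = (1 - sqrt(u)) + sqrt(u)(1 - v) + sqrt(u)v = 1
# for any u, v in [0, 1]; the sqrt reparametrization is what makes the samples uniform over
# the triangle area rather than clustered near a vertex.
import torch

u, v = torch.rand(1000), torch.rand(1000)
w0, w1, w2 = 1.0 - u.sqrt(), u.sqrt() * (1.0 - v), u.sqrt() * v
assert torch.allclose(w0 + w1 + w2, torch.ones(1000))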
If a meshes object is provided, - the first mesh is used to compute the new faces of the - subdivided topology which can be reused for meshes with - the same input topology. - """ - super(SubdivideMeshes, self).__init__() - - self.precomputed = False - self._N = -1 - if meshes is not None: - # This computation is on indices, so gradients do not need to be - # tracked. - mesh = meshes[0] - with torch.no_grad(): - subdivided_faces = self.subdivide_faces(mesh) - if subdivided_faces.shape[1] != 3: - raise ValueError("faces can only have three vertices") - self.register_buffer("_subdivided_faces", subdivided_faces) - self.precomputed = True - - def subdivide_faces(self, meshes): - r""" - Args: - meshes: a Meshes object. - - Returns: - subdivided_faces_packed: (4*sum(F_n), 3) shape LongTensor of - original and new faces. - - Refer to pytorch3d.structures.meshes.py for more details on packed - representations of faces. - - Each face is split into 4 faces e.g. Input face - :: - v0 - /\ - / \ - / \ - e1 / \ e0 - / \ - / \ - / \ - /______________\ - v2 e2 v1 - - faces_packed = [[0, 1, 2]] - faces_packed_to_edges_packed = [[2, 1, 0]] - - `faces_packed_to_edges_packed` is used to represent all the new - vertex indices corresponding to the mid-points of edges in the mesh. - The actual vertex coordinates will be computed in the forward function. - To get the indices of the new vertices, offset - `faces_packed_to_edges_packed` by the total number of vertices. - :: - faces_packed_to_edges_packed = [[2, 1, 0]] + 3 = [[5, 4, 3]] - - e.g. subdivided face - :: - v0 - /\ - / \ - / f0 \ - v4 /______\ v3 - /\ /\ - / \ f3 / \ - / f2 \ / f1 \ - /______\/______\ - v2 v5 v1 - - f0 = [0, 3, 4] - f1 = [1, 5, 3] - f2 = [2, 4, 5] - f3 = [5, 4, 3] - - """ - verts_packed = meshes.verts_packed() - with torch.no_grad(): - faces_packed = meshes.faces_packed() - faces_packed_to_edges_packed = ( - meshes.faces_packed_to_edges_packed() + verts_packed.shape[0] - ) - - f0 = torch.stack( - [ - faces_packed[:, 0], - faces_packed_to_edges_packed[:, 2], - faces_packed_to_edges_packed[:, 1], - ], - dim=1, - ) - f1 = torch.stack( - [ - faces_packed[:, 1], - faces_packed_to_edges_packed[:, 0], - faces_packed_to_edges_packed[:, 2], - ], - dim=1, - ) - f2 = torch.stack( - [ - faces_packed[:, 2], - faces_packed_to_edges_packed[:, 1], - faces_packed_to_edges_packed[:, 0], - ], - dim=1, - ) - f3 = faces_packed_to_edges_packed - subdivided_faces_packed = torch.cat( - [f0, f1, f2, f3], dim=0 - ) # (4*sum(F_n), 3) - - return subdivided_faces_packed - - def forward(self, meshes, feats=None): - """ - Subdivide a batch of meshes by adding a new vertex on each edge, and - dividing each face into four new faces. New meshes contains two types - of vertices: - 1) Vertices that appear in the input meshes. - Data for these vertices are copied from the input meshes. - 2) New vertices at the midpoint of each edge. - Data for these vertices is the average of the data for the two - vertices that make up the edge. - - Args: - meshes: Meshes object representing a batch of meshes. - feats: Per-vertex features to be subdivided along with the verts. - Should be parallel to the packed vert representation of the - input meshes; so it should have shape (V, D) where V is the - total number of verts in the input meshes. Default: None. - - Returns: - 2-element tuple containing - - - **new_meshes**: Meshes object of a batch of subdivided meshes. - - **new_feats**: (optional) Tensor of subdivided feats, parallel to the - (packed) vertices of the subdivided meshes. 
Only returned - if feats is not None. - - """ - self._N = len(meshes) - if self.precomputed: - return self.subdivide_homogeneous(meshes, feats) - else: - return self.subdivide_heterogenerous(meshes, feats) - - def subdivide_homogeneous(self, meshes, feats=None): - """ - Subdivide verts (and optionally features) of a batch of meshes - where each mesh has the same topology of faces. The subdivided faces - are precomputed in the initializer. - - Args: - meshes: Meshes object representing a batch of meshes. - feats: Per-vertex features to be subdivided along with the verts. - - Returns: - 2-element tuple containing - - - **new_meshes**: Meshes object of a batch of subdivided meshes. - - **new_feats**: (optional) Tensor of subdivided feats, parallel to the - (packed) vertices of the subdivided meshes. Only returned - if feats is not None. - """ - verts = meshes.verts_padded() # (N, V, D) - edges = meshes[0].edges_packed() - - # The set of faces is the same across the different meshes. - new_faces = self._subdivided_faces.view(1, -1, 3).expand(self._N, -1, -1) - - # Add one new vertex at the midpoint of each edge by taking the average - # of the vertices that form each edge. - new_verts = verts[:, edges].mean(dim=2) - new_verts = torch.cat([verts, new_verts], dim=1) # (sum(V_n)+sum(E_n), 3) - new_feats = None - - # Calculate features for new vertices. - if feats is not None: - if feats.dim() == 2: - # feats is in packed format, transform it from packed to - # padded, i.e. (N*V, D) to (N, V, D). - feats = feats.view(verts.size(0), verts.size(1), feats.size(1)) - if feats.dim() != 3: - raise ValueError("features need to be of shape (N, V, D) or (N*V, D)") - - # Take average of the features at the vertices that form each edge. - new_feats = feats[:, edges].mean(dim=2) - new_feats = torch.cat([feats, new_feats], dim=1) # (sum(V_n)+sum(E_n), 3) - - new_meshes = Meshes(verts=new_verts, faces=new_faces) - - if feats is None: - return new_meshes - else: - return new_meshes, new_feats - - def subdivide_heterogenerous(self, meshes, feats=None): - """ - Subdivide faces, verts (and optionally features) of a batch of meshes - where each mesh can have different face topologies. - - Args: - meshes: Meshes object representing a batch of meshes. - feats: Per-vertex features to be subdivided along with the verts. - - Returns: - 2-element tuple containing - - - **new_meshes**: Meshes object of a batch of subdivided meshes. - - **new_feats**: (optional) Tensor of subdivided feats, parallel to the - (packed) vertices of the subdivided meshes. Only returned - if feats is not None. - """ - - # The computation of new faces is on face indices, so gradients do not - # need to be tracked. - verts = meshes.verts_packed() - with torch.no_grad(): - new_faces = self.subdivide_faces(meshes) - edges = meshes.edges_packed() - face_to_mesh_idx = meshes.faces_packed_to_mesh_idx() - edge_to_mesh_idx = meshes.edges_packed_to_mesh_idx() - num_edges_per_mesh = edge_to_mesh_idx.bincount(minlength=self._N) - num_verts_per_mesh = meshes.num_verts_per_mesh() - num_faces_per_mesh = meshes.num_faces_per_mesh() - - # Add one new vertex at the midpoint of each edge. - new_verts_per_mesh = num_verts_per_mesh + num_edges_per_mesh # (N,) - new_face_to_mesh_idx = torch.cat([face_to_mesh_idx] * 4, dim=0) - - # Calculate the indices needed to group the new and existing verts - # for each mesh. 
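# A small usage sketch for SubdivideMeshes (assuming pytorch3d is installed and exports the
# class from pytorch3d.ops); the single-triangle mesh below is illustrative only.
import torch
from pytorch3d.ops import SubdivideMeshes
from pytorch3d.structures import Meshes

verts = torch.tensor([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
faces = torch.tensor([[0, 1, 2]])
mesh = Meshes(verts=[verts], faces=[faces])
new_mesh = SubdivideMeshes()(mesh)
# One triangle becomes four: 3 original verts + 3 edge midpoints = 6 verts, 4 faces.
assert new_mesh.verts_packed().shape[0] == 6 and new_mesh.faces_packed().shape[0] == 4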
- verts_sort_idx = _create_verts_index( - num_verts_per_mesh, num_edges_per_mesh, meshes.device - ) # (sum(V_n)+sum(E_n),) - - verts_ordered_idx_init = torch.zeros( - new_verts_per_mesh.sum(), dtype=torch.int64, device=meshes.device - ) # (sum(V_n)+sum(E_n),) - - # Reassign vertex indices so that existing and new vertices for each - # mesh are sequential. - verts_ordered_idx = verts_ordered_idx_init.scatter_add( - 0, - verts_sort_idx, - torch.arange(new_verts_per_mesh.sum(), device=meshes.device), - ) - - # Retrieve vertex indices for each face. - new_faces = verts_ordered_idx[new_faces] - - # Calculate the indices needed to group the existing and new faces - # for each mesh. - face_sort_idx = _create_faces_index( - num_faces_per_mesh, device=meshes.device - ) - - # Reorder the faces to sequentially group existing and new faces - # for each mesh. - new_faces = new_faces[face_sort_idx] - new_face_to_mesh_idx = new_face_to_mesh_idx[face_sort_idx] - new_faces_per_mesh = new_face_to_mesh_idx.bincount( - minlength=self._N - ) # (sum(F_n)*4) - - # Add one new vertex at the midpoint of each edge by taking the average - # of the verts that form each edge. - new_verts = verts[edges].mean(dim=1) - new_verts = torch.cat([verts, new_verts], dim=0) - - # Reorder the verts to sequentially group existing and new verts for - # each mesh. - new_verts = new_verts[verts_sort_idx] - - if feats is not None: - new_feats = feats[edges].mean(dim=1) - new_feats = torch.cat([feats, new_feats], dim=0) - new_feats = new_feats[verts_sort_idx] - - verts_list = list(new_verts.split(new_verts_per_mesh.tolist(), 0)) - faces_list = list(new_faces.split(new_faces_per_mesh.tolist(), 0)) - new_verts_per_mesh_cumsum = torch.cat( - [ - new_verts_per_mesh.new_full(size=(1,), fill_value=0.0), - new_verts_per_mesh.cumsum(0)[:-1], - ], - dim=0, - ) - faces_list = [ - faces_list[n] - new_verts_per_mesh_cumsum[n] for n in range(self._N) - ] - if feats is not None: - feats_list = new_feats.split(new_verts_per_mesh.tolist(), 0) - new_meshes = Meshes(verts=verts_list, faces=faces_list) - - if feats is None: - return new_meshes - else: - new_feats = torch.cat(feats_list, dim=0) - return new_meshes, new_feats - - -def _create_verts_index(verts_per_mesh, edges_per_mesh, device=None): - """ - Helper function to group the vertex indices for each mesh. New vertices are - stacked at the end of the original verts tensor, so in order to have - sequential packing, the verts tensor needs to be reordered so that the - vertices corresponding to each mesh are grouped together. - - Args: - verts_per_mesh: Tensor of shape (N,) giving the number of vertices - in each mesh in the batch where N is the batch size. - edges_per_mesh: Tensor of shape (N,) giving the number of edges - in each mesh in the batch - - Returns: - verts_idx: A tensor with vert indices for each mesh ordered sequentially - by mesh index. - """ - # e.g. verts_per_mesh = (4, 5, 6) - # e.g. edges_per_mesh = (5, 7, 9) - - V = verts_per_mesh.sum() # e.g. 15 - E = edges_per_mesh.sum() # e.g. 21 - - verts_per_mesh_cumsum = verts_per_mesh.cumsum(dim=0) # (N,) e.g. (4, 9, 15) - edges_per_mesh_cumsum = edges_per_mesh.cumsum(dim=0) # (N,) e.g. (5, 12, 21) - - v_to_e_idx = verts_per_mesh_cumsum.clone() - - # vertex to edge index. - v_to_e_idx[1:] += edges_per_mesh_cumsum[ - :-1 - ] # e.g. (4, 9, 15) + (0, 5, 12) = (4, 14, 27) - - # vertex to edge offset. - v_to_e_offset = V - verts_per_mesh_cumsum # e.g. 15 - (4, 9, 15) = (11, 6, 0) - v_to_e_offset[1:] += edges_per_mesh_cumsum[ - :-1 - ] # e.g. 
(11, 6, 0) + (0, 5, 12) = (11, 11, 12) - e_to_v_idx = ( - verts_per_mesh_cumsum[:-1] + edges_per_mesh_cumsum[:-1] - ) # (4, 9) + (5, 12) = (9, 21) - e_to_v_offset = ( - verts_per_mesh_cumsum[:-1] - edges_per_mesh_cumsum[:-1] - V - ) # (4, 9) - (5, 12) - 15 = (-16, -18) - - # Add one new vertex per edge. - idx_diffs = torch.ones(V + E, device=device, dtype=torch.int64) # (36,) - idx_diffs[v_to_e_idx] += v_to_e_offset - idx_diffs[e_to_v_idx] += e_to_v_offset - - # e.g. - # [ - # 1, 1, 1, 1, 12, 1, 1, 1, 1, - # -15, 1, 1, 1, 1, 12, 1, 1, 1, 1, 1, 1, - # -17, 1, 1, 1, 1, 1, 13, 1, 1, 1, 1, 1, 1, 1 - # ] - - verts_idx = idx_diffs.cumsum(dim=0) - 1 - - # e.g. - # [ - # 0, 1, 2, 3, 15, 16, 17, 18, 19, --> mesh 0 - # 4, 5, 6, 7, 8, 20, 21, 22, 23, 24, 25, 26, --> mesh 1 - # 9, 10, 11, 12, 13, 14, 27, 28, 29, 30, 31, 32, 33, 34, 35 --> mesh 2 - # ] - # where for mesh 0, [0, 1, 2, 3] are the indices of the existing verts, and - # [15, 16, 17, 18, 19] are the indices of the new verts after subdivision. - - return verts_idx - - -def _create_faces_index(faces_per_mesh: torch.Tensor, device=None): - """ - Helper function to group the faces indices for each mesh. New faces are - stacked at the end of the original faces tensor, so in order to have - sequential packing, the faces tensor needs to be reordered to that faces - corresponding to each mesh are grouped together. - - Args: - faces_per_mesh: Tensor of shape (N,) giving the number of faces - in each mesh in the batch where N is the batch size. - - Returns: - faces_idx: A tensor with face indices for each mesh ordered sequentially - by mesh index. - """ - # e.g. faces_per_mesh = [2, 5, 3] - - F = faces_per_mesh.sum() # e.g. 10 - faces_per_mesh_cumsum = faces_per_mesh.cumsum(dim=0) # (N,) e.g. (2, 7, 10) - - switch1_idx = faces_per_mesh_cumsum.clone() - switch1_idx[1:] += ( - 3 * faces_per_mesh_cumsum[:-1] - ) # e.g. (2, 7, 10) + (0, 6, 21) = (2, 13, 31) - - switch2_idx = 2 * faces_per_mesh_cumsum # e.g. (4, 14, 20) - switch2_idx[1:] += ( - 2 * faces_per_mesh_cumsum[:-1] - ) # e.g. (4, 14, 20) + (0, 4, 14) = (4, 18, 34) - - switch3_idx = 3 * faces_per_mesh_cumsum # e.g. (6, 21, 30) - switch3_idx[1:] += faces_per_mesh_cumsum[ - :-1 - ] # e.g. (6, 21, 30) + (0, 2, 7) = (6, 23, 37) - - switch4_idx = 4 * faces_per_mesh_cumsum[:-1] # e.g. (8, 28) - - switch123_offset = F - faces_per_mesh # e.g. (8, 5, 7) - - # pyre-fixme[6]: For 1st param expected `Union[List[int], Size, - # typing.Tuple[int, ...]]` but got `Tensor`. - idx_diffs = torch.ones(4 * F, device=device, dtype=torch.int64) - idx_diffs[switch1_idx] += switch123_offset - idx_diffs[switch2_idx] += switch123_offset - idx_diffs[switch3_idx] += switch123_offset - idx_diffs[switch4_idx] -= 3 * F - - # e.g - # [ - # 1, 1, 9, 1, 9, 1, 9, 1, -> mesh 0 - # -29, 1, 1, 1, 1, 6, 1, 1, 1, 1, 6, 1, 1, 1, 1, 6, 1, 1, 1, 1, -> mesh 1 - # -29, 1, 1, 8, 1, 1, 8, 1, 1, 8, 1, 1 -> mesh 2 - # ] - - faces_idx = idx_diffs.cumsum(dim=0) - 1 - - # e.g. - # [ - # 0, 1, 10, 11, 20, 21, 30, 31, - # 2, 3, 4, 5, 6, 12, 13, 14, 15, 16, 22, 23, 24, 25, 26, 32, 33, 34, 35, 36, - # 7, 8, 9, 17, 18, 19, 27, 28, 29, 37, 38, 39 - # ] - # where for mesh 0, [0, 1] are the indices of the existing faces, and - # [10, 11, 20, 21, 30, 31] are the indices of the new faces after subdivision. 
- - return faces_idx diff --git a/pytorch3d/pytorch3d/ops/utils.py b/pytorch3d/pytorch3d/ops/utils.py deleted file mode 100644 index cb576d5b76b0cd7ef549b396a0e9743db874c21d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/utils.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional, Tuple, TYPE_CHECKING, Union - -import torch - -from .knn import knn_points - - -if TYPE_CHECKING: - from pytorch3d.structures import Pointclouds - - -def masked_gather(points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: - """ - Helper function for torch.gather to collect the points at - the given indices in idx where some of the indices might be -1 to - indicate padding. These indices are first replaced with 0. - Then the points are gathered after which the padded values - are set to 0.0. - - Args: - points: (N, P, D) float32 tensor of points - idx: (N, K) or (N, P, K) long tensor of indices into points, where - some indices are -1 to indicate padding - - Returns: - selected_points: (N, K, D) float32 tensor of points - at the given indices - """ - - if len(idx) != len(points): - raise ValueError("points and idx must have the same batch dimension") - - N, P, D = points.shape - - if idx.ndim == 3: - # Case: KNN, Ball Query where idx is of shape (N, P', K) - # where P' is not necessarily the same as P as the - # points may be gathered from a different pointcloud. - K = idx.shape[2] - # Match dimensions for points and indices - idx_expanded = idx[..., None].expand(-1, -1, -1, D) - points = points[:, :, None, :].expand(-1, -1, K, -1) - elif idx.ndim == 2: - # Farthest point sampling where idx is of shape (N, K) - idx_expanded = idx[..., None].expand(-1, -1, D) - else: - raise ValueError("idx format is not supported %s" % repr(idx.shape)) - - idx_expanded_mask = idx_expanded.eq(-1) - idx_expanded = idx_expanded.clone() - # Replace -1 values with 0 for gather - idx_expanded[idx_expanded_mask] = 0 - # Gather points - selected_points = points.gather(dim=1, index=idx_expanded) - # Replace padded values - selected_points[idx_expanded_mask] = 0.0 - return selected_points - - -def wmean( - x: torch.Tensor, - weight: Optional[torch.Tensor] = None, - dim: Union[int, Tuple[int]] = -2, - keepdim: bool = True, - eps: float = 1e-9, -) -> torch.Tensor: - """ - Finds the mean of the input tensor across the specified dimension. - If the `weight` argument is provided, computes weighted mean. - Args: - x: tensor of shape `(*, D)`, where D is assumed to be spatial; - weights: if given, non-negative tensor of shape `(*,)`. It must be - broadcastable to `x.shape[:-1]`. Note that the weights for - the last (spatial) dimension are assumed same; - dim: dimension(s) in `x` to average over; - keepdim: tells whether to keep the resulting singleton dimension. - eps: minimum clamping value in the denominator. - Returns: - the mean tensor: - * if `weights` is None => `mean(x, dim)`, - * otherwise => `sum(x*w, dim) / max{sum(w, dim), eps}`. - """ - args = {"dim": dim, "keepdim": keepdim} - - if weight is None: - # pyre-fixme[6]: For 1st param expected `Optional[dtype]` but got - # `Union[Tuple[int], int]`. 
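# A toy example for masked_gather above (the import path mirrors the file shown here,
# pytorch3d/ops/utils.py); -1 entries are treated as padding and come back as 0.0.
import torch
from pytorch3d.ops.utils import masked_gather

points = torch.arange(12, dtype=torch.float32).reshape(1, 4, 3)   # (N=1, P=4, D=3)
idx = torch.tensor([[2, 0, -1]])                                   # (N=1, K=3); -1 marks padding
out = masked_gather(points, idx)
# out[0, 0] == points[0, 2], out[0, 1] == points[0, 0], out[0, 2] == zeros(3)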
- return x.mean(**args) - - if any( - xd != wd and xd != 1 and wd != 1 - for xd, wd in zip(x.shape[-2::-1], weight.shape[::-1]) - ): - raise ValueError("wmean: weights are not compatible with the tensor") - - # pyre-fixme[6]: For 1st param expected `Optional[dtype]` but got - # `Union[Tuple[int], int]`. - return (x * weight[..., None]).sum(**args) / weight[..., None].sum(**args).clamp( - eps - ) - - -def eyes( - dim: int, - N: int, - device: Optional[torch.device] = None, - dtype: torch.dtype = torch.float32, -) -> torch.Tensor: - """ - Generates a batch of `N` identity matrices of shape `(N, dim, dim)`. - - Args: - **dim**: The dimensionality of the identity matrices. - **N**: The number of identity matrices. - **device**: The device to be used for allocating the matrices. - **dtype**: The datatype of the matrices. - - Returns: - **identities**: A batch of identity matrices of shape `(N, dim, dim)`. - """ - identities = torch.eye(dim, device=device, dtype=dtype) - return identities[None].repeat(N, 1, 1) - - -def convert_pointclouds_to_tensor(pcl: Union[torch.Tensor, "Pointclouds"]): - """ - If `type(pcl)==Pointclouds`, converts a `pcl` object to a - padded representation and returns it together with the number of points - per batch. Otherwise, returns the input itself with the number of points - set to the size of the second dimension of `pcl`. - """ - if is_pointclouds(pcl): - X = pcl.points_padded() # type: ignore - num_points = pcl.num_points_per_cloud() # type: ignore - elif torch.is_tensor(pcl): - X = pcl - num_points = X.shape[1] * torch.ones( # type: ignore - # pyre-fixme[16]: Item `Pointclouds` of `Union[Pointclouds, Tensor]` has - # no attribute `shape`. - X.shape[0], - device=X.device, - dtype=torch.int64, - ) - else: - raise ValueError( - "The inputs X, Y should be either Pointclouds objects or tensors." - ) - return X, num_points - - -def is_pointclouds(pcl: Union[torch.Tensor, "Pointclouds"]) -> bool: - """Checks whether the input `pcl` is an instance of `Pointclouds` - by checking the existence of `points_padded` and `num_points_per_cloud` - functions. - """ - return hasattr(pcl, "points_padded") and hasattr(pcl, "num_points_per_cloud") - - -def get_point_covariances( - points_padded: torch.Tensor, - num_points_per_cloud: torch.Tensor, - neighborhood_size: int, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Computes the per-point covariance matrices by of the 3D locations of - K-nearest neighbors of each point. - - Args: - **points_padded**: Input point clouds as a padded tensor - of shape `(minibatch, num_points, dim)`. - **num_points_per_cloud**: Number of points per cloud - of shape `(minibatch,)`. - **neighborhood_size**: Number of nearest neighbors for each point - used to estimate the covariance matrices. - - Returns: - **covariances**: A batch of per-point covariance matrices - of shape `(minibatch, dim, dim)`. - **k_nearest_neighbors**: A batch of `neighborhood_size` nearest - neighbors for each of the point cloud points - of shape `(minibatch, num_points, neighborhood_size, dim)`. 
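# A plain-torch numerical check of the weighted-mean formula implemented by wmean above:
# sum(x * w, dim) / clamp(sum(w, dim), eps). The import path mirrors the file shown here
# (pytorch3d/ops/utils.py); values are illustrative only.
import torch
from pytorch3d.ops.utils import wmean

x = torch.tensor([[[1.0, 1.0], [3.0, 3.0]]])     # (1, 2 points, 2 spatial dims)
w = torch.tensor([[1.0, 3.0]])                   # one non-negative weight per point
expected = (x * w[..., None]).sum(dim=-2, keepdim=True) / w[..., None].sum(dim=-2, keepdim=True)
assert torch.allclose(wmean(x, w), expected)     # both give 2.5 = (1*1 + 3*3) / (1 + 3)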
- """ - # get K nearest neighbor idx for each point in the point cloud - k_nearest_neighbors = knn_points( - points_padded, - points_padded, - lengths1=num_points_per_cloud, - lengths2=num_points_per_cloud, - K=neighborhood_size, - return_nn=True, - ).knn - # obtain the mean of the neighborhood - pt_mean = k_nearest_neighbors.mean(2, keepdim=True) - # compute the diff of the neighborhood and the mean of the neighborhood - central_diff = k_nearest_neighbors - pt_mean - # per-nn-point covariances - per_pt_cov = central_diff.unsqueeze(4) * central_diff.unsqueeze(3) - # per-point covariances - covariances = per_pt_cov.mean(2) - - return covariances, k_nearest_neighbors diff --git a/pytorch3d/pytorch3d/ops/vert_align.py b/pytorch3d/pytorch3d/ops/vert_align.py deleted file mode 100644 index f8181aebddecb1fe0e4ab7d9522e0eec60b2e560..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/ops/vert_align.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -import torch.nn.functional as F - - -def vert_align( - feats, - verts, - return_packed: bool = False, - interp_mode: str = "bilinear", - padding_mode: str = "zeros", - align_corners: bool = True, -) -> torch.Tensor: - """ - Sample vertex features from a feature map. This operation is called - "perceptual feature pooling" in [1] or "vert align" in [2]. - - [1] Wang et al, "Pixel2Mesh: Generating 3D Mesh Models from Single - RGB Images", ECCV 2018. - [2] Gkioxari et al, "Mesh R-CNN", ICCV 2019 - - Args: - feats: FloatTensor of shape (N, C, H, W) representing image features - from which to sample or a list of features each with potentially - different C, H or W dimensions. - verts: FloatTensor of shape (N, V, 3) or an object (e.g. Meshes or Pointclouds) - with `verts_padded' or `points_padded' as an attribute giving the (x, y, z) - vertex positions for which to sample. (x, y) verts should be normalized such - that (-1, -1) corresponds to top-left and (+1, +1) to bottom-right - location in the input feature map. - return_packed: (bool) Indicates whether to return packed features - interp_mode: (str) Specifies how to interpolate features. - ('bilinear' or 'nearest') - padding_mode: (str) Specifies how to handle vertices outside of the - [-1, 1] range. ('zeros', 'reflection', or 'border') - align_corners (bool): Geometrically, we consider the pixels of the - input as squares rather than points. - If set to ``True``, the extrema (``-1`` and ``1``) are considered as - referring to the center points of the input's corner pixels. If set - to ``False``, they are instead considered as referring to the corner - points of the input's corner pixels, making the sampling more - resolution agnostic. Default: ``True`` - - Returns: - feats_sampled: FloatTensor of shape (N, V, C) giving sampled features for each - vertex. If feats is a list, we return concatenated features in axis=2 of - shape (N, V, sum(C_n)) where C_n = feats[n].shape[1]. 
- If return_packed = True, the features are transformed to a packed - representation of shape (sum(V), C) - """ - if torch.is_tensor(verts): - if verts.dim() != 3: - raise ValueError("verts tensor should be 3 dimensional") - grid = verts - elif hasattr(verts, "verts_padded"): - grid = verts.verts_padded() - elif hasattr(verts, "points_padded"): - grid = verts.points_padded() - else: - raise ValueError( - "verts must be a tensor or have a " - + "`points_padded' or`verts_padded` attribute." - ) - - grid = grid[:, None, :, :2] # (N, 1, V, 2) - - if torch.is_tensor(feats): - feats = [feats] - for feat in feats: - if feat.dim() != 4: - raise ValueError("feats must have shape (N, C, H, W)") - if grid.shape[0] != feat.shape[0]: - raise ValueError("inconsistent batch dimension") - - feats_sampled = [] - for feat in feats: - feat_sampled = F.grid_sample( - feat, - grid, - mode=interp_mode, - padding_mode=padding_mode, - align_corners=align_corners, - ) # (N, C, 1, V) - feat_sampled = feat_sampled.squeeze(dim=2).transpose(1, 2) # (N, V, C) - feats_sampled.append(feat_sampled) - feats_sampled = torch.cat(feats_sampled, dim=2) # (N, V, sum(C)) - - if return_packed: - # flatten the first two dimensions: (N*V, C) - feats_sampled = feats_sampled.view(-1, feats_sampled.shape[-1]) - if hasattr(verts, "verts_padded_to_packed_idx"): - idx = ( - verts.verts_padded_to_packed_idx() - .view(-1, 1) - .expand(-1, feats_sampled.shape[-1]) - ) - feats_sampled = feats_sampled.gather(0, idx) # (sum(V), C) - - return feats_sampled diff --git a/pytorch3d/pytorch3d/renderer/__init__.py b/pytorch3d/pytorch3d/renderer/__init__.py deleted file mode 100644 index a667b012862f4018d9c8192d52c013cfacfde205..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/__init__.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
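# A short usage sketch for vert_align above (assuming pytorch3d is installed and exposes
# vert_align from pytorch3d.ops); shapes are illustrative only.
import torch
from pytorch3d.ops import vert_align

feats = torch.rand(2, 16, 32, 32)          # (N, C, H, W) feature map
verts = torch.rand(2, 100, 3) * 2 - 1      # (N, V, 3) with x, y in [-1, 1]
sampled = vert_align(feats, verts)         # (N, V, C) = (2, 100, 16)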
- -from .blending import ( - BlendParams, - hard_rgb_blend, - sigmoid_alpha_blend, - softmax_rgb_blend, -) -from .camera_utils import join_cameras_as_batch, rotate_on_spot -from .cameras import ( # deprecated # deprecated # deprecated # deprecated - camera_position_from_spherical_angles, - CamerasBase, - FoVOrthographicCameras, - FoVPerspectiveCameras, - get_world_to_view_transform, - look_at_rotation, - look_at_view_transform, - OpenGLOrthographicCameras, - OpenGLPerspectiveCameras, - OrthographicCameras, - PerspectiveCameras, - SfMOrthographicCameras, - SfMPerspectiveCameras, -) -from .implicit import ( - AbsorptionOnlyRaymarcher, - EmissionAbsorptionRaymarcher, - GridRaysampler, - HarmonicEmbedding, - HeterogeneousRayBundle, - ImplicitRenderer, - MonteCarloRaysampler, - MultinomialRaysampler, - NDCGridRaysampler, - NDCMultinomialRaysampler, - ray_bundle_to_ray_points, - ray_bundle_variables_to_ray_points, - RayBundle, - VolumeRenderer, - VolumeSampler, -) -from .lighting import AmbientLights, diffuse, DirectionalLights, PointLights, specular -from .materials import Materials -from .mesh import ( - gouraud_shading, - HardFlatShader, - HardGouraudShader, - HardPhongShader, - MeshRasterizer, - MeshRenderer, - MeshRendererWithFragments, - phong_shading, - RasterizationSettings, - rasterize_meshes, - SoftGouraudShader, - SoftPhongShader, - SoftSilhouetteShader, - SplatterPhongShader, - Textures, - TexturesAtlas, - TexturesUV, - TexturesVertex, -) - -from .points import ( - AlphaCompositor, - NormWeightedCompositor, - PointsRasterizationSettings, - PointsRasterizer, - PointsRenderer, - PulsarPointsRenderer, - rasterize_points, -) -from .splatter_blend import SplatterBlender -from .utils import ( - convert_to_tensors_and_broadcast, - ndc_grid_sample, - ndc_to_grid_sample_coords, - TensorProperties, -) - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/renderer/blending.py b/pytorch3d/pytorch3d/renderer/blending.py deleted file mode 100644 index 07c9243727cb2e751b61a2744bfbf848eb12b942..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/blending.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import NamedTuple, Sequence, Union - -import torch -from pytorch3d import _C -from pytorch3d.common.datatypes import Device - -# Example functions for blending the top K colors per pixel using the outputs -# from rasterization. -# NOTE: All blending function should return an RGBA image per batch element - - -class BlendParams(NamedTuple): - """ - Data class to store blending params with defaults - - Members: - sigma (float): For SoftmaxPhong, controls the width of the sigmoid - function used to calculate the 2D distance based probability. Determines - the sharpness of the edges of the shape. Higher => faces have less defined - edges. For SplatterPhong, this is the standard deviation of the Gaussian - kernel. Higher => splats have a stronger effect and the rendered image is - more blurry. - gamma (float): Controls the scaling of the exponential function used - to set the opacity of the color. - Higher => faces are more transparent. - background_color: RGB values for the background color as a tuple or - as a tensor of three floats. 
- """ - - sigma: float = 1e-4 - gamma: float = 1e-4 - background_color: Union[torch.Tensor, Sequence[float]] = (1.0, 1.0, 1.0) - - -def _get_background_color( - blend_params: BlendParams, device: Device, dtype=torch.float32 -) -> torch.Tensor: - background_color_ = blend_params.background_color - if isinstance(background_color_, torch.Tensor): - background_color = background_color_.to(device) - else: - background_color = torch.tensor(background_color_, dtype=dtype, device=device) - return background_color - - -def hard_rgb_blend( - colors: torch.Tensor, fragments, blend_params: BlendParams -) -> torch.Tensor: - """ - Naive blending of top K faces to return an RGBA image - - **RGB** - choose color of the closest point i.e. K=0 - - **A** - 1.0 - - Args: - colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel. - fragments: the outputs of rasterization. From this we use - - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. This is used to - determine the output shape. - blend_params: BlendParams instance that contains a background_color - field specifying the color for the background - Returns: - RGBA pixel_colors: (N, H, W, 4) - """ - background_color = _get_background_color(blend_params, fragments.pix_to_face.device) - - # Mask for the background. - is_background = fragments.pix_to_face[..., 0] < 0 # (N, H, W) - - # Find out how much background_color needs to be expanded to be used for masked_scatter. - num_background_pixels = is_background.sum() - - # Set background color. - pixel_colors = colors[..., 0, :].masked_scatter( - is_background[..., None], - background_color[None, :].expand(num_background_pixels, -1), - ) # (N, H, W, 3) - - # Concat with the alpha channel. - alpha = (~is_background).type_as(pixel_colors)[..., None] - - return torch.cat([pixel_colors, alpha], dim=-1) # (N, H, W, 4) - - -# Wrapper for the C++/CUDA Implementation of sigmoid alpha blend. -class _SigmoidAlphaBlend(torch.autograd.Function): - @staticmethod - def forward(ctx, dists, pix_to_face, sigma): - alphas = _C.sigmoid_alpha_blend(dists, pix_to_face, sigma) - ctx.save_for_backward(dists, pix_to_face, alphas) - ctx.sigma = sigma - return alphas - - @staticmethod - def backward(ctx, grad_alphas): - dists, pix_to_face, alphas = ctx.saved_tensors - sigma = ctx.sigma - grad_dists = _C.sigmoid_alpha_blend_backward( - grad_alphas, alphas, dists, pix_to_face, sigma - ) - return grad_dists, None, None - - -_sigmoid_alpha = _SigmoidAlphaBlend.apply - - -def sigmoid_alpha_blend(colors, fragments, blend_params: BlendParams) -> torch.Tensor: - """ - Silhouette blending to return an RGBA image - - **RGB** - choose color of the closest point. - - **A** - blend based on the 2D distance based probability map [1]. - - Args: - colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel. - fragments: the outputs of rasterization. From this we use - - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. - - dists: FloatTensor of shape (N, H, W, K) specifying - the 2D euclidean distance from the center of each pixel - to each of the top K overlapping faces. 
- - Returns: - RGBA pixel_colors: (N, H, W, 4) - - [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based - 3D Reasoning', ICCV 2019 - """ - N, H, W, K = fragments.pix_to_face.shape - pixel_colors = torch.ones((N, H, W, 4), dtype=colors.dtype, device=colors.device) - pixel_colors[..., :3] = colors[..., 0, :] - alpha = _sigmoid_alpha(fragments.dists, fragments.pix_to_face, blend_params.sigma) - pixel_colors[..., 3] = alpha - return pixel_colors - - -def softmax_rgb_blend( - colors: torch.Tensor, - fragments, - blend_params: BlendParams, - znear: Union[float, torch.Tensor] = 1.0, - zfar: Union[float, torch.Tensor] = 100, -) -> torch.Tensor: - """ - RGB and alpha channel blending to return an RGBA image based on the method - proposed in [1] - - **RGB** - blend the colors based on the 2D distance based probability map and - relative z distances. - - **A** - blend based on the 2D distance based probability map. - - Args: - colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel. - fragments: namedtuple with outputs of rasterization. We use properties - - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. - - dists: FloatTensor of shape (N, H, W, K) specifying - the 2D euclidean distance from the center of each pixel - to each of the top K overlapping faces. - - zbuf: FloatTensor of shape (N, H, W, K) specifying - the interpolated depth from each pixel to to each of the - top K overlapping faces. - blend_params: instance of BlendParams dataclass containing properties - - sigma: float, parameter which controls the width of the sigmoid - function used to calculate the 2D distance based probability. - Sigma controls the sharpness of the edges of the shape. - - gamma: float, parameter which controls the scaling of the - exponential function used to control the opacity of the color. - - background_color: (3) element list/tuple/torch.Tensor specifying - the RGB values for the background color. - znear: float, near clipping plane in the z direction - zfar: float, far clipping plane in the z direction - - Returns: - RGBA pixel_colors: (N, H, W, 4) - - [0] Shichen Liu et al, 'Soft Rasterizer: A Differentiable Renderer for - Image-based 3D Reasoning' - """ - - N, H, W, K = fragments.pix_to_face.shape - pixel_colors = torch.ones((N, H, W, 4), dtype=colors.dtype, device=colors.device) - background_color = _get_background_color(blend_params, fragments.pix_to_face.device) - - # Weight for background color - eps = 1e-10 - - # Mask for padded pixels. - mask = fragments.pix_to_face >= 0 - - # Sigmoid probability map based on the distance of the pixel to the face. - prob_map = torch.sigmoid(-fragments.dists / blend_params.sigma) * mask - - # The cumulative product ensures that alpha will be 0.0 if at least 1 - # face fully covers the pixel as for that face, prob will be 1.0. - # This results in a multiplication by 0.0 because of the (1.0 - prob) - # term. Therefore 1.0 - alpha will be 1.0. - alpha = torch.prod((1.0 - prob_map), dim=-1) - - # Weights for each face. Adjust the exponential by the max z to prevent - # overflow. zbuf shape (N, H, W, K), find max over K. - # TODO: there may still be some instability in the exponent calculation. 
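# A toy, standalone illustration (plain torch) of the exp-normalize weighting used just
# below: each of the K faces gets weight prob_k * exp((z_inv_k - z_inv_max) / gamma), the
# background gets delta = exp((eps - z_inv_max) / gamma), and everything is divided by
# their sum, which keeps the exponentials numerically stable. Values are illustrative only.
import torch

gamma, eps = 1e-1, 1e-10
prob = torch.tensor([0.9, 0.4])            # 2D-distance probabilities for K=2 faces
z_inv = torch.tensor([0.8, 0.3])           # normalized inverse depth, larger = closer
z_inv_max = z_inv.max()
w = prob * torch.exp((z_inv - z_inv_max) / gamma)
delta = torch.exp((eps - z_inv_max) / gamma)
weights = w / (w.sum() + delta)            # per-face contribution to the pixel color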
- - # Reshape to be compatible with (N, H, W, K) values in fragments - if torch.is_tensor(zfar): - # pyre-fixme[16] - zfar = zfar[:, None, None, None] - if torch.is_tensor(znear): - # pyre-fixme[16]: Item `float` of `Union[float, Tensor]` has no attribute - # `__getitem__`. - znear = znear[:, None, None, None] - - z_inv = (zfar - fragments.zbuf) / (zfar - znear) * mask - z_inv_max = torch.max(z_inv, dim=-1).values[..., None].clamp(min=eps) - weights_num = prob_map * torch.exp((z_inv - z_inv_max) / blend_params.gamma) - - # Also apply exp normalize trick for the background color weight. - # Clamp to ensure delta is never 0. - # pyre-fixme[6]: Expected `Tensor` for 1st param but got `float`. - delta = torch.exp((eps - z_inv_max) / blend_params.gamma).clamp(min=eps) - - # Normalize weights. - # weights_num shape: (N, H, W, K). Sum over K and divide through by the sum. - denom = weights_num.sum(dim=-1)[..., None] + delta - - # Sum: weights * textures + background color - weighted_colors = (weights_num[..., None] * colors).sum(dim=-2) - weighted_background = delta * background_color - pixel_colors[..., :3] = (weighted_colors + weighted_background) / denom - pixel_colors[..., 3] = 1.0 - alpha - - return pixel_colors diff --git a/pytorch3d/pytorch3d/renderer/camera_conversions.py b/pytorch3d/pytorch3d/renderer/camera_conversions.py deleted file mode 100644 index 7617513d3235ac8a8e59d8e1b737a03194112c47..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/camera_conversions.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import logging -from typing import Tuple - -import torch - -from ..transforms import matrix_to_rotation_6d -from .cameras import PerspectiveCameras - - -LOGGER = logging.getLogger(__name__) - - -def _cameras_from_opencv_projection( - R: torch.Tensor, - tvec: torch.Tensor, - camera_matrix: torch.Tensor, - image_size: torch.Tensor, -) -> PerspectiveCameras: - focal_length = torch.stack([camera_matrix[:, 0, 0], camera_matrix[:, 1, 1]], dim=-1) - principal_point = camera_matrix[:, :2, 2] - - # Retype the image_size correctly and flip to width, height. - image_size_wh = image_size.to(R).flip(dims=(1,)) - - # Screen to NDC conversion: - # For non square images, we scale the points such that smallest side - # has range [-1, 1] and the largest side has range [-u, u], with u > 1. - # This convention is consistent with the PyTorch3D renderer, as well as - # the transformation function `get_ndc_to_screen_transform`. - scale = image_size_wh.to(R).min(dim=1, keepdim=True)[0] / 2.0 - scale = scale.expand(-1, 2) - c0 = image_size_wh / 2.0 - - # Get the PyTorch3D focal length and principal point. - focal_pytorch3d = focal_length / scale - p0_pytorch3d = -(principal_point - c0) / scale - - # For R, T we flip x, y axes (opencv screen space has an opposite - # orientation of screen axes). - # We also transpose R (opencv multiplies points from the opposite=left side). 
- R_pytorch3d = R.clone().permute(0, 2, 1) - T_pytorch3d = tvec.clone() - R_pytorch3d[:, :, :2] *= -1 - T_pytorch3d[:, :2] *= -1 - - return PerspectiveCameras( - R=R_pytorch3d, - T=T_pytorch3d, - focal_length=focal_pytorch3d, - principal_point=p0_pytorch3d, - image_size=image_size, - device=R.device, - ) - - -def _opencv_from_cameras_projection( - cameras: PerspectiveCameras, - image_size: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - R_pytorch3d = cameras.R.clone() - T_pytorch3d = cameras.T.clone() - focal_pytorch3d = cameras.focal_length - p0_pytorch3d = cameras.principal_point - T_pytorch3d[:, :2] *= -1 - R_pytorch3d[:, :, :2] *= -1 - tvec = T_pytorch3d - R = R_pytorch3d.permute(0, 2, 1) - - # Retype the image_size correctly and flip to width, height. - image_size_wh = image_size.to(R).flip(dims=(1,)) - - # NDC to screen conversion. - scale = image_size_wh.to(R).min(dim=1, keepdim=True)[0] / 2.0 - scale = scale.expand(-1, 2) - c0 = image_size_wh / 2.0 - - principal_point = -p0_pytorch3d * scale + c0 - focal_length = focal_pytorch3d * scale - - camera_matrix = torch.zeros_like(R) - camera_matrix[:, :2, 2] = principal_point - camera_matrix[:, 2, 2] = 1.0 - camera_matrix[:, 0, 0] = focal_length[:, 0] - camera_matrix[:, 1, 1] = focal_length[:, 1] - return R, tvec, camera_matrix - - -def _pulsar_from_opencv_projection( - R: torch.Tensor, - tvec: torch.Tensor, - camera_matrix: torch.Tensor, - image_size: torch.Tensor, - znear: float = 0.1, -) -> torch.Tensor: - assert len(camera_matrix.size()) == 3, "This function requires batched inputs!" - assert len(R.size()) == 3, "This function requires batched inputs!" - assert len(tvec.size()) in (2, 3), "This function reuqires batched inputs!" - - # Validate parameters. - image_size_wh = image_size.to(R).flip(dims=(1,)) - assert torch.all( - image_size_wh > 0 - ), "height and width must be positive but min is: %s" % ( - str(image_size_wh.min().item()) - ) - assert ( - camera_matrix.size(1) == 3 and camera_matrix.size(2) == 3 - ), "Incorrect camera matrix shape: expected 3x3 but got %dx%d" % ( - camera_matrix.size(1), - camera_matrix.size(2), - ) - assert ( - R.size(1) == 3 and R.size(2) == 3 - ), "Incorrect R shape: expected 3x3 but got %dx%d" % ( - R.size(1), - R.size(2), - ) - if len(tvec.size()) == 2: - tvec = tvec.unsqueeze(2) - assert ( - tvec.size(1) == 3 and tvec.size(2) == 1 - ), "Incorrect tvec shape: expected 3x1 but got %dx%d" % ( - tvec.size(1), - tvec.size(2), - ) - # Check batch size. - batch_size = camera_matrix.size(0) - assert R.size(0) == batch_size, "Expected R to have batch size %d. Has size %d." % ( - batch_size, - R.size(0), - ) - assert ( - tvec.size(0) == batch_size - ), "Expected tvec to have batch size %d. Has size %d." % ( - batch_size, - tvec.size(0), - ) - # Check image sizes. - image_w = image_size_wh[0, 0] - image_h = image_size_wh[0, 1] - assert torch.all( - image_size_wh[:, 0] == image_w - ), "All images in a batch must have the same width!" - assert torch.all( - image_size_wh[:, 1] == image_h - ), "All images in a batch must have the same height!" - # Focal length. - fx = camera_matrix[:, 0, 0].unsqueeze(1) - fy = camera_matrix[:, 1, 1].unsqueeze(1) - # Check that we introduce less than 1% error by averaging the focal lengths. - fx_y = fx / fy - if torch.any(fx_y > 1.01) or torch.any(fx_y < 0.99): - LOGGER.warning( - "Pulsar only supports a single focal lengths. For converting OpenCV " - "focal lengths, we average them for x and y directions. 
" - "The focal lengths for x and y you provided differ by more than 1%, " - "which means this could introduce a noticeable error." - ) - f = (fx + fy) / 2 - # Normalize f into normalized device coordinates. - focal_length_px = f / image_w - # Transfer into focal_length and sensor_width. - focal_length = torch.tensor([znear - 1e-5], dtype=torch.float32, device=R.device) - focal_length = focal_length[None, :].repeat(batch_size, 1) - sensor_width = focal_length / focal_length_px - # Principal point. - cx = camera_matrix[:, 0, 2].unsqueeze(1) - cy = camera_matrix[:, 1, 2].unsqueeze(1) - # Transfer principal point offset into centered offset. - cx = -(cx - image_w / 2) - cy = cy - image_h / 2 - # Concatenate to final vector. - param = torch.cat([focal_length, sensor_width, cx, cy], dim=1) - R_trans = R.permute(0, 2, 1) - cam_pos = -torch.bmm(R_trans, tvec).squeeze(2) - cam_rot = matrix_to_rotation_6d(R_trans) - cam_params = torch.cat([cam_pos, cam_rot, param], dim=1) - return cam_params - - -def _pulsar_from_cameras_projection( - cameras: PerspectiveCameras, - image_size: torch.Tensor, -) -> torch.Tensor: - opencv_R, opencv_T, opencv_K = _opencv_from_cameras_projection(cameras, image_size) - return _pulsar_from_opencv_projection(opencv_R, opencv_T, opencv_K, image_size) diff --git a/pytorch3d/pytorch3d/renderer/camera_utils.py b/pytorch3d/pytorch3d/renderer/camera_utils.py deleted file mode 100644 index 1bddcaf23335d4515135b08471147c0ea7338358..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/camera_utils.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Sequence, Tuple - -import torch -from pytorch3d.transforms import Transform3d - -from .cameras import CamerasBase - - -def camera_to_eye_at_up( - world_to_view_transform: Transform3d, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Given a world to view transform, return the eye, at and up vectors which - represent its position. - - For example, if cam is a camera object, then after running - - .. code-block:: - - from cameras import look_at_view_transform - eye, at, up = camera_to_eye_at_up(cam.get_world_to_view_transform()) - R, T = look_at_view_transform(eye=eye, at=at, up=up) - - any other camera created from R and T will have the same world to view - transform as cam. - - Also, given a camera position R and T, then after running: - - .. code-block:: - - from cameras import get_world_to_view_transform, look_at_view_transform - eye, at, up = camera_to_eye_at_up(get_world_to_view_transform(R=R, T=T)) - R2, T2 = look_at_view_transform(eye=eye, at=at, up=up) - - R2 will equal R and T2 will equal T. - - Args: - world_to_view_transform: Transform3d representing the extrinsic - transformation of N cameras. - - Returns: - eye: FloatTensor of shape [N, 3] representing the camera centers in world space. - at: FloatTensor of shape [N, 3] representing points in world space directly in - front of the cameras e.g. the positions of objects to be viewed by the - cameras. - up: FloatTensor of shape [N, 3] representing vectors in world space which - when projected on to the camera plane point upwards. - """ - cam_trans = world_to_view_transform.inverse() - # In the PyTorch3D right handed coordinate system, the camera in view space - # is always at the origin looking along the +z axis. 
- - # The up vector is not a position so cannot be transformed with - # transform_points. However the position eye+up above the camera - # (whose position vector in the camera coordinate frame is an up vector) - # can be transformed with transform_points. - eye_at_up_view = torch.tensor( - [[0, 0, 0], [0, 0, 1], [0, 1, 0]], dtype=torch.float32, device=cam_trans.device - ) - eye_at_up_world = cam_trans.transform_points(eye_at_up_view).reshape(-1, 3, 3) - - eye, at, up_plus_eye = eye_at_up_world.unbind(1) - up = up_plus_eye - eye - return eye, at, up - - -def rotate_on_spot( - R: torch.Tensor, T: torch.Tensor, rotation: torch.Tensor -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Given a camera position as R and T (batched or not), - and a rotation matrix (batched or not) - return a new R and T representing camera position(s) - in the same location but rotated on the spot by the - given rotation. In particular the new world to view - rotation will be the previous one followed by the inverse - of the given rotation. - - For example, adding the following lines before constructing a camera - will make the camera point a little to the right of where it - otherwise would have been. - - .. code-block:: - - from math import radians - from pytorch3d.transforms import axis_angle_to_matrix - angles = [0, radians(10), 0] - rotation = axis_angle_to_matrix(torch.FloatTensor(angles)) - R, T = rotate_on_spot(R, T, rotation) - - Note here that if you have a column vector, then when you - premultiply it by this `rotation` (see the rotation_conversions doc), - then it will be rotated anticlockwise if facing the -y axis. - In our context, where we postmultiply row vectors to transform them, - `rotation` will rotate the camera clockwise around the -y axis - (i.e. when looking down), which is a turn to the right. - - If angles was [radians(10), 0, 0], the camera would get pointed - up a bit instead. - - If angles was [0, 0, radians(10)], the camera would be rotated anticlockwise - a bit, so the image would appear rotated clockwise from how it - otherwise would have been. - - If you want to translate the camera from the origin in camera - coordinates, this is simple and does not need a separate function. - In particular, a translation by X = [a, b, c] would cause - the camera to move a units left, b units up, and c units - forward. This is achieved by using T-X in place of T. - - Args: - R: FloatTensor of shape [3, 3] or [N, 3, 3] - T: FloatTensor of shape [3] or [N, 3] - rotation: FloatTensor of shape [3, 3] or [n, 3, 3] - where if neither n nor N is 1, then n and N must be equal. - - Returns: - R: FloatTensor of shape [max(N, n), 3, 3] - T: FloatTensor of shape [max(N, n), 3] - """ - if R.ndim == 2: - R = R[None] - if T.ndim == 1: - T = T[None] - if rotation.ndim == 2: - rotation = rotation[None] - - if R.ndim != 3 or R.shape[1:] != (3, 3): - raise ValueError("Invalid R") - if T.ndim != 2 or T.shape[1] != 3: - raise ValueError("Invalid T") - if rotation.ndim != 3 or rotation.shape[1:] != (3, 3): - raise ValueError("Invalid rotation") - - new_R = R @ rotation.transpose(1, 2) - old_RT = torch.bmm(R, T[:, :, None]) - new_T = torch.matmul(new_R.transpose(1, 2), old_RT)[:, :, 0] - - return new_R, new_T - - -def join_cameras_as_batch(cameras_list: Sequence[CamerasBase]) -> CamerasBase: - """ - Create a batched cameras object by concatenating a list of input - cameras objects. All the tensor attributes will be joined along - the batch dimension. 
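# Editorial sketch, not part of the deleted source: turning an existing camera about
# 10 degrees to the right "on the spot" with rotate_on_spot (defined just above).
# The look_at parameters are illustrative values.
from math import radians

import torch
from pytorch3d.renderer.camera_utils import rotate_on_spot
from pytorch3d.renderer.cameras import look_at_view_transform
from pytorch3d.transforms import axis_angle_to_matrix

R, T = look_at_view_transform(dist=3.0, elev=10.0, azim=0.0)          # (1, 3, 3), (1, 3)
rotation = axis_angle_to_matrix(torch.tensor([0.0, radians(10), 0.0]))
R_new, T_new = rotate_on_spot(R, T, rotation)                          # same camera centre, new gaze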
- - Args: - cameras_list: List of camera classes all of the same type and - on the same device. Each represents one or more cameras. - Returns: - cameras: single batched cameras object of the same - type as all the objects in the input list. - """ - # Get the type and fields to join from the first camera in the batch - c0 = cameras_list[0] - fields = c0._FIELDS - shared_fields = c0._SHARED_FIELDS - - if not all(isinstance(c, CamerasBase) for c in cameras_list): - raise ValueError("cameras in cameras_list must inherit from CamerasBase") - - if not all(type(c) is type(c0) for c in cameras_list[1:]): - raise ValueError("All cameras must be of the same type") - - if not all(c.device == c0.device for c in cameras_list[1:]): - raise ValueError("All cameras in the batch must be on the same device") - - # Concat the fields to make a batched tensor - kwargs = {} - kwargs["device"] = c0.device - - for field in fields: - field_not_none = [(getattr(c, field) is not None) for c in cameras_list] - if not any(field_not_none): - continue - if not all(field_not_none): - raise ValueError(f"Attribute {field} is inconsistently present") - - attrs_list = [getattr(c, field) for c in cameras_list] - - if field in shared_fields: - # Only needs to be set once - if not all(a == attrs_list[0] for a in attrs_list): - raise ValueError(f"Attribute {field} is not constant across inputs") - - # e.g. "in_ndc" is set as attribute "_in_ndc" on the class - # but provided as "in_ndc" in the input args - if field.startswith("_"): - field = field[1:] - - kwargs[field] = attrs_list[0] - elif isinstance(attrs_list[0], torch.Tensor): - # In the init, all inputs will be converted to - # batched tensors before set as attributes - # Join as a tensor along the batch dimension - kwargs[field] = torch.cat(attrs_list, dim=0) - else: - raise ValueError(f"Field {field} type is not supported for batching") - - return c0.__class__(**kwargs) diff --git a/pytorch3d/pytorch3d/renderer/cameras.py b/pytorch3d/pytorch3d/renderer/cameras.py deleted file mode 100644 index 97c1dfd8e509f57cfff8f80608c5f5becb7124c6..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/cameras.py +++ /dev/null @@ -1,1874 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -import warnings -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union - -import numpy as np -import torch -import torch.nn.functional as F -from pytorch3d.common.datatypes import Device -from pytorch3d.transforms import Rotate, Transform3d, Translate - -from .utils import convert_to_tensors_and_broadcast, TensorProperties - - -# Default values for rotation and translation matrices. -_R = torch.eye(3)[None] # (1, 3, 3) -_T = torch.zeros(1, 3) # (1, 3) - -# An input which is a float per batch element -_BatchFloatType = Union[float, Sequence[float], torch.Tensor] - -# one or two floats per batch element -_FocalLengthType = Union[ - float, Sequence[Tuple[float]], Sequence[Tuple[float, float]], torch.Tensor -] - - -class CamerasBase(TensorProperties): - """ - `CamerasBase` implements a base class for all cameras. - - For cameras, there are four different coordinate systems (or spaces) - - World coordinate system: This is the system the object lives - the world. 
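# Editorial sketch, not part of the deleted source: concatenating two PerspectiveCameras
# objects into one batched camera with join_cameras_as_batch (implemented above). Both
# cameras must share the same type, device and shared fields such as in_ndc.
import torch
from pytorch3d.renderer import PerspectiveCameras
from pytorch3d.renderer.camera_utils import join_cameras_as_batch

cam_a = PerspectiveCameras(focal_length=1.0)
cam_b = PerspectiveCameras(focal_length=torch.tensor([[1.5, 1.5]]))
batch = join_cameras_as_batch([cam_a, cam_b])
print(len(batch))          # 2
print(batch.focal_length)  # (2, 2) tensor, one row per camera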
- - Camera view coordinate system: This is the system that has its origin on - the camera and the Z-axis perpendicular to the image plane. - In PyTorch3D, we assume that +X points left, and +Y points up and - +Z points out from the image plane. - The transformation from world --> view happens after applying a rotation (R) - and translation (T) - - NDC coordinate system: This is the normalized coordinate system that confines - points in a volume the rendered part of the object or scene, also known as - view volume. For square images, given the PyTorch3D convention, (+1, +1, znear) - is the top left near corner, and (-1, -1, zfar) is the bottom right far - corner of the volume. - The transformation from view --> NDC happens after applying the camera - projection matrix (P) if defined in NDC space. - For non square images, we scale the points such that smallest side - has range [-1, 1] and the largest side has range [-u, u], with u > 1. - - Screen coordinate system: This is another representation of the view volume with - the XY coordinates defined in image space instead of a normalized space. - - An illustration of the coordinate systems can be found in pytorch3d/docs/notes/cameras.md. - - CameraBase defines methods that are common to all camera models: - - `get_camera_center` that returns the optical center of the camera in - world coordinates - - `get_world_to_view_transform` which returns a 3D transform from - world coordinates to the camera view coordinates (R, T) - - `get_full_projection_transform` which composes the projection - transform (P) with the world-to-view transform (R, T) - - `transform_points` which takes a set of input points in world coordinates and - projects to the space the camera is defined in (NDC or screen) - - `get_ndc_camera_transform` which defines the transform from screen/NDC to - PyTorch3D's NDC space - - `transform_points_ndc` which takes a set of points in world coordinates and - projects them to PyTorch3D's NDC space - - `transform_points_screen` which takes a set of points in world coordinates and - projects them to screen space - - For each new camera, one should implement the `get_projection_transform` - routine that returns the mapping from camera view coordinates to camera - coordinates (NDC or screen). - - Another useful function that is specific to each camera model is - `unproject_points` which sends points from camera coordinates (NDC or screen) - back to camera view or world coordinates depending on the `world_coordinates` - boolean argument of the function. - """ - - # Used in __getitem__ to index the relevant fields - # When creating a new camera, this should be set in the __init__ - _FIELDS: Tuple[str, ...] = () - - # Names of fields which are a constant property of the whole batch, rather - # than themselves a batch of data. - # When joining objects into a batch, they will have to agree. - _SHARED_FIELDS: Tuple[str, ...] = () - - def get_projection_transform(self, **kwargs): - """ - Calculate the projective transformation matrix. - - Args: - **kwargs: parameters for the projection can be passed in as keyword - arguments to override the default values set in `__init__`. - - Return: - a `Transform3d` object which represents a batch of projection - matrices of shape (N, 3, 3) - """ - raise NotImplementedError() - - def unproject_points(self, xy_depth: torch.Tensor, **kwargs): - """ - Transform input points from camera coordinates (NDC or screen) - to the world / camera coordinates. 
- - Each of the input points `xy_depth` of shape (..., 3) is - a concatenation of the x, y location and its depth. - - For instance, for an input 2D tensor of shape `(num_points, 3)` - `xy_depth` takes the following form: - `xy_depth[i] = [x[i], y[i], depth[i]]`, - for a each point at an index `i`. - - The following example demonstrates the relationship between - `transform_points` and `unproject_points`: - - .. code-block:: python - - cameras = # camera object derived from CamerasBase - xyz = # 3D points of shape (batch_size, num_points, 3) - # transform xyz to the camera view coordinates - xyz_cam = cameras.get_world_to_view_transform().transform_points(xyz) - # extract the depth of each point as the 3rd coord of xyz_cam - depth = xyz_cam[:, :, 2:] - # project the points xyz to the camera - xy = cameras.transform_points(xyz)[:, :, :2] - # append depth to xy - xy_depth = torch.cat((xy, depth), dim=2) - # unproject to the world coordinates - xyz_unproj_world = cameras.unproject_points(xy_depth, world_coordinates=True) - print(torch.allclose(xyz, xyz_unproj_world)) # True - # unproject to the camera coordinates - xyz_unproj = cameras.unproject_points(xy_depth, world_coordinates=False) - print(torch.allclose(xyz_cam, xyz_unproj)) # True - - Args: - xy_depth: torch tensor of shape (..., 3). - world_coordinates: If `True`, unprojects the points back to world - coordinates using the camera extrinsics `R` and `T`. - `False` ignores `R` and `T` and unprojects to - the camera view coordinates. - from_ndc: If `False` (default), assumes xy part of input is in - NDC space if self.in_ndc(), otherwise in screen space. If - `True`, assumes xy is in NDC space even if the camera - is defined in screen space. - - Returns - new_points: unprojected points with the same shape as `xy_depth`. - """ - raise NotImplementedError() - - def get_camera_center(self, **kwargs) -> torch.Tensor: - """ - Return the 3D location of the camera optical center - in the world coordinates. - - Args: - **kwargs: parameters for the camera extrinsics can be passed in - as keyword arguments to override the default values - set in __init__. - - Setting R or T here will update the values set in init as these - values may be needed later on in the rendering pipeline e.g. for - lighting calculations. - - Returns: - C: a batch of 3D locations of shape (N, 3) denoting - the locations of the center of each camera in the batch. - """ - w2v_trans = self.get_world_to_view_transform(**kwargs) - P = w2v_trans.inverse().get_matrix() - # the camera center is the translation component (the first 3 elements - # of the last row) of the inverted world-to-view - # transform (4x4 RT matrix) - C = P[:, 3, :3] - return C - - def get_world_to_view_transform(self, **kwargs) -> Transform3d: - """ - Return the world-to-view transform. - - Args: - **kwargs: parameters for the camera extrinsics can be passed in - as keyword arguments to override the default values - set in __init__. - - Setting R and T here will update the values set in init as these - values may be needed later on in the rendering pipeline e.g. for - lighting calculations. 
- - Returns: - A Transform3d object which represents a batch of transforms - of shape (N, 3, 3) - """ - R: torch.Tensor = kwargs.get("R", self.R) - T: torch.Tensor = kwargs.get("T", self.T) - self.R = R - self.T = T - world_to_view_transform = get_world_to_view_transform(R=R, T=T) - return world_to_view_transform - - def get_full_projection_transform(self, **kwargs) -> Transform3d: - """ - Return the full world-to-camera transform composing the - world-to-view and view-to-camera transforms. - If camera is defined in NDC space, the projected points are in NDC space. - If camera is defined in screen space, the projected points are in screen space. - - Args: - **kwargs: parameters for the projection transforms can be passed in - as keyword arguments to override the default values - set in __init__. - - Setting R and T here will update the values set in init as these - values may be needed later on in the rendering pipeline e.g. for - lighting calculations. - - Returns: - a Transform3d object which represents a batch of transforms - of shape (N, 3, 3) - """ - self.R: torch.Tensor = kwargs.get("R", self.R) - self.T: torch.Tensor = kwargs.get("T", self.T) - world_to_view_transform = self.get_world_to_view_transform(R=self.R, T=self.T) - view_to_proj_transform = self.get_projection_transform(**kwargs) - return world_to_view_transform.compose(view_to_proj_transform) - - def transform_points( - self, points, eps: Optional[float] = None, **kwargs - ) -> torch.Tensor: - """ - Transform input points from world to camera space. - If camera is defined in NDC space, the projected points are in NDC space. - If camera is defined in screen space, the projected points are in screen space. - - For `CamerasBase.transform_points`, setting `eps > 0` - stabilizes gradients since it leads to avoiding division - by excessively low numbers for points close to the camera plane. - - Args: - points: torch tensor of shape (..., 3). - eps: If eps!=None, the argument is used to clamp the - divisor in the homogeneous normalization of the points - transformed to the ndc space. Please see - `transforms.Transform3d.transform_points` for details. - - For `CamerasBase.transform_points`, setting `eps > 0` - stabilizes gradients since it leads to avoiding division - by excessively low numbers for points close to the - camera plane. - - Returns - new_points: transformed points with the same shape as the input. - """ - world_to_proj_transform = self.get_full_projection_transform(**kwargs) - return world_to_proj_transform.transform_points(points, eps=eps) - - def get_ndc_camera_transform(self, **kwargs) -> Transform3d: - """ - Returns the transform from camera projection space (screen or NDC) to NDC space. - For cameras that can be specified in screen space, this transform - allows points to be converted from screen to NDC space. - The default transform scales the points from [0, W]x[0, H] - to [-1, 1]x[-u, u] or [-u, u]x[-1, 1] where u > 1 is the aspect ratio of the image. - This function should be modified per camera definitions if need be, - e.g. for Perspective/Orthographic cameras we provide a custom implementation. - This transform assumes PyTorch3D coordinate system conventions for - both the NDC space and the input points. - - This transform interfaces with the PyTorch3D renderer which assumes - input points to the renderer to be in NDC space. 
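# Editorial sketch, not part of the deleted source: transform_points is the composition of
# the world-to-view transform (R, T) and the projection transform, exactly as
# get_full_projection_transform builds it above. Uses a default FoVPerspectiveCameras and
# random points placed in front of the camera.
import torch
from pytorch3d.renderer import FoVPerspectiveCameras

cameras = FoVPerspectiveCameras()
pts_world = torch.rand(1, 8, 3) + torch.tensor([0.0, 0.0, 3.0])

composed = cameras.get_world_to_view_transform().compose(cameras.get_projection_transform())
pts_a = composed.transform_points(pts_world)
pts_b = cameras.transform_points(pts_world)
print(torch.allclose(pts_a, pts_b))  # True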
- """ - if self.in_ndc(): - return Transform3d(device=self.device, dtype=torch.float32) - else: - # For custom cameras which can be defined in screen space, - # users might might have to implement the screen to NDC transform based - # on the definition of the camera parameters. - # See PerspectiveCameras/OrthographicCameras for an example. - # We don't flip xy because we assume that world points are in - # PyTorch3D coordinates, and thus conversion from screen to ndc - # is a mere scaling from image to [-1, 1] scale. - image_size = kwargs.get("image_size", self.get_image_size()) - return get_screen_to_ndc_transform( - self, with_xyflip=False, image_size=image_size - ) - - def transform_points_ndc( - self, points, eps: Optional[float] = None, **kwargs - ) -> torch.Tensor: - """ - Transforms points from PyTorch3D world/camera space to NDC space. - Input points follow the PyTorch3D coordinate system conventions: +X left, +Y up. - Output points are in NDC space: +X left, +Y up, origin at image center. - - Args: - points: torch tensor of shape (..., 3). - eps: If eps!=None, the argument is used to clamp the - divisor in the homogeneous normalization of the points - transformed to the ndc space. Please see - `transforms.Transform3d.transform_points` for details. - - For `CamerasBase.transform_points`, setting `eps > 0` - stabilizes gradients since it leads to avoiding division - by excessively low numbers for points close to the - camera plane. - - Returns - new_points: transformed points with the same shape as the input. - """ - world_to_ndc_transform = self.get_full_projection_transform(**kwargs) - if not self.in_ndc(): - to_ndc_transform = self.get_ndc_camera_transform(**kwargs) - world_to_ndc_transform = world_to_ndc_transform.compose(to_ndc_transform) - - return world_to_ndc_transform.transform_points(points, eps=eps) - - def transform_points_screen( - self, points, eps: Optional[float] = None, with_xyflip: bool = True, **kwargs - ) -> torch.Tensor: - """ - Transforms points from PyTorch3D world/camera space to screen space. - Input points follow the PyTorch3D coordinate system conventions: +X left, +Y up. - Output points are in screen space: +X right, +Y down, origin at top left corner. - - Args: - points: torch tensor of shape (..., 3). - eps: If eps!=None, the argument is used to clamp the - divisor in the homogeneous normalization of the points - transformed to the ndc space. Please see - `transforms.Transform3d.transform_points` for details. - - For `CamerasBase.transform_points`, setting `eps > 0` - stabilizes gradients since it leads to avoiding division - by excessively low numbers for points close to the - camera plane. - with_xyflip: If True, flip x and y directions. In world/camera/ndc coords, - +x points to the left and +y up. If with_xyflip is true, in screen - coords +x points right, and +y down, following the usual RGB image - convention. Warning: do not set to False unless you know what you're - doing! - - Returns - new_points: transformed points with the same shape as the input. - """ - points_ndc = self.transform_points_ndc(points, eps=eps, **kwargs) - image_size = kwargs.get("image_size", self.get_image_size()) - return get_ndc_to_screen_transform( - self, with_xyflip=with_xyflip, image_size=image_size - ).transform_points(points_ndc, eps=eps) - - def clone(self): - """ - Returns a copy of `self`. 
- """ - cam_type = type(self) - other = cam_type(device=self.device) - return super().clone(other) - - def is_perspective(self): - raise NotImplementedError() - - def in_ndc(self): - """ - Specifies whether the camera is defined in NDC space - or in screen (image) space - """ - raise NotImplementedError() - - def get_znear(self): - return getattr(self, "znear", None) - - def get_image_size(self): - """ - Returns the image size, if provided, expected in the form of (height, width) - The image size is used for conversion of projected points to screen coordinates. - """ - return getattr(self, "image_size", None) - - def __getitem__( - self, index: Union[int, List[int], torch.BoolTensor, torch.LongTensor] - ) -> "CamerasBase": - """ - Override for the __getitem__ method in TensorProperties which needs to be - refactored. - - Args: - index: an integer index, list/tensor of integer indices, or tensor of boolean - indicators used to filter all the fields in the cameras given by self._FIELDS. - Returns: - an instance of the current cameras class with only the values at the selected index. - """ - - kwargs = {} - - tensor_types = { - # pyre-fixme[16]: Module `cuda` has no attribute `BoolTensor`. - "bool": (torch.BoolTensor, torch.cuda.BoolTensor), - # pyre-fixme[16]: Module `cuda` has no attribute `LongTensor`. - "long": (torch.LongTensor, torch.cuda.LongTensor), - } - if not isinstance( - index, (int, list, *tensor_types["bool"], *tensor_types["long"]) - ) or ( - isinstance(index, list) - and not all(isinstance(i, int) and not isinstance(i, bool) for i in index) - ): - msg = ( - "Invalid index type, expected int, List[int] or Bool/LongTensor; got %r" - ) - raise ValueError(msg % type(index)) - - if isinstance(index, int): - index = [index] - - if isinstance(index, tensor_types["bool"]): - # pyre-fixme[16]: Item `List` of `Union[List[int], BoolTensor, - # LongTensor]` has no attribute `ndim`. - # pyre-fixme[16]: Item `List` of `Union[List[int], BoolTensor, - # LongTensor]` has no attribute `shape`. - if index.ndim != 1 or index.shape[0] != len(self): - raise ValueError( - # pyre-fixme[16]: Item `List` of `Union[List[int], BoolTensor, - # LongTensor]` has no attribute `shape`. - f"Boolean index of shape {index.shape} does not match cameras" - ) - elif max(index) >= len(self): - raise IndexError(f"Index {max(index)} is out of bounds for select cameras") - - for field in self._FIELDS: - val = getattr(self, field, None) - if val is None: - continue - - # e.g. "in_ndc" is set as attribute "_in_ndc" on the class - # but provided as "in_ndc" on initialization - if field.startswith("_"): - field = field[1:] - - if isinstance(val, (str, bool)): - kwargs[field] = val - elif isinstance(val, torch.Tensor): - # In the init, all inputs will be converted to - # tensors before setting as attributes - kwargs[field] = val[index] - else: - raise ValueError(f"Field {field} type is not supported for indexing") - - kwargs["device"] = self.device - return self.__class__(**kwargs) - - -############################################################ -# Field of View Camera Classes # -############################################################ - - -def OpenGLPerspectiveCameras( - znear: _BatchFloatType = 1.0, - zfar: _BatchFloatType = 100.0, - aspect_ratio: _BatchFloatType = 1.0, - fov: _BatchFloatType = 60.0, - degrees: bool = True, - R: torch.Tensor = _R, - T: torch.Tensor = _T, - device: Device = "cpu", -) -> "FoVPerspectiveCameras": - """ - OpenGLPerspectiveCameras has been DEPRECATED. Use FoVPerspectiveCameras instead. 
- Preserving OpenGLPerspectiveCameras for backward compatibility. - """ - - warnings.warn( - """OpenGLPerspectiveCameras is deprecated, - Use FoVPerspectiveCameras instead. - OpenGLPerspectiveCameras will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return FoVPerspectiveCameras( - znear=znear, - zfar=zfar, - aspect_ratio=aspect_ratio, - fov=fov, - degrees=degrees, - R=R, - T=T, - device=device, - ) - - -class FoVPerspectiveCameras(CamerasBase): - """ - A class which stores a batch of parameters to generate a batch of - projection matrices by specifying the field of view. - The definitions of the parameters follow the OpenGL perspective camera. - - The extrinsics of the camera (R and T matrices) can also be set in the - initializer or passed in to `get_full_projection_transform` to get - the full transformation from world -> ndc. - - The `transform_points` method calculates the full world -> ndc transform - and then applies it to the input points. - - The transforms can also be returned separately as Transform3d objects. - - * Setting the Aspect Ratio for Non Square Images * - - If the desired output image size is non square (i.e. a tuple of (H, W) where H != W) - the aspect ratio needs special consideration: There are two aspect ratios - to be aware of: - - the aspect ratio of each pixel - - the aspect ratio of the output image - The `aspect_ratio` setting in the FoVPerspectiveCameras sets the - pixel aspect ratio. When using this camera with the differentiable rasterizer - be aware that in the rasterizer we assume square pixels, but allow - variable image aspect ratio (i.e rectangle images). - - In most cases you will want to set the camera `aspect_ratio=1.0` - (i.e. square pixels) and only vary the output image dimensions in pixels - for rasterization. - """ - - # For __getitem__ - _FIELDS = ( - "K", - "znear", - "zfar", - "aspect_ratio", - "fov", - "R", - "T", - "degrees", - ) - - _SHARED_FIELDS = ("degrees",) - - def __init__( - self, - znear: _BatchFloatType = 1.0, - zfar: _BatchFloatType = 100.0, - aspect_ratio: _BatchFloatType = 1.0, - fov: _BatchFloatType = 60.0, - degrees: bool = True, - R: torch.Tensor = _R, - T: torch.Tensor = _T, - K: Optional[torch.Tensor] = None, - device: Device = "cpu", - ) -> None: - """ - - Args: - znear: near clipping plane of the view frustrum. - zfar: far clipping plane of the view frustrum. - aspect_ratio: aspect ratio of the image pixels. - 1.0 indicates square pixels. - fov: field of view angle of the camera. - degrees: bool, set to True if fov is specified in degrees. - R: Rotation matrix of shape (N, 3, 3) - T: Translation matrix of shape (N, 3) - K: (optional) A calibration matrix of shape (N, 4, 4) - If provided, don't need znear, zfar, fov, aspect_ratio, degrees - device: Device (as str or torch.device) - """ - # The initializer formats all inputs to torch tensors and broadcasts - # all the inputs to have the same batch dimension where necessary. - super().__init__( - device=device, - znear=znear, - zfar=zfar, - aspect_ratio=aspect_ratio, - fov=fov, - R=R, - T=T, - K=K, - ) - - # No need to convert to tensor or broadcast. - self.degrees = degrees - - def compute_projection_matrix( - self, znear, zfar, fov, aspect_ratio, degrees: bool - ) -> torch.Tensor: - """ - Compute the calibration matrix K of shape (N, 4, 4) - - Args: - znear: near clipping plane of the view frustrum. - zfar: far clipping plane of the view frustrum. - fov: field of view angle of the camera. - aspect_ratio: aspect ratio of the image pixels. 
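# Editorial sketch, not part of the deleted source: when K is given, it bypasses
# znear/zfar/fov/aspect_ratio, so a camera built from a precomputed calibration matrix
# matches one built from the individual FoV parameters. Values are illustrative.
import torch
from pytorch3d.renderer import FoVPerspectiveCameras

cam_fov = FoVPerspectiveCameras(znear=0.5, zfar=50.0, fov=45.0)
K = cam_fov.compute_projection_matrix(
    znear=0.5, zfar=50.0, fov=45.0, aspect_ratio=1.0, degrees=True
)
cam_k = FoVPerspectiveCameras(K=K)
m1 = cam_fov.get_projection_transform().get_matrix()
m2 = cam_k.get_projection_transform().get_matrix()
print(torch.allclose(m1, m2))  # True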
- 1.0 indicates square pixels. - degrees: bool, set to True if fov is specified in degrees. - - Returns: - torch.FloatTensor of the calibration matrix with shape (N, 4, 4) - """ - K = torch.zeros((self._N, 4, 4), device=self.device, dtype=torch.float32) - ones = torch.ones((self._N), dtype=torch.float32, device=self.device) - if degrees: - fov = (np.pi / 180) * fov - - if not torch.is_tensor(fov): - fov = torch.tensor(fov, device=self.device) - tanHalfFov = torch.tan((fov / 2)) - max_y = tanHalfFov * znear - min_y = -max_y - max_x = max_y * aspect_ratio - min_x = -max_x - - # NOTE: In OpenGL the projection matrix changes the handedness of the - # coordinate frame. i.e the NDC space positive z direction is the - # camera space negative z direction. This is because the sign of the z - # in the projection matrix is set to -1.0. - # In pytorch3d we maintain a right handed coordinate system throughout - # so the so the z sign is 1.0. - z_sign = 1.0 - - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - K[:, 0, 0] = 2.0 * znear / (max_x - min_x) - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - K[:, 1, 1] = 2.0 * znear / (max_y - min_y) - K[:, 0, 2] = (max_x + min_x) / (max_x - min_x) - K[:, 1, 2] = (max_y + min_y) / (max_y - min_y) - K[:, 3, 2] = z_sign * ones - - # NOTE: This maps the z coordinate from [0, 1] where z = 0 if the point - # is at the near clipping plane and z = 1 when the point is at the far - # clipping plane. - K[:, 2, 2] = z_sign * zfar / (zfar - znear) - K[:, 2, 3] = -(zfar * znear) / (zfar - znear) - - return K - - def get_projection_transform(self, **kwargs) -> Transform3d: - """ - Calculate the perspective projection matrix with a symmetric - viewing frustrum. Use column major order. - The viewing frustrum will be projected into ndc, s.t. - (max_x, max_y) -> (+1, +1) - (min_x, min_y) -> (-1, -1) - - Args: - **kwargs: parameters for the projection can be passed in as keyword - arguments to override the default values set in `__init__`. - - Return: - a Transform3d object which represents a batch of projection - matrices of shape (N, 4, 4) - - .. code-block:: python - - h1 = (max_y + min_y)/(max_y - min_y) - w1 = (max_x + min_x)/(max_x - min_x) - tanhalffov = tan((fov/2)) - s1 = 1/tanhalffov - s2 = 1/(tanhalffov * (aspect_ratio)) - - # To map z to the range [0, 1] use: - f1 = far / (far - near) - f2 = -(far * near) / (far - near) - - # Projection matrix - K = [ - [s1, 0, w1, 0], - [0, s2, h1, 0], - [0, 0, f1, f2], - [0, 0, 1, 0], - ] - """ - K = kwargs.get("K", self.K) - if K is not None: - if K.shape != (self._N, 4, 4): - msg = "Expected K to have shape of (%r, 4, 4)" - raise ValueError(msg % (self._N)) - else: - K = self.compute_projection_matrix( - kwargs.get("znear", self.znear), - kwargs.get("zfar", self.zfar), - kwargs.get("fov", self.fov), - kwargs.get("aspect_ratio", self.aspect_ratio), - kwargs.get("degrees", self.degrees), - ) - - # Transpose the projection matrix as PyTorch3D transforms use row vectors. - transform = Transform3d( - matrix=K.transpose(1, 2).contiguous(), device=self.device - ) - return transform - - def unproject_points( - self, - xy_depth: torch.Tensor, - world_coordinates: bool = True, - scaled_depth_input: bool = False, - **kwargs, - ) -> torch.Tensor: - """>! 
- FoV cameras further allow for passing depth in world units - (`scaled_depth_input=False`) or in the [0, 1]-normalized units - (`scaled_depth_input=True`) - - Args: - scaled_depth_input: If `True`, assumes the input depth is in - the [0, 1]-normalized units. If `False` the input depth is in - the world units. - """ - - # obtain the relevant transformation to ndc - if world_coordinates: - to_ndc_transform = self.get_full_projection_transform() - else: - to_ndc_transform = self.get_projection_transform() - - if scaled_depth_input: - # the input is scaled depth, so we don't have to do anything - xy_sdepth = xy_depth - else: - # parse out important values from the projection matrix - K_matrix = self.get_projection_transform(**kwargs.copy()).get_matrix() - # parse out f1, f2 from K_matrix - unsqueeze_shape = [1] * xy_depth.dim() - unsqueeze_shape[0] = K_matrix.shape[0] - f1 = K_matrix[:, 2, 2].reshape(unsqueeze_shape) - f2 = K_matrix[:, 3, 2].reshape(unsqueeze_shape) - # get the scaled depth - sdepth = (f1 * xy_depth[..., 2:3] + f2) / xy_depth[..., 2:3] - # concatenate xy + scaled depth - xy_sdepth = torch.cat((xy_depth[..., 0:2], sdepth), dim=-1) - - # unproject with inverse of the projection - unprojection_transform = to_ndc_transform.inverse() - return unprojection_transform.transform_points(xy_sdepth) - - def is_perspective(self): - return True - - def in_ndc(self): - return True - - -def OpenGLOrthographicCameras( - znear: _BatchFloatType = 1.0, - zfar: _BatchFloatType = 100.0, - top: _BatchFloatType = 1.0, - bottom: _BatchFloatType = -1.0, - left: _BatchFloatType = -1.0, - right: _BatchFloatType = 1.0, - scale_xyz=((1.0, 1.0, 1.0),), # (1, 3) - R: torch.Tensor = _R, - T: torch.Tensor = _T, - device: Device = "cpu", -) -> "FoVOrthographicCameras": - """ - OpenGLOrthographicCameras has been DEPRECATED. Use FoVOrthographicCameras instead. - Preserving OpenGLOrthographicCameras for backward compatibility. - """ - - warnings.warn( - """OpenGLOrthographicCameras is deprecated, - Use FoVOrthographicCameras instead. - OpenGLOrthographicCameras will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return FoVOrthographicCameras( - znear=znear, - zfar=zfar, - max_y=top, - min_y=bottom, - max_x=right, - min_x=left, - scale_xyz=scale_xyz, - R=R, - T=T, - device=device, - ) - - -class FoVOrthographicCameras(CamerasBase): - """ - A class which stores a batch of parameters to generate a batch of - projection matrices by specifying the field of view. - The definitions of the parameters follow the OpenGL orthographic camera. - """ - - # For __getitem__ - _FIELDS = ( - "K", - "znear", - "zfar", - "R", - "T", - "max_y", - "min_y", - "max_x", - "min_x", - "scale_xyz", - ) - - def __init__( - self, - znear: _BatchFloatType = 1.0, - zfar: _BatchFloatType = 100.0, - max_y: _BatchFloatType = 1.0, - min_y: _BatchFloatType = -1.0, - max_x: _BatchFloatType = 1.0, - min_x: _BatchFloatType = -1.0, - scale_xyz=((1.0, 1.0, 1.0),), # (1, 3) - R: torch.Tensor = _R, - T: torch.Tensor = _T, - K: Optional[torch.Tensor] = None, - device: Device = "cpu", - ): - """ - - Args: - znear: near clipping plane of the view frustrum. - zfar: far clipping plane of the view frustrum. - max_y: maximum y coordinate of the frustrum. - min_y: minimum y coordinate of the frustrum. - max_x: maximum x coordinate of the frustrum. - min_x: minimum x coordinate of the frustrum - scale_xyz: scale factors for each axis of shape (N, 3). - R: Rotation matrix of shape (N, 3, 3). - T: Translation of shape (N, 3). 
- K: (optional) A calibration matrix of shape (N, 4, 4) - If provided, don't need znear, zfar, max_y, min_y, max_x, min_x, scale_xyz - device: torch.device or string. - - Only need to set min_x, max_x, min_y, max_y for viewing frustrums - which are non symmetric about the origin. - """ - # The initializer formats all inputs to torch tensors and broadcasts - # all the inputs to have the same batch dimension where necessary. - super().__init__( - device=device, - znear=znear, - zfar=zfar, - max_y=max_y, - min_y=min_y, - max_x=max_x, - min_x=min_x, - scale_xyz=scale_xyz, - R=R, - T=T, - K=K, - ) - - def compute_projection_matrix( - self, znear, zfar, max_x, min_x, max_y, min_y, scale_xyz - ) -> torch.Tensor: - """ - Compute the calibration matrix K of shape (N, 4, 4) - - Args: - znear: near clipping plane of the view frustrum. - zfar: far clipping plane of the view frustrum. - max_x: maximum x coordinate of the frustrum. - min_x: minimum x coordinate of the frustrum - max_y: maximum y coordinate of the frustrum. - min_y: minimum y coordinate of the frustrum. - scale_xyz: scale factors for each axis of shape (N, 3). - """ - K = torch.zeros((self._N, 4, 4), dtype=torch.float32, device=self.device) - ones = torch.ones((self._N), dtype=torch.float32, device=self.device) - # NOTE: OpenGL flips handedness of coordinate system between camera - # space and NDC space so z sign is -ve. In PyTorch3D we maintain a - # right handed coordinate system throughout. - z_sign = +1.0 - - K[:, 0, 0] = (2.0 / (max_x - min_x)) * scale_xyz[:, 0] - K[:, 1, 1] = (2.0 / (max_y - min_y)) * scale_xyz[:, 1] - K[:, 0, 3] = -(max_x + min_x) / (max_x - min_x) - K[:, 1, 3] = -(max_y + min_y) / (max_y - min_y) - K[:, 3, 3] = ones - - # NOTE: This maps the z coordinate to the range [0, 1] and replaces the - # the OpenGL z normalization to [-1, 1] - K[:, 2, 2] = z_sign * (1.0 / (zfar - znear)) * scale_xyz[:, 2] - K[:, 2, 3] = -znear / (zfar - znear) - - return K - - def get_projection_transform(self, **kwargs) -> Transform3d: - """ - Calculate the orthographic projection matrix. - Use column major order. - - Args: - **kwargs: parameters for the projection can be passed in to - override the default values set in __init__. - Return: - a Transform3d object which represents a batch of projection - matrices of shape (N, 4, 4) - - .. code-block:: python - - scale_x = 2 / (max_x - min_x) - scale_y = 2 / (max_y - min_y) - scale_z = 2 / (far-near) - mid_x = (max_x + min_x) / (max_x - min_x) - mix_y = (max_y + min_y) / (max_y - min_y) - mid_z = (far + near) / (far - near) - - K = [ - [scale_x, 0, 0, -mid_x], - [0, scale_y, 0, -mix_y], - [0, 0, -scale_z, -mid_z], - [0, 0, 0, 1], - ] - """ - K = kwargs.get("K", self.K) - if K is not None: - if K.shape != (self._N, 4, 4): - msg = "Expected K to have shape of (%r, 4, 4)" - raise ValueError(msg % (self._N)) - else: - K = self.compute_projection_matrix( - kwargs.get("znear", self.znear), - kwargs.get("zfar", self.zfar), - kwargs.get("max_x", self.max_x), - kwargs.get("min_x", self.min_x), - kwargs.get("max_y", self.max_y), - kwargs.get("min_y", self.min_y), - kwargs.get("scale_xyz", self.scale_xyz), - ) - - transform = Transform3d( - matrix=K.transpose(1, 2).contiguous(), device=self.device - ) - return transform - - def unproject_points( - self, - xy_depth: torch.Tensor, - world_coordinates: bool = True, - scaled_depth_input: bool = False, - **kwargs, - ) -> torch.Tensor: - """>! 
- FoV cameras further allow for passing depth in world units - (`scaled_depth_input=False`) or in the [0, 1]-normalized units - (`scaled_depth_input=True`) - - Args: - scaled_depth_input: If `True`, assumes the input depth is in - the [0, 1]-normalized units. If `False` the input depth is in - the world units. - """ - - if world_coordinates: - to_ndc_transform = self.get_full_projection_transform(**kwargs.copy()) - else: - to_ndc_transform = self.get_projection_transform(**kwargs.copy()) - - if scaled_depth_input: - # the input depth is already scaled - xy_sdepth = xy_depth - else: - # we have to obtain the scaled depth first - K = self.get_projection_transform(**kwargs).get_matrix() - unsqueeze_shape = [1] * K.dim() - unsqueeze_shape[0] = K.shape[0] - mid_z = K[:, 3, 2].reshape(unsqueeze_shape) - scale_z = K[:, 2, 2].reshape(unsqueeze_shape) - scaled_depth = scale_z * xy_depth[..., 2:3] + mid_z - # cat xy and scaled depth - xy_sdepth = torch.cat((xy_depth[..., :2], scaled_depth), dim=-1) - # finally invert the transform - unprojection_transform = to_ndc_transform.inverse() - return unprojection_transform.transform_points(xy_sdepth) - - def is_perspective(self): - return False - - def in_ndc(self): - return True - - -############################################################ -# MultiView Camera Classes # -############################################################ -""" -Note that the MultiView Cameras accept parameters in NDC space. -""" - - -def SfMPerspectiveCameras( - focal_length: _FocalLengthType = 1.0, - principal_point=((0.0, 0.0),), - R: torch.Tensor = _R, - T: torch.Tensor = _T, - device: Device = "cpu", -) -> "PerspectiveCameras": - """ - SfMPerspectiveCameras has been DEPRECATED. Use PerspectiveCameras instead. - Preserving SfMPerspectiveCameras for backward compatibility. - """ - - warnings.warn( - """SfMPerspectiveCameras is deprecated, - Use PerspectiveCameras instead. - SfMPerspectiveCameras will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return PerspectiveCameras( - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - device=device, - ) - - -class PerspectiveCameras(CamerasBase): - """ - A class which stores a batch of parameters to generate a batch of - transformation matrices using the multi-view geometry convention for - perspective camera. - - Parameters for this camera are specified in NDC if `in_ndc` is set to True. - If parameters are specified in screen space, `in_ndc` must be set to False. - """ - - # For __getitem__ - _FIELDS = ( - "K", - "R", - "T", - "focal_length", - "principal_point", - "_in_ndc", # arg is in_ndc but attribute set as _in_ndc - "image_size", - ) - - _SHARED_FIELDS = ("_in_ndc",) - - def __init__( - self, - focal_length: _FocalLengthType = 1.0, - principal_point=((0.0, 0.0),), - R: torch.Tensor = _R, - T: torch.Tensor = _T, - K: Optional[torch.Tensor] = None, - device: Device = "cpu", - in_ndc: bool = True, - image_size: Optional[Union[List, Tuple, torch.Tensor]] = None, - ) -> None: - """ - - Args: - focal_length: Focal length of the camera in world units. - A tensor of shape (N, 1) or (N, 2) for - square and non-square pixels respectively. - principal_point: xy coordinates of the center of - the principal point of the camera in pixels. - A tensor of shape (N, 2). - in_ndc: True if camera parameters are specified in NDC. - If camera parameters are in screen space, it must - be set to False. 
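# Editorial sketch, not part of the deleted source: a PerspectiveCameras defined directly
# in screen (pixel) space. With in_ndc=False, image_size is required and
# focal_length/principal_point are given in pixels; the numbers are illustrative.
import torch
from pytorch3d.renderer import PerspectiveCameras

cameras = PerspectiveCameras(
    focal_length=torch.tensor([[500.0, 500.0]]),     # fx, fy in pixels
    principal_point=torch.tensor([[256.0, 256.0]]),  # cx, cy in pixels
    image_size=torch.tensor([[512, 512]]),           # (height, width)
    in_ndc=False,
)
print(cameras.in_ndc())          # False
print(cameras.get_image_size())  # tensor([[512, 512]])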
- R: Rotation matrix of shape (N, 3, 3) - T: Translation matrix of shape (N, 3) - K: (optional) A calibration matrix of shape (N, 4, 4) - If provided, don't need focal_length, principal_point - image_size: (height, width) of image size. - A tensor of shape (N, 2) or a list/tuple. Required for screen cameras. - device: torch.device or string - """ - # The initializer formats all inputs to torch tensors and broadcasts - # all the inputs to have the same batch dimension where necessary. - kwargs = {"image_size": image_size} if image_size is not None else {} - super().__init__( - device=device, - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - K=K, - _in_ndc=in_ndc, - **kwargs, # pyre-ignore - ) - if image_size is not None: - if (self.image_size < 1).any(): # pyre-ignore - raise ValueError("Image_size provided has invalid values") - else: - self.image_size = None - - # When focal length is provided as one value, expand to - # create (N, 2) shape tensor - if self.focal_length.ndim == 1: # (N,) - self.focal_length = self.focal_length[:, None] # (N, 1) - self.focal_length = self.focal_length.expand(-1, 2) # (N, 2) - - def get_projection_transform(self, **kwargs) -> Transform3d: - """ - Calculate the projection matrix using the - multi-view geometry convention. - - Args: - **kwargs: parameters for the projection can be passed in as keyword - arguments to override the default values set in __init__. - - Returns: - A `Transform3d` object with a batch of `N` projection transforms. - - .. code-block:: python - - fx = focal_length[:, 0] - fy = focal_length[:, 1] - px = principal_point[:, 0] - py = principal_point[:, 1] - - K = [ - [fx, 0, px, 0], - [0, fy, py, 0], - [0, 0, 0, 1], - [0, 0, 1, 0], - ] - """ - K = kwargs.get("K", self.K) - if K is not None: - if K.shape != (self._N, 4, 4): - msg = "Expected K to have shape of (%r, 4, 4)" - raise ValueError(msg % (self._N)) - else: - K = _get_sfm_calibration_matrix( - self._N, - self.device, - kwargs.get("focal_length", self.focal_length), - kwargs.get("principal_point", self.principal_point), - orthographic=False, - ) - - transform = Transform3d( - matrix=K.transpose(1, 2).contiguous(), device=self.device - ) - return transform - - def unproject_points( - self, - xy_depth: torch.Tensor, - world_coordinates: bool = True, - from_ndc: bool = False, - **kwargs, - ) -> torch.Tensor: - """ - Args: - from_ndc: If `False` (default), assumes xy part of input is in - NDC space if self.in_ndc(), otherwise in screen space. If - `True`, assumes xy is in NDC space even if the camera - is defined in screen space. - """ - if world_coordinates: - to_camera_transform = self.get_full_projection_transform(**kwargs) - else: - to_camera_transform = self.get_projection_transform(**kwargs) - if from_ndc: - to_camera_transform = to_camera_transform.compose( - self.get_ndc_camera_transform() - ) - - unprojection_transform = to_camera_transform.inverse() - xy_inv_depth = torch.cat( - (xy_depth[..., :2], 1.0 / xy_depth[..., 2:3]), dim=-1 # type: ignore - ) - return unprojection_transform.transform_points(xy_inv_depth) - - def get_principal_point(self, **kwargs) -> torch.Tensor: - """ - Return the camera's principal point - - Args: - **kwargs: parameters for the camera extrinsics can be passed in - as keyword arguments to override the default values - set in __init__. 
- """ - proj_mat = self.get_projection_transform(**kwargs).get_matrix() - return proj_mat[:, 2, :2] - - def get_ndc_camera_transform(self, **kwargs) -> Transform3d: - """ - Returns the transform from camera projection space (screen or NDC) to NDC space. - If the camera is defined already in NDC space, the transform is identity. - For cameras defined in screen space, we adjust the principal point computation - which is defined in the image space (commonly) and scale the points to NDC space. - - This transform leaves the depth unchanged. - - Important: This transforms assumes PyTorch3D conventions for the input points, - i.e. +X left, +Y up. - """ - if self.in_ndc(): - ndc_transform = Transform3d(device=self.device, dtype=torch.float32) - else: - # when cameras are defined in screen/image space, the principal point is - # provided in the (+X right, +Y down), aka image, coordinate system. - # Since input points are defined in the PyTorch3D system (+X left, +Y up), - # we need to adjust for the principal point transform. - pr_point_fix = torch.zeros( - (self._N, 4, 4), device=self.device, dtype=torch.float32 - ) - pr_point_fix[:, 0, 0] = 1.0 - pr_point_fix[:, 1, 1] = 1.0 - pr_point_fix[:, 2, 2] = 1.0 - pr_point_fix[:, 3, 3] = 1.0 - pr_point_fix[:, :2, 3] = -2.0 * self.get_principal_point(**kwargs) - pr_point_fix_transform = Transform3d( - matrix=pr_point_fix.transpose(1, 2).contiguous(), device=self.device - ) - image_size = kwargs.get("image_size", self.get_image_size()) - screen_to_ndc_transform = get_screen_to_ndc_transform( - self, with_xyflip=False, image_size=image_size - ) - ndc_transform = pr_point_fix_transform.compose(screen_to_ndc_transform) - - return ndc_transform - - def is_perspective(self): - return True - - def in_ndc(self): - return self._in_ndc - - -def SfMOrthographicCameras( - focal_length: _FocalLengthType = 1.0, - principal_point=((0.0, 0.0),), - R: torch.Tensor = _R, - T: torch.Tensor = _T, - device: Device = "cpu", -) -> "OrthographicCameras": - """ - SfMOrthographicCameras has been DEPRECATED. Use OrthographicCameras instead. - Preserving SfMOrthographicCameras for backward compatibility. - """ - - warnings.warn( - """SfMOrthographicCameras is deprecated, - Use OrthographicCameras instead. - SfMOrthographicCameras will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return OrthographicCameras( - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - device=device, - ) - - -class OrthographicCameras(CamerasBase): - """ - A class which stores a batch of parameters to generate a batch of - transformation matrices using the multi-view geometry convention for - orthographic camera. - - Parameters for this camera are specified in NDC if `in_ndc` is set to True. - If parameters are specified in screen space, `in_ndc` must be set to False. - """ - - # For __getitem__ - _FIELDS = ( - "K", - "R", - "T", - "focal_length", - "principal_point", - "_in_ndc", - "image_size", - ) - - _SHARED_FIELDS = ("_in_ndc",) - - def __init__( - self, - focal_length: _FocalLengthType = 1.0, - principal_point=((0.0, 0.0),), - R: torch.Tensor = _R, - T: torch.Tensor = _T, - K: Optional[torch.Tensor] = None, - device: Device = "cpu", - in_ndc: bool = True, - image_size: Optional[Union[List, Tuple, torch.Tensor]] = None, - ) -> None: - """ - - Args: - focal_length: Focal length of the camera in world units. - A tensor of shape (N, 1) or (N, 2) for - square and non-square pixels respectively. 
- principal_point: xy coordinates of the center of - the principal point of the camera in pixels. - A tensor of shape (N, 2). - in_ndc: True if camera parameters are specified in NDC. - If False, then camera parameters are in screen space. - R: Rotation matrix of shape (N, 3, 3) - T: Translation matrix of shape (N, 3) - K: (optional) A calibration matrix of shape (N, 4, 4) - If provided, don't need focal_length, principal_point, image_size - image_size: (height, width) of image size. - A tensor of shape (N, 2) or list/tuple. Required for screen cameras. - device: torch.device or string - """ - # The initializer formats all inputs to torch tensors and broadcasts - # all the inputs to have the same batch dimension where necessary. - kwargs = {"image_size": image_size} if image_size is not None else {} - super().__init__( - device=device, - focal_length=focal_length, - principal_point=principal_point, - R=R, - T=T, - K=K, - _in_ndc=in_ndc, - **kwargs, # pyre-ignore - ) - if image_size is not None: - if (self.image_size < 1).any(): # pyre-ignore - raise ValueError("Image_size provided has invalid values") - else: - self.image_size = None - - # When focal length is provided as one value, expand to - # create (N, 2) shape tensor - if self.focal_length.ndim == 1: # (N,) - self.focal_length = self.focal_length[:, None] # (N, 1) - self.focal_length = self.focal_length.expand(-1, 2) # (N, 2) - - def get_projection_transform(self, **kwargs) -> Transform3d: - """ - Calculate the projection matrix using - the multi-view geometry convention. - - Args: - **kwargs: parameters for the projection can be passed in as keyword - arguments to override the default values set in __init__. - - Returns: - A `Transform3d` object with a batch of `N` projection transforms. - - .. code-block:: python - - fx = focal_length[:,0] - fy = focal_length[:,1] - px = principal_point[:,0] - py = principal_point[:,1] - - K = [ - [fx, 0, 0, px], - [0, fy, 0, py], - [0, 0, 1, 0], - [0, 0, 0, 1], - ] - """ - K = kwargs.get("K", self.K) - if K is not None: - if K.shape != (self._N, 4, 4): - msg = "Expected K to have shape of (%r, 4, 4)" - raise ValueError(msg % (self._N)) - else: - K = _get_sfm_calibration_matrix( - self._N, - self.device, - kwargs.get("focal_length", self.focal_length), - kwargs.get("principal_point", self.principal_point), - orthographic=True, - ) - - transform = Transform3d( - matrix=K.transpose(1, 2).contiguous(), device=self.device - ) - return transform - - def unproject_points( - self, - xy_depth: torch.Tensor, - world_coordinates: bool = True, - from_ndc: bool = False, - **kwargs, - ) -> torch.Tensor: - """ - Args: - from_ndc: If `False` (default), assumes xy part of input is in - NDC space if self.in_ndc(), otherwise in screen space. If - `True`, assumes xy is in NDC space even if the camera - is defined in screen space. - """ - if world_coordinates: - to_camera_transform = self.get_full_projection_transform(**kwargs) - else: - to_camera_transform = self.get_projection_transform(**kwargs) - if from_ndc: - to_camera_transform = to_camera_transform.compose( - self.get_ndc_camera_transform() - ) - - unprojection_transform = to_camera_transform.inverse() - return unprojection_transform.transform_points(xy_depth) - - def get_principal_point(self, **kwargs) -> torch.Tensor: - """ - Return the camera's principal point - - Args: - **kwargs: parameters for the camera extrinsics can be passed in - as keyword arguments to override the default values - set in __init__. 
- """ - proj_mat = self.get_projection_transform(**kwargs).get_matrix() - return proj_mat[:, 3, :2] - - def get_ndc_camera_transform(self, **kwargs) -> Transform3d: - """ - Returns the transform from camera projection space (screen or NDC) to NDC space. - If the camera is defined already in NDC space, the transform is identity. - For cameras defined in screen space, we adjust the principal point computation - which is defined in the image space (commonly) and scale the points to NDC space. - - Important: This transforms assumes PyTorch3D conventions for the input points, - i.e. +X left, +Y up. - """ - if self.in_ndc(): - ndc_transform = Transform3d(device=self.device, dtype=torch.float32) - else: - # when cameras are defined in screen/image space, the principal point is - # provided in the (+X right, +Y down), aka image, coordinate system. - # Since input points are defined in the PyTorch3D system (+X left, +Y up), - # we need to adjust for the principal point transform. - pr_point_fix = torch.zeros( - (self._N, 4, 4), device=self.device, dtype=torch.float32 - ) - pr_point_fix[:, 0, 0] = 1.0 - pr_point_fix[:, 1, 1] = 1.0 - pr_point_fix[:, 2, 2] = 1.0 - pr_point_fix[:, 3, 3] = 1.0 - pr_point_fix[:, :2, 3] = -2.0 * self.get_principal_point(**kwargs) - pr_point_fix_transform = Transform3d( - matrix=pr_point_fix.transpose(1, 2).contiguous(), device=self.device - ) - image_size = kwargs.get("image_size", self.get_image_size()) - screen_to_ndc_transform = get_screen_to_ndc_transform( - self, with_xyflip=False, image_size=image_size - ) - ndc_transform = pr_point_fix_transform.compose(screen_to_ndc_transform) - - return ndc_transform - - def is_perspective(self): - return False - - def in_ndc(self): - return self._in_ndc - - -################################################ -# Helper functions for cameras # -################################################ - - -def _get_sfm_calibration_matrix( - N: int, - device: Device, - focal_length, - principal_point, - orthographic: bool = False, -) -> torch.Tensor: - """ - Returns a calibration matrix of a perspective/orthographic camera. - - Args: - N: Number of cameras. - focal_length: Focal length of the camera. - principal_point: xy coordinates of the center of - the principal point of the camera in pixels. - orthographic: Boolean specifying if the camera is orthographic or not - - The calibration matrix `K` is set up as follows: - - .. code-block:: python - - fx = focal_length[:,0] - fy = focal_length[:,1] - px = principal_point[:,0] - py = principal_point[:,1] - - for orthographic==True: - K = [ - [fx, 0, 0, px], - [0, fy, 0, py], - [0, 0, 1, 0], - [0, 0, 0, 1], - ] - else: - K = [ - [fx, 0, px, 0], - [0, fy, py, 0], - [0, 0, 0, 1], - [0, 0, 1, 0], - ] - - Returns: - A calibration matrix `K` of the SfM-conventioned camera - of shape (N, 4, 4). 
- """ - - if not torch.is_tensor(focal_length): - focal_length = torch.tensor(focal_length, device=device) - - if focal_length.ndim in (0, 1) or focal_length.shape[1] == 1: - fx = fy = focal_length - else: - fx, fy = focal_length.unbind(1) - - if not torch.is_tensor(principal_point): - principal_point = torch.tensor(principal_point, device=device) - - px, py = principal_point.unbind(1) - - K = fx.new_zeros(N, 4, 4) - K[:, 0, 0] = fx - K[:, 1, 1] = fy - if orthographic: - K[:, 0, 3] = px - K[:, 1, 3] = py - K[:, 2, 2] = 1.0 - K[:, 3, 3] = 1.0 - else: - K[:, 0, 2] = px - K[:, 1, 2] = py - K[:, 3, 2] = 1.0 - K[:, 2, 3] = 1.0 - - return K - - -################################################ -# Helper functions for world to view transforms -################################################ - - -def get_world_to_view_transform( - R: torch.Tensor = _R, T: torch.Tensor = _T -) -> Transform3d: - """ - This function returns a Transform3d representing the transformation - matrix to go from world space to view space by applying a rotation and - a translation. - - PyTorch3D uses the same convention as Hartley & Zisserman. - I.e., for camera extrinsic parameters R (rotation) and T (translation), - we map a 3D point `X_world` in world coordinates to - a point `X_cam` in camera coordinates with: - `X_cam = X_world R + T` - - Args: - R: (N, 3, 3) matrix representing the rotation. - T: (N, 3) matrix representing the translation. - - Returns: - a Transform3d object which represents the composed RT transformation. - - """ - # TODO: also support the case where RT is specified as one matrix - # of shape (N, 4, 4). - - if T.shape[0] != R.shape[0]: - msg = "Expected R, T to have the same batch dimension; got %r, %r" - raise ValueError(msg % (R.shape[0], T.shape[0])) - if T.dim() != 2 or T.shape[1:] != (3,): - msg = "Expected T to have shape (N, 3); got %r" - raise ValueError(msg % repr(T.shape)) - if R.dim() != 3 or R.shape[1:] != (3, 3): - msg = "Expected R to have shape (N, 3, 3); got %r" - raise ValueError(msg % repr(R.shape)) - - # Create a Transform3d object - T_ = Translate(T, device=T.device) - R_ = Rotate(R, device=R.device) - return R_.compose(T_) - - -def camera_position_from_spherical_angles( - distance: float, - elevation: float, - azimuth: float, - degrees: bool = True, - device: Device = "cpu", -) -> torch.Tensor: - """ - Calculate the location of the camera based on the distance away from - the target point, the elevation and azimuth angles. - - Args: - distance: distance of the camera from the object. - elevation, azimuth: angles. - The inputs distance, elevation and azimuth can be one of the following - - Python scalar - - Torch scalar - - Torch tensor of shape (N) or (1) - degrees: bool, whether the angles are specified in degrees or radians. - device: str or torch.device, device for new tensors to be placed on. - - The vectors are broadcast against each other so they all have shape (N, 1). - - Returns: - camera_position: (N, 3) xyz location of the camera. - """ - broadcasted_args = convert_to_tensors_and_broadcast( - distance, elevation, azimuth, device=device - ) - dist, elev, azim = broadcasted_args - if degrees: - elev = math.pi / 180.0 * elev - azim = math.pi / 180.0 * azim - x = dist * torch.cos(elev) * torch.sin(azim) - y = dist * torch.sin(elev) - z = dist * torch.cos(elev) * torch.cos(azim) - camera_position = torch.stack([x, y, z], dim=1) - if camera_position.dim() == 0: - camera_position = camera_position.view(1, -1) # add batch dim. 
- return camera_position.view(-1, 3) - - -def look_at_rotation( - camera_position, at=((0, 0, 0),), up=((0, 1, 0),), device: Device = "cpu" -) -> torch.Tensor: - """ - This function takes a vector 'camera_position' which specifies the location - of the camera in world coordinates and two vectors `at` and `up` which - indicate the position of the object and the up directions of the world - coordinate system respectively. The object is assumed to be centered at - the origin. - - The output is a rotation matrix representing the transformation - from world coordinates -> view coordinates. - - Args: - camera_position: position of the camera in world coordinates - at: position of the object in world coordinates - up: vector specifying the up direction in the world coordinate frame. - - The inputs camera_position, at and up can each be a - - 3 element tuple/list - - torch tensor of shape (1, 3) - - torch tensor of shape (N, 3) - - The vectors are broadcast against each other so they all have shape (N, 3). - - Returns: - R: (N, 3, 3) batched rotation matrices - """ - # Format input and broadcast - broadcasted_args = convert_to_tensors_and_broadcast( - camera_position, at, up, device=device - ) - camera_position, at, up = broadcasted_args - for t, n in zip([camera_position, at, up], ["camera_position", "at", "up"]): - if t.shape[-1] != 3: - msg = "Expected arg %s to have shape (N, 3); got %r" - raise ValueError(msg % (n, t.shape)) - z_axis = F.normalize(at - camera_position, eps=1e-5) - x_axis = F.normalize(torch.cross(up, z_axis, dim=1), eps=1e-5) - y_axis = F.normalize(torch.cross(z_axis, x_axis, dim=1), eps=1e-5) - is_close = torch.isclose(x_axis, torch.tensor(0.0), atol=5e-3).all( - dim=1, keepdim=True - ) - if is_close.any(): - replacement = F.normalize(torch.cross(y_axis, z_axis, dim=1), eps=1e-5) - x_axis = torch.where(is_close, replacement, x_axis) - R = torch.cat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), dim=1) - return R.transpose(1, 2) - - -def look_at_view_transform( - dist: _BatchFloatType = 1.0, - elev: _BatchFloatType = 0.0, - azim: _BatchFloatType = 0.0, - degrees: bool = True, - eye: Optional[Union[Sequence, torch.Tensor]] = None, - at=((0, 0, 0),), # (1, 3) - up=((0, 1, 0),), # (1, 3) - device: Device = "cpu", -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - This function returns a rotation and translation matrix - to apply the 'Look At' transformation from world -> view coordinates [0]. - - Args: - dist: distance of the camera from the object - elev: angle in degrees or radians. This is the angle between the - vector from the object to the camera, and the horizontal plane y = 0 (xz-plane). - azim: angle in degrees or radians. The vector from the object to - the camera is projected onto a horizontal plane y = 0. - azim is the angle between the projected vector and a - reference vector at (0, 0, 1) on the reference plane (the horizontal plane). - dist, elev and azim can be of shape (1), (N). - degrees: boolean flag to indicate if the elevation and azimuth - angles are specified in degrees or radians. - eye: the position of the camera(s) in world coordinates. If eye is not - None, it will override the camera position derived from dist, elev, azim. - up: the direction of the x axis in the world coordinate system. - at: the position of the object(s) in world coordinates. - eye, up and at can be of shape (1, 3) or (N, 3). - - Returns: - 2-element tuple containing - - - **R**: the rotation to apply to the points to align with the camera. 
- - **T**: the translation to apply to the points to align with the camera. - - References: - [0] https://www.scratchapixel.com - """ - - if eye is not None: - broadcasted_args = convert_to_tensors_and_broadcast(eye, at, up, device=device) - eye, at, up = broadcasted_args - C = eye - else: - broadcasted_args = convert_to_tensors_and_broadcast( - dist, elev, azim, at, up, device=device - ) - dist, elev, azim, at, up = broadcasted_args - C = ( - camera_position_from_spherical_angles( - dist, elev, azim, degrees=degrees, device=device - ) - + at - ) - - R = look_at_rotation(C, at, up, device=device) - T = -torch.bmm(R.transpose(1, 2), C[:, :, None])[:, :, 0] - return R, T - - -def get_ndc_to_screen_transform( - cameras, - with_xyflip: bool = False, - image_size: Optional[Union[List, Tuple, torch.Tensor]] = None, -) -> Transform3d: - """ - PyTorch3D NDC to screen conversion. - Conversion from PyTorch3D's NDC space (+X left, +Y up) to screen/image space - (+X right, +Y down, origin top left). - - Args: - cameras - with_xyflip: flips x- and y-axis if set to True. - Optional kwargs: - image_size: ((height, width),) specifying the height, width - of the image. If not provided, it reads it from cameras. - - We represent the NDC to screen conversion as a Transform3d - with projection matrix - - K = [ - [s, 0, 0, cx], - [0, s, 0, cy], - [0, 0, 1, 0], - [0, 0, 0, 1], - ] - - """ - # We require the image size, which is necessary for the transform - if image_size is None: - msg = "For NDC to screen conversion, image_size=(height, width) needs to be specified." - raise ValueError(msg) - - K = torch.zeros((cameras._N, 4, 4), device=cameras.device, dtype=torch.float32) - if not torch.is_tensor(image_size): - image_size = torch.tensor(image_size, device=cameras.device) - # pyre-fixme[16]: Item `List` of `Union[List[typing.Any], Tensor, Tuple[Any, - # ...]]` has no attribute `view`. - image_size = image_size.view(-1, 2) # of shape (1 or B)x2 - height, width = image_size.unbind(1) - - # For non square images, we scale the points such that smallest side - # has range [-1, 1] and the largest side has range [-u, u], with u > 1. - # This convention is consistent with the PyTorch3D renderer - scale = (image_size.min(dim=1).values - 0.0) / 2.0 - - K[:, 0, 0] = scale - K[:, 1, 1] = scale - K[:, 0, 3] = -1.0 * (width - 0.0) / 2.0 - K[:, 1, 3] = -1.0 * (height - 0.0) / 2.0 - K[:, 2, 2] = 1.0 - K[:, 3, 3] = 1.0 - - # Transpose the projection matrix as PyTorch3D transforms use row vectors. - transform = Transform3d( - matrix=K.transpose(1, 2).contiguous(), device=cameras.device - ) - - if with_xyflip: - # flip x, y axis - xyflip = torch.eye(4, device=cameras.device, dtype=torch.float32) - xyflip[0, 0] = -1.0 - xyflip[1, 1] = -1.0 - xyflip = xyflip.view(1, 4, 4).expand(cameras._N, -1, -1) - xyflip_transform = Transform3d( - matrix=xyflip.transpose(1, 2).contiguous(), device=cameras.device - ) - transform = transform.compose(xyflip_transform) - return transform - - -def get_screen_to_ndc_transform( - cameras, - with_xyflip: bool = False, - image_size: Optional[Union[List, Tuple, torch.Tensor]] = None, -) -> Transform3d: - """ - Screen to PyTorch3D NDC conversion. - Conversion from screen/image space (+X right, +Y down, origin top left) - to PyTorch3D's NDC space (+X left, +Y up). - - Args: - cameras - with_xyflip: flips x- and y-axis if set to True. - Optional kwargs: - image_size: ((height, width),) specifying the height, width - of the image. If not provided, it reads it from cameras. 
- - We represent the screen to NDC conversion as a Transform3d - with projection matrix - - K = [ - [1/s, 0, 0, cx/s], - [ 0, 1/s, 0, cy/s], - [ 0, 0, 1, 0], - [ 0, 0, 0, 1], - ] - - """ - transform = get_ndc_to_screen_transform( - cameras, - with_xyflip=with_xyflip, - image_size=image_size, - ).inverse() - return transform - - -def try_get_projection_transform( - cameras: CamerasBase, cameras_kwargs: Dict[str, Any] -) -> Optional[Transform3d]: - """ - Try block to get projection transform from cameras and cameras_kwargs. - - Args: - cameras: cameras instance, can be linear cameras or nonliear cameras - cameras_kwargs: camera parameters to be passed to cameras - - Returns: - If the camera implemented projection_transform, return the - projection transform; Otherwise, return None - """ - - transform = None - try: - transform = cameras.get_projection_transform(**cameras_kwargs) - except NotImplementedError: - pass - return transform diff --git a/pytorch3d/pytorch3d/renderer/compositing.py b/pytorch3d/pytorch3d/renderer/compositing.py deleted file mode 100644 index 669108e6eccf3b12afdb093d7a6717f2f656c5f1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/compositing.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -from pytorch3d import _C - - -# Example functions for blending the top K features per pixel using the outputs -# from rasterization. -# NOTE: All blending function should return a (N, H, W, C) tensor per batch element. -# This can be an image (C=3) or a set of features. - - -class _CompositeAlphaPoints(torch.autograd.Function): - """ - Composite features within a z-buffer using alpha compositing. Given a z-buffer - with corresponding features and weights, these values are accumulated according - to their weights such that features nearer in depth contribute more to the final - feature than ones further away. - - Concretely this means: - weighted_fs[b,c,i,j] = sum_k cum_alpha_k * features[c,pointsidx[b,k,i,j]] - cum_alpha_k = alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j]) - - Args: - features: Packed Tensor of shape (C, P) giving the features of each point. - alphas: float32 Tensor of shape (N, points_per_pixel, image_size, - image_size) giving the weight of each point in the z-buffer. - Values should be in the interval [0, 1]. - pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - Concretely pointsidx[n, k, y, x] = p means that features[:, p] is the - feature of the kth closest point (along the z-direction) to pixel (y, x) in - batch element n. This is weighted by alphas[n, k, y, x]. - - Returns: - weighted_fs: Tensor of shape (N, C, image_size, image_size) - giving the accumulated features at each point. 
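As a rough pure-PyTorch reference for the accumulation described above (the actual forward pass calls the fused _C.accum_alphacomposite kernel; the shapes below are small and made up, and every pixel is assumed to be covered so the index map contains no padding):

import torch

C, P = 3, 5                          # feature channels, packed points
N, K, H, W = 1, 2, 4, 4              # batch, points_per_pixel, image size
features = torch.rand(C, P)
alphas = torch.rand(N, K, H, W)
points_idx = torch.randint(0, P, (N, K, H, W))

# cum_alpha_k = alphas_k * prod_{l < k} (1 - alphas_l), with an empty product of 1 at k = 0
transmittance = torch.cumprod(1.0 - alphas, dim=1)
transmittance = torch.cat([torch.ones_like(alphas[:, :1]), transmittance[:, :-1]], dim=1)
cum_alpha = alphas * transmittance

gathered = features[:, points_idx]                          # (C, N, K, H, W)
weighted = (cum_alpha.unsqueeze(0) * gathered).sum(dim=2)   # sum over points_per_pixel
out = weighted.permute(1, 0, 2, 3)                          # (N, C, H, W)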
- """ - - @staticmethod - def forward(ctx, features, alphas, points_idx): - pt_cld = _C.accum_alphacomposite(features, alphas, points_idx) - - ctx.save_for_backward(features.clone(), alphas.clone(), points_idx.clone()) - return pt_cld - - @staticmethod - def backward(ctx, grad_output): - grad_features = None - grad_alphas = None - grad_points_idx = None - features, alphas, points_idx = ctx.saved_tensors - - grad_features, grad_alphas = _C.accum_alphacomposite_backward( - grad_output, features, alphas, points_idx - ) - - return grad_features, grad_alphas, grad_points_idx, None - - -def alpha_composite(pointsidx, alphas, pt_clds) -> torch.Tensor: - """ - Composite features within a z-buffer using alpha compositing. Given a z-buffer - with corresponding features and weights, these values are accumulated according - to their weights such that features nearer in depth contribute more to the final - feature than ones further away. - - Concretely this means: - weighted_fs[b,c,i,j] = sum_k cum_alpha_k * features[c,pointsidx[b,k,i,j]] - cum_alpha_k = alphas[b,k,i,j] * prod_l=0..k-1 (1 - alphas[b,l,i,j]) - - - Args: - pt_clds: Tensor of shape (N, C, P) giving the features of each point (can use - RGB for example). - alphas: float32 Tensor of shape (N, points_per_pixel, image_size, - image_size) giving the weight of each point in the z-buffer. - Values should be in the interval [0, 1]. - pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - Concretely pointsidx[n, k, y, x] = p means that features[n, :, p] is the - feature of the kth closest point (along the z-direction) to pixel (y, x) in - batch element n. This is weighted by alphas[n, k, y, x]. - - Returns: - Combined features: Tensor of shape (N, C, image_size, image_size) - giving the accumulated features at each point. - """ - return _CompositeAlphaPoints.apply(pt_clds, alphas, pointsidx) - - -class _CompositeNormWeightedSumPoints(torch.autograd.Function): - """ - Composite features within a z-buffer using normalized weighted sum. Given a z-buffer - with corresponding features and weights, these values are accumulated - according to their weights such that depth is ignored; the weights are used to - perform a weighted sum. - - Concretely this means: - weighted_fs[b,c,i,j] = - sum_k alphas[b,k,i,j] * features[c,pointsidx[b,k,i,j]] / sum_k alphas[b,k,i,j] - - Args: - features: Packed Tensor of shape (C, P) giving the features of each point. - alphas: float32 Tensor of shape (N, points_per_pixel, image_size, - image_size) giving the weight of each point in the z-buffer. - Values should be in the interval [0, 1]. - pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - Concretely pointsidx[n, k, y, x] = p means that features[:, p] is the - feature of the kth closest point (along the z-direction) to pixel (y, x) in - batch element n. This is weighted by alphas[n, k, y, x]. - - Returns: - weighted_fs: Tensor of shape (N, C, image_size, image_size) - giving the accumulated features at each point. 
- """ - - @staticmethod - def forward(ctx, features, alphas, points_idx): - pt_cld = _C.accum_weightedsumnorm(features, alphas, points_idx) - - ctx.save_for_backward(features.clone(), alphas.clone(), points_idx.clone()) - return pt_cld - - @staticmethod - def backward(ctx, grad_output): - grad_features = None - grad_alphas = None - grad_points_idx = None - features, alphas, points_idx = ctx.saved_tensors - - grad_features, grad_alphas = _C.accum_weightedsumnorm_backward( - grad_output, features, alphas, points_idx - ) - - return grad_features, grad_alphas, grad_points_idx, None - - -def norm_weighted_sum(pointsidx, alphas, pt_clds) -> torch.Tensor: - """ - Composite features within a z-buffer using normalized weighted sum. Given a z-buffer - with corresponding features and weights, these values are accumulated - according to their weights such that depth is ignored; the weights are used to - perform a weighted sum. - - Concretely this means: - weighted_fs[b,c,i,j] = - sum_k alphas[b,k,i,j] * features[c,pointsidx[b,k,i,j]] / sum_k alphas[b,k,i,j] - - Args: - pt_clds: Packed feature tensor of shape (C, P) giving the features of each point - (can use RGB for example). - alphas: float32 Tensor of shape (N, points_per_pixel, image_size, - image_size) giving the weight of each point in the z-buffer. - Values should be in the interval [0, 1]. - pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - Concretely pointsidx[n, k, y, x] = p means that features[:, p] is the - feature of the kth closest point (along the z-direction) to pixel (y, x) in - batch element n. This is weighted by alphas[n, k, y, x]. - - Returns: - Combined features: Tensor of shape (N, C, image_size, image_size) - giving the accumulated features at each point. - """ - return _CompositeNormWeightedSumPoints.apply(pt_clds, alphas, pointsidx) - - -class _CompositeWeightedSumPoints(torch.autograd.Function): - """ - Composite features within a z-buffer using normalized weighted sum. Given a z-buffer - with corresponding features and weights, these values are accumulated - according to their weights such that depth is ignored; the weights are used to - perform a weighted sum. As opposed to norm weighted sum, the weights are not - normalized to sum to 1. - - Concretely this means: - weighted_fs[b,c,i,j] = sum_k alphas[b,k,i,j] * features[c,pointsidx[b,k,i,j]] - - Args: - features: Packed Tensor of shape (C, P) giving the features of each point. - alphas: float32 Tensor of shape (N, points_per_pixel, image_size, - image_size) giving the weight of each point in the z-buffer. - Values should be in the interval [0, 1]. - pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - Concretely pointsidx[n, k, y, x] = p means that features[:, p] is the - feature of the kth closest point (along the z-direction) to pixel (y, x) in - batch element n. This is weighted by alphas[n, k, y, x]. - - Returns: - weighted_fs: Tensor of shape (N, C, image_size, image_size) - giving the accumulated features at each point. 
- """ - - @staticmethod - def forward(ctx, features, alphas, points_idx): - pt_cld = _C.accum_weightedsum(features, alphas, points_idx) - - ctx.save_for_backward(features.clone(), alphas.clone(), points_idx.clone()) - return pt_cld - - @staticmethod - def backward(ctx, grad_output): - grad_features = None - grad_alphas = None - grad_points_idx = None - features, alphas, points_idx = ctx.saved_tensors - - grad_features, grad_alphas = _C.accum_weightedsum_backward( - grad_output, features, alphas, points_idx - ) - - return grad_features, grad_alphas, grad_points_idx, None - - -def weighted_sum(pointsidx, alphas, pt_clds) -> torch.Tensor: - """ - Composite features within a z-buffer using normalized weighted sum. - - Args: - pt_clds: Packed Tensor of shape (C, P) giving the features of each point - (can use RGB for example). - alphas: float32 Tensor of shape (N, points_per_pixel, image_size, - image_size) giving the weight of each point in the z-buffer. - Values should be in the interval [0, 1]. - pointsidx: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - Concretely pointsidx[n, k, y, x] = p means that features[:, p] is the - feature of the kth closest point (along the z-direction) to pixel (y, x) in - batch element n. This is weighted by alphas[n, k, y, x]. - - Returns: - Combined features: Tensor of shape (N, C, image_size, image_size) - giving the accumulated features at each point. - """ - return _CompositeWeightedSumPoints.apply(pt_clds, alphas, pointsidx) diff --git a/pytorch3d/pytorch3d/renderer/fisheyecameras.py b/pytorch3d/pytorch3d/renderer/fisheyecameras.py deleted file mode 100644 index 3da558df3ae02e3dc6a0e5858d9417bba485a1c7..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/fisheyecameras.py +++ /dev/null @@ -1,584 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import List, Optional, Tuple, Union - -import torch -from pytorch3d.common.datatypes import Device -from pytorch3d.renderer.cameras import _R, _T, CamerasBase - -_focal_length = torch.tensor(((1.0,),)) -_principal_point = torch.tensor(((0.0, 0.0),)) -_radial_params = torch.tensor(((0.0, 0.0, 0.0, 0.0, 0.0, 0.0),)) -_tangential_params = torch.tensor(((0.0, 0.0),)) -_thin_prism_params = torch.tensor(((0.0, 0.0, 0.0, 0.0),)) - - -class FishEyeCameras(CamerasBase): - """ - A class which extends Pinhole camera by considering radial, tangential and - thin-prism distortion. For the fisheye camera model, k1, k2, ..., k_n_radial are - polynomial coefficents to model radial distortions. Two common types of radial - distortions are barrel and pincusion radial distortions. - - a = x / z, b = y / z, r = (a*a+b*b)^(1/2) - th = atan(r) - [x_r] = (th+ k0 * th^3 + k1* th^5 + ...) [a/r] - [y_r] [b/r] [1] - - - The tangential distortion parameters are p1 and p2. The primary cause is - due to the lens assembly not being centered over and parallel to the image plane. 
- tangentialDistortion = [(2 x_r^2 + rd^2)*p_0 + 2*x_r*y_r*p_1] - [(2 y_r^2 + rd^2)*p_1 + 2*x_r*y_r*p_0] [2] - where rd^2 = x_r^2 + y_r^2 - - The thin-prism distortion is modeled with s1, s2, s3, s4 coefficients - thinPrismDistortion = [s0 * rd^2 + s1 rd^4] - [s2 * rd^2 + s3 rd^4] [3] - - The projection - proj = diag(f, f) * uvDistorted + [cu; cv] - uvDistorted = [x_r] + tangentialDistortion + thinPrismDistortion [4] - [y_r] - f is the focal length and cu, cv are principal points in x, y axis. - - """ - - _FIELDS = ( - "focal_length", - "principal_point", - "R", - "T", - "radial_params", - "tangential_params", - "thin_prism_params", - "world_coordinates", - "use_radial", - "use_tangential", - "use_tin_prism", - "device", - "image_size", - ) - - def __init__( - self, - focal_length=_focal_length, - principal_point=_principal_point, - radial_params=_radial_params, - tangential_params=_tangential_params, - thin_prism_params=_thin_prism_params, - R: torch.Tensor = _R, - T: torch.Tensor = _T, - world_coordinates: bool = False, - use_radial: bool = True, - use_tangential: bool = True, - use_thin_prism: bool = True, - device: Device = "cpu", - image_size: Optional[Union[List, Tuple, torch.Tensor]] = None, - ) -> None: - - """ - - Args: - focal_ength: Focal length of the camera in world units. - A tensor of shape (N, 1) for square pixels, - where N is number of transforms. - principal_point: xy coordinates of the center of - the principal point of the camera in pixels. - A tensor of shape (N, 2). - radial_params: parameters for radial distortions. - A tensor of shape (N, num_radial). - tangential_params:parameters for tangential distortions. - A tensor of shape (N, 2). - thin_prism_params: parameters for thin-prism distortions. - A tensor of shape (N, 4). - R: Rotation matrix of shape (N, 3, 3) - T: Translation matrix of shape (N, 3) - world_coordinates: if True, project from world coordinates; otherwise from camera - coordinates - use_radial: radial_distortion, default to True - use_tangential: tangential distortion, default to True - use_thin_prism: thin prism distortion, default to True - device: torch.device or string - image_size: (height, width) of image size. - A tensor of shape (N, 2) or a list/tuple. Required for screen cameras. - - """ - - kwargs = {"image_size": image_size} if image_size is not None else {} - super().__init__( - device=device, - R=R, - T=T, - **kwargs, # pyre-ignore - ) - if image_size is not None: - if (self.image_size < 1).any(): # pyre-ignore - raise ValueError("Image_size provided has invalid values") - else: - self.image_size = None - - self.device = device - self.focal = focal_length.to(self.device) - self.principal_point = principal_point.to(self.device) - self.radial_params = radial_params.to(self.device) - self.tangential_params = tangential_params.to(self.device) - self.thin_prism_params = thin_prism_params.to(self.device) - self.R = R - self.T = T - self.world_coordinates = world_coordinates - self.use_radial = use_radial - self.use_tangential = use_tangential - self.use_thin_prism = use_thin_prism - self.epsilon = 1e-10 - self.num_distortion_iters = 50 - - self.R = self.R.to(self.device) - self.T = self.T.to(self.device) - self.num_radial = radial_params.shape[-1] - - def _project_points_batch( - self, - focal, - principal_point, - radial_params, - tangential_params, - thin_prism_params, - points, - ) -> torch.Tensor: - """ - Takes in points in the local reference frame of the camera and projects it - onto the image plan. 
Since this is a symmetric model, points with negative z are - projected to the positive sphere. i.e project(1,1,-1) == project(-1,-1,1) - - Args: - focal: (1) - principal_point: (2) - radial_params: (num_radial) - tangential_params: (2) - thin_prism_params: (4) - points in the camera coordinate frame: (..., 3). E.g., (P, 3) (1, P, 3) - or (M, P, 3) where P is the number of points - - Returns: - projected_points in the image plane: (..., 3). E.g., (P, 3) or - (1, P, 3) or (M, P, 3) - - """ - assert points.shape[-1] == 3, "points shape incorrect" - ab = points[..., :2] / points[..., 2:] - uv_distorted = ab - - r = ab.norm(dim=-1) - th = r.atan() - theta_sq = th * th - - # compute radial distortions, eq 1 - t = theta_sq - theta_pow = torch.stack([t, t**2, t**3, t**4, t**5, t**6], dim=-1) - th_radial = 1 + torch.sum(theta_pow * radial_params, dim=-1) - - # compute th/r, using the limit for small values - th_divr = th / r - boolean_mask = abs(r) < self.epsilon - th_divr[boolean_mask] = 1.0 - - # the distorted coordinates -- except for focal length and principal point - # start with the radial term - coeff = th_radial * th_divr - xr_yr = coeff[..., None] * ab - xr_yr_squared_norm = torch.pow(xr_yr, 2).sum(dim=-1, keepdim=True) - - if self.use_radial: - uv_distorted = xr_yr - - # compute tangential distortions, eq 2 - if self.use_tangential: - temp = 2 * torch.sum( - xr_yr * tangential_params, - dim=-1, - ) - uv_distorted = uv_distorted + ( - temp[..., None] * xr_yr + xr_yr_squared_norm * tangential_params - ) - - # compute thin-prism distortions, eq 3 - sh = uv_distorted.shape[:-1] - if self.use_thin_prism: - radial_powers = torch.cat( - [xr_yr_squared_norm, xr_yr_squared_norm * xr_yr_squared_norm], dim=-1 - ) - uv_distorted[..., 0] = uv_distorted[..., 0] + torch.sum( - thin_prism_params[..., 0:2] * radial_powers, - dim=-1, - ) - uv_distorted[..., 1] = uv_distorted[..., 1] + torch.sum( - thin_prism_params[..., 2:4] * radial_powers, - dim=-1, - ) - # return value: distorted points on the uv plane, eq 4 - projected_points = focal * uv_distorted + principal_point - return torch.cat( - [projected_points, torch.ones(list(sh) + [1], device=self.device)], dim=-1 - ) - - def check_input(self, points: torch.Tensor, batch_size: int): - """ - Check if the shapes are broadcastable between points and transforms. - Accept points of shape (P, 3) or (1, P, 3) or (M, P, 3). The batch_size - for transforms should be 1 when points take (M, P, 3). The batch_size - can be 1 or N when points take shape (P, 3). - - Args: - points: tensor of shape (P, 3) or (1, P, 3) or (M, P, 3) - batch_size: number of transforms - - Returns: - Boolean value if the input shapes are compatible. - """ - if points.ndim > 3: - return False - if points.ndim == 3: - M, P, K = points.shape - if K != 3: - return False - if M > 1 and batch_size > 1: - return False - return True - - def transform_points( - self, points, eps: Optional[float] = None, **kwargs - ) -> torch.Tensor: - """ - Transform input points from camera space to image space. - Args: - points: tensor of (..., 3). 
E.g., (P, 3) or (1, P, 3), (M, P, 3) - eps: tiny number to avoid zero divsion - - Returns: - torch.Tensor - when points take shape (P, 3) or (1, P, 3), output is (N, P, 3) - when points take shape (M, P, 3), output is (M, P, 3) - where N is the number of transforms, P number of points - """ - # project from world space to camera space - if self.world_coordinates: - world_to_view_transform = self.get_world_to_view_transform( - R=self.R, T=self.T - ) - points = world_to_view_transform.transform_points( - points.to(self.device), eps=eps - ) - else: - points = points.to(self.device) - - # project from camera space to image space - N = len(self.radial_params) - if not self.check_input(points, N): - msg = "Expected points of (P, 3) with batch_size 1 or N, or shape (M, P, 3) \ - with batch_size 1; got points of shape %r and batch_size %r" - raise ValueError(msg % (points.shape, N)) - - if N == 1: - return self._project_points_batch( - self.focal[0], - self.principal_point[0], - self.radial_params[0], - self.tangential_params[0], - self.thin_prism_params[0], - points, - ) - else: - outputs = [] - for i in range(N): - outputs.append( - self._project_points_batch( - self.focal[i], - self.principal_point[i], - self.radial_params[i], - self.tangential_params[i], - self.thin_prism_params[i], - points, - ) - ) - outputs = torch.stack(outputs, dim=0) - return outputs.squeeze() - - def _unproject_points_batch( - self, - focal, - principal_point, - radial_params, - tangential_params, - thin_prism_params, - xy: torch.Tensor, - ) -> torch.Tensor: - """ - Args: - focal: (1) - principal_point: (2) - radial_params: (num_radial) - tangential_params: (2) - thin_prism_params: (4) - xy: (..., 2) - - Returns: - point3d_est: (..., 3) - """ - sh = list(xy.shape[:-1]) - assert xy.shape[-1] == 2, "xy_depth shape incorrect" - uv_distorted = (xy - principal_point) / focal - - # get xr_yr from uvDistorted - xr_yr = self._compute_xr_yr_from_uv_distorted( - tangential_params, thin_prism_params, uv_distorted - ) - xr_yrNorm = torch.norm(xr_yr, dim=-1) - - # find theta - theta = self._get_theta_from_norm_xr_yr(radial_params, xr_yrNorm) - # get the point coordinates: - point3d_est = theta.new_ones(*sh, 3) - point3d_est[..., :2] = theta.tan()[..., None] / xr_yrNorm[..., None] * xr_yr - return point3d_est - - def unproject_points( - self, - xy_depth: torch.Tensor, - world_coordinates: bool = True, - scaled_depth_input: bool = False, - **kwargs, - ) -> torch.Tensor: - """ - Takes in 3-point ``uv_depth`` in the image plane of the camera and unprojects it - into the reference frame of the camera. - This function is the inverse of ``transform_points``. In particular it holds that - - X = unproject(project(X)) - and - x = project(unproject(s*x)) - - Args: - xy_depth: points in the image plane of shape (..., 3). 
E.g., - (P, 3) or (1, P, 3) or (M, P, 3) - world_coordinates: if the output is in world_coordinate, if False, convert to - camera coordinate - scaled_depth_input: False - - Returns: - unprojected_points in the camera frame with z = 1 - when points take shape (P, 3) or (1, P, 3), output is (N, P, 3) - when points take shape (M, P, 3), output is (M, P, 3) - where N is the number of transforms, P number of point - """ - xy_depth = xy_depth.to(self.device) - N = len(self.radial_params) - if N == 1: - return self._unproject_points_batch( - self.focal[0], - self.principal_point[0], - self.radial_params[0], - self.tangential_params[0], - self.thin_prism_params[0], - xy_depth[..., 0:2], - ) - else: - outputs = [] - for i in range(N): - outputs.append( - self._unproject_points_batch( - self.focal[i], - self.principal_point[i], - self.radial_params[i], - self.tangential_params[i], - self.thin_prism_params[i], - xy_depth[..., 0:2], - ) - ) - outputs = torch.stack(outputs, dim=0) - return outputs.squeeze() - - def _compute_xr_yr_from_uv_distorted( - self, tangential_params, thin_prism_params, uv_distorted: torch.Tensor - ) -> torch.Tensor: - """ - Helper function to compute the vector [x_r; y_r] from uvDistorted - - Args: - tangential_params: (2) - thin_prism_params: (4) - uv_distorted: (..., 2), E.g., (P, 2), (1, P, 2), (M, P, 2) - - Returns: - xr_yr: (..., 2) - """ - # early exit if we're not using any tangential/ thin prism distortions - if not self.use_tangential and not self.use_thin_prism: - return uv_distorted - - xr_yr = uv_distorted - # do Newton iterations to find xr_yr - for _ in range(self.num_distortion_iters): - # compute the estimated uvDistorted - uv_distorted_est = xr_yr.clone() - xr_yr_squared_norm = torch.pow(xr_yr, 2).sum(dim=-1, keepdim=True) - - if self.use_tangential: - temp = 2.0 * torch.sum( - xr_yr * tangential_params[..., 0:2], - dim=-1, - keepdim=True, - ) - uv_distorted_est = uv_distorted_est + ( - temp * xr_yr + xr_yr_squared_norm * tangential_params[..., 0:2] - ) - - if self.use_thin_prism: - radial_powers = torch.cat( - [xr_yr_squared_norm, xr_yr_squared_norm * xr_yr_squared_norm], - dim=-1, - ) - uv_distorted_est[..., 0] = uv_distorted_est[..., 0] + torch.sum( - thin_prism_params[..., 0:2] * radial_powers, - dim=-1, - ) - uv_distorted_est[..., 1] = uv_distorted_est[..., 1] + torch.sum( - thin_prism_params[..., 2:4] * radial_powers, - dim=-1, - ) - - # compute the derivative of uvDistorted wrt xr_yr - duv_distorted_dxryr = self._compute_duv_distorted_dxryr( - tangential_params, thin_prism_params, xr_yr, xr_yr_squared_norm[..., 0] - ) - # compute correction: - # note: the matrix duvDistorted_dxryr will be close to identity (for reasonable - # values of tangential/thin prism distortions) - correction = torch.linalg.solve( - duv_distorted_dxryr, (uv_distorted - uv_distorted_est)[..., None] - ) - xr_yr = xr_yr + correction[..., 0] - return xr_yr - - def _get_theta_from_norm_xr_yr( - self, radial_params, th_radial_desired - ) -> torch.Tensor: - """ - Helper function to compute the angle theta from the norm of the vector [x_r; y_r] - - Args: - radial_params: k1, k2, ..., k_num_radial, (num_radial) - th_radial_desired: desired angle of shape (...), E.g., (P), (1, P), (M, P) - - Returns: - th: angle theta (in radians) of shape (...), E.g., (P), (1, P), (M, P) - """ - sh = list(th_radial_desired.shape) - th = th_radial_desired - c = torch.tensor( - [2.0 * i + 3 for i in range(self.num_radial)], device=self.device - ) - for _ in range(self.num_distortion_iters): - theta_sq = th * 
th - th_radial = 1.0 - dthD_dth = 1.0 - - # compute the theta polynomial and its derivative wrt theta - t = theta_sq - theta_pow = torch.stack([t, t**2, t**3, t**4, t**5, t**6], dim=-1) - th_radial = th_radial + torch.sum(theta_pow * radial_params, dim=-1) - - dthD_dth = dthD_dth + torch.sum(c * radial_params * theta_pow, dim=-1) - th_radial = th_radial * th - - # compute the correction - step = torch.zeros(*sh, device=self.device) - # make sure don't divide by zero - nonzero_mask = dthD_dth.abs() > self.epsilon - step = step + nonzero_mask * (th_radial_desired - th_radial) / dthD_dth - # if derivative is close to zero, apply small correction in the appropriate - # direction to avoid numerical explosions - close_to_zero_mask = dthD_dth.abs() <= self.epsilon - dir_mask = (th_radial_desired - th_radial) * dthD_dth > 0.0 - boolean_mask = close_to_zero_mask & dir_mask - step = step + 10.0 * self.epsilon * boolean_mask - step = step - 10 * self.epsilon * (~nonzero_mask & ~boolean_mask) - - # apply correction - th = th + step - # revert to within 180 degrees FOV to avoid numerical overflow - idw = th.abs() >= math.pi / 2.0 - th[idw] = 0.999 * math.pi / 2.0 - return th - - def _compute_duv_distorted_dxryr( - self, tangential_params, thin_prism_params, xr_yr, xr_yr_squareNorm - ) -> torch.Tensor: - """ - Helper function, computes the Jacobian of uvDistorted wrt the vector [x_r;y_r] - - Args: - tangential_params: (2) - thin_prism_params: (4) - xr_yr: (P, 2) - xr_yr_squareNorm: (...), E.g., (P), (1, P), (M, P) - - Returns: - duv_distorted_dxryr: (..., 2, 2) Jacobian - """ - sh = list(xr_yr.shape[:-1]) - duv_distorted_dxryr = torch.empty((*sh, 2, 2), device=self.device) - if self.use_tangential: - duv_distorted_dxryr[..., 0, 0] = ( - 1.0 - + 6.0 * xr_yr[..., 0] * tangential_params[..., 0] - + 2.0 * xr_yr[..., 1] * tangential_params[..., 1] - ) - offdiag = 2.0 * ( - xr_yr[..., 0] * tangential_params[..., 1] - + xr_yr[..., 1] * tangential_params[..., 0] - ) - duv_distorted_dxryr[..., 0, 1] = offdiag - duv_distorted_dxryr[..., 1, 0] = offdiag - duv_distorted_dxryr[..., 1, 1] = ( - 1.0 - + 6.0 * xr_yr[..., 1] * tangential_params[..., 1] - + 2.0 * xr_yr[..., 0] * tangential_params[..., 0] - ) - else: - duv_distorted_dxryr = torch.eye(2).repeat(*sh, 1, 1) - - if self.use_thin_prism: - temp1 = 2.0 * ( - thin_prism_params[..., 0] - + 2.0 * thin_prism_params[..., 1] * xr_yr_squareNorm[...] - ) - duv_distorted_dxryr[..., 0, 0] = ( - duv_distorted_dxryr[..., 0, 0] + xr_yr[..., 0] * temp1 - ) - duv_distorted_dxryr[..., 0, 1] = ( - duv_distorted_dxryr[..., 0, 1] + xr_yr[..., 1] * temp1 - ) - - temp2 = 2.0 * ( - thin_prism_params[..., 2] - + 2.0 * thin_prism_params[..., 3] * xr_yr_squareNorm[...] - ) - duv_distorted_dxryr[..., 1, 0] = ( - duv_distorted_dxryr[..., 1, 0] + xr_yr[..., 0] * temp2 - ) - duv_distorted_dxryr[..., 1, 1] = ( - duv_distorted_dxryr[..., 1, 1] + xr_yr[..., 1] * temp2 - ) - return duv_distorted_dxryr - - def in_ndc(self): - return True - - def is_perspective(self): - return False diff --git a/pytorch3d/pytorch3d/renderer/implicit/__init__.py b/pytorch3d/pytorch3d/renderer/implicit/__init__.py deleted file mode 100644 index 39090112a4b7753e73a4e3306338ee7962a61406..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .harmonic_embedding import HarmonicEmbedding -from .raymarching import AbsorptionOnlyRaymarcher, EmissionAbsorptionRaymarcher -from .raysampling import ( - GridRaysampler, - MonteCarloRaysampler, - MultinomialRaysampler, - NDCGridRaysampler, - NDCMultinomialRaysampler, -) -from .renderer import ImplicitRenderer, VolumeRenderer, VolumeSampler -from .utils import ( - HeterogeneousRayBundle, - ray_bundle_to_ray_points, - ray_bundle_variables_to_ray_points, - RayBundle, -) - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/renderer/implicit/harmonic_embedding.py b/pytorch3d/pytorch3d/renderer/implicit/harmonic_embedding.py deleted file mode 100644 index 90e857f8aa7cc0286ae603f2d95ba96f72bfb22a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/harmonic_embedding.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional - -import torch - - -class HarmonicEmbedding(torch.nn.Module): - def __init__( - self, - n_harmonic_functions: int = 6, - omega_0: float = 1.0, - logspace: bool = True, - append_input: bool = True, - ) -> None: - """ - The harmonic embedding layer supports the classical - Nerf positional encoding described in - `NeRF `_ - and the integrated position encoding in - `MIP-NeRF `_. - - During the inference you can provide the extra argument `diag_cov`. - - If `diag_cov is None`, it converts - rays parametrized with a `ray_bundle` to 3D points by - extending each ray according to the corresponding length. - Then it converts each feature - (i.e. vector along the last dimension) in `x` - into a series of harmonic features `embedding`, - where for each i in range(dim) the following are present - in embedding[...]:: - - [ - sin(f_1*x[..., i]), - sin(f_2*x[..., i]), - ... - sin(f_N * x[..., i]), - cos(f_1*x[..., i]), - cos(f_2*x[..., i]), - ... - cos(f_N * x[..., i]), - x[..., i], # only present if append_input is True. - ] - - where N corresponds to `n_harmonic_functions-1`, and f_i is a scalar - denoting the i-th frequency of the harmonic embedding. - - - If `diag_cov is not None`, it approximates - conical frustums following a ray bundle as gaussians, - defined by x, the means of the gaussians and diag_cov, - the diagonal covariances. - Then it converts each gaussian - into a series of harmonic features `embedding`, - where for each i in range(dim) the following are present - in embedding[...]:: - - [ - sin(f_1*x[..., i]) * exp(0.5 * f_1**2 * diag_cov[..., i,]), - sin(f_2*x[..., i]) * exp(0.5 * f_2**2 * diag_cov[..., i,]), - ... - sin(f_N * x[..., i]) * exp(0.5 * f_N**2 * diag_cov[..., i,]), - cos(f_1*x[..., i]) * exp(0.5 * f_1**2 * diag_cov[..., i,]), - cos(f_2*x[..., i]) * exp(0.5 * f_2**2 * diag_cov[..., i,]),, - ... - cos(f_N * x[..., i]) * exp(0.5 * f_N**2 * diag_cov[..., i,]), - x[..., i], # only present if append_input is True. - ] - - where N equals `n_harmonic_functions-1`, and f_i is a scalar - denoting the i-th frequency of the harmonic embedding. 
- - If `logspace==True`, the frequencies `[f_1, ..., f_N]` are - powers of 2: - `f_1, ..., f_N = 2**torch.arange(n_harmonic_functions)` - - If `logspace==False`, frequencies are linearly spaced between - `1.0` and `2**(n_harmonic_functions-1)`: - `f_1, ..., f_N = torch.linspace( - 1.0, 2**(n_harmonic_functions-1), n_harmonic_functions - )` - - Note that `x` is also premultiplied by the base frequency `omega_0` - before evaluating the harmonic functions. - - Args: - n_harmonic_functions: int, number of harmonic - features - omega_0: float, base frequency - logspace: bool, Whether to space the frequencies in - logspace or linear space - append_input: bool, whether to concat the original - input to the harmonic embedding. If true the - output is of the form (embed.sin(), embed.cos(), x) - """ - super().__init__() - - if logspace: - frequencies = 2.0 ** torch.arange( - n_harmonic_functions, - dtype=torch.float32, - ) - else: - frequencies = torch.linspace( - 1.0, - 2.0 ** (n_harmonic_functions - 1), - n_harmonic_functions, - dtype=torch.float32, - ) - - self.register_buffer("_frequencies", frequencies * omega_0, persistent=False) - self.register_buffer( - "_zero_half_pi", torch.tensor([0.0, 0.5 * torch.pi]), persistent=False - ) - self.append_input = append_input - - def forward( - self, x: torch.Tensor, diag_cov: Optional[torch.Tensor] = None, **kwargs - ) -> torch.Tensor: - """ - Args: - x: tensor of shape [..., dim] - diag_cov: An optional tensor of shape `(..., dim)` - representing the diagonal covariance matrices of our Gaussians, joined with x - as means of the Gaussians. - - Returns: - embedding: a harmonic embedding of `x` of shape - [..., (n_harmonic_functions * 2 + int(append_input)) * num_points_per_ray] - """ - # [..., dim, n_harmonic_functions] - embed = x[..., None] * self._frequencies - # [..., 1, dim, n_harmonic_functions] + [2, 1, 1] => [..., 2, dim, n_harmonic_functions] - embed = embed[..., None, :, :] + self._zero_half_pi[..., None, None] - # Use the trig identity cos(x) = sin(x + pi/2) - # and do one vectorized call to sin([x, x+pi/2]) instead of (sin(x), cos(x)). - embed = embed.sin() - if diag_cov is not None: - x_var = diag_cov[..., None] * torch.pow(self._frequencies, 2) - exp_var = torch.exp(-0.5 * x_var) - # [..., 2, dim, n_harmonic_functions] - embed = embed * exp_var[..., None, :, :] - - embed = embed.reshape(*x.shape[:-1], -1) - - if self.append_input: - return torch.cat([embed, x], dim=-1) - return embed - - @staticmethod - def get_output_dim_static( - input_dims: int, - n_harmonic_functions: int, - append_input: bool, - ) -> int: - """ - Utility to help predict the shape of the output of `forward`. - - Args: - input_dims: length of the last dimension of the input tensor - n_harmonic_functions: number of embedding frequencies - append_input: whether or not to concat the original - input to the harmonic embedding - Returns: - int: the length of the last dimension of the output tensor - """ - return input_dims * (2 * n_harmonic_functions + int(append_input)) - - def get_output_dim(self, input_dims: int = 3) -> int: - """ - Same as above. The default for input_dims is 3 for 3D applications - which use harmonic embedding for positional encoding, - so the input might be xyz. 
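A quick, illustrative shape check of the embedding described above, assuming the class is imported via the implicit package __init__ shown earlier; with n_harmonic_functions=6 and append_input=True each input coordinate expands to 2*6 + 1 = 13 values:

import torch
from pytorch3d.renderer.implicit import HarmonicEmbedding

embedder = HarmonicEmbedding(n_harmonic_functions=6, append_input=True)
x = torch.rand(4, 100, 3)                 # e.g. 4 rays, 100 points per ray, xyz points
y = embedder(x)                           # sin/cos features plus the original xyz
assert y.shape == (4, 100, embedder.get_output_dim(3))   # (4, 100, 39)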
- """ - return self.get_output_dim_static( - input_dims, len(self._frequencies), self.append_input - ) diff --git a/pytorch3d/pytorch3d/renderer/implicit/raymarching.py b/pytorch3d/pytorch3d/renderer/implicit/raymarching.py deleted file mode 100644 index 047229b24af839d0d4183baca82ff56b197569e0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/raymarching.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from typing import Optional, Tuple, Union - -import torch - - -class EmissionAbsorptionRaymarcher(torch.nn.Module): - """ - Raymarch using the Emission-Absorption (EA) algorithm. - - The algorithm independently renders each ray by analyzing density and - feature values sampled at (typically uniformly) spaced 3D locations along - each ray. The density values `rays_densities` are of shape - `(..., n_points_per_ray)`, their values should range between [0, 1], and - represent the opaqueness of each point (the higher the less transparent). - The feature values `rays_features` of shape - `(..., n_points_per_ray, feature_dim)` represent the content of the - point that is supposed to be rendered in case the given point is opaque - (i.e. its density -> 1.0). - - EA first utilizes `rays_densities` to compute the absorption function - along each ray as follows:: - - absorption = cumprod(1 - rays_densities, dim=-1) - - The value of absorption at position `absorption[..., k]` specifies - how much light has reached `k`-th point along a ray since starting - its trajectory at `k=0`-th point. - - Each ray is then rendered into a tensor `features` of shape `(..., feature_dim)` - by taking a weighed combination of per-ray features `rays_features` as follows:: - - weights = absorption * rays_densities - features = (rays_features * weights).sum(dim=-2) - - Where `weights` denote a function that has a strong peak around the location - of the first surface point that a given ray passes through. - - Note that for a perfectly bounded volume (with a strictly binary density), - the `weights = cumprod(1 - rays_densities, dim=-1) * rays_densities` - function would yield 0 everywhere. In order to prevent this, - the result of the cumulative product is shifted `self.surface_thickness` - elements along the ray direction. - """ - - def __init__(self, surface_thickness: int = 1) -> None: - """ - Args: - surface_thickness: Denotes the overlap between the absorption - function and the density function. - """ - super().__init__() - self.surface_thickness = surface_thickness - - def forward( - self, - rays_densities: torch.Tensor, - rays_features: torch.Tensor, - eps: float = 1e-10, - **kwargs, - ) -> torch.Tensor: - """ - Args: - rays_densities: Per-ray density values represented with a tensor - of shape `(..., n_points_per_ray, 1)` whose values range in [0, 1]. - rays_features: Per-ray feature values represented with a tensor - of shape `(..., n_points_per_ray, feature_dim)`. - eps: A lower bound added to `rays_densities` before computing - the absorption function (cumprod of `1-rays_densities` along - each ray). This prevents the cumprod to yield exact 0 - which would inhibit any gradient-based learning. 
- - Returns: - features_opacities: A tensor of shape `(..., feature_dim+1)` - that concatenates two tensors along the last dimension: - 1) features: A tensor of per-ray renders - of shape `(..., feature_dim)`. - 2) opacities: A tensor of per-ray opacity values - of shape `(..., 1)`. Its values range between [0, 1] and - denote the total amount of light that has been absorbed - for each ray. E.g. a value of 0 corresponds to the ray - completely passing through a volume. Please refer to the - `AbsorptionOnlyRaymarcher` documentation for the - explanation of the algorithm that computes `opacities`. - """ - _check_raymarcher_inputs( - rays_densities, - rays_features, - None, - z_can_be_none=True, - features_can_be_none=False, - density_1d=True, - ) - _check_density_bounds(rays_densities) - rays_densities = rays_densities[..., 0] - absorption = _shifted_cumprod( - (1.0 + eps) - rays_densities, shift=self.surface_thickness - ) - weights = rays_densities * absorption - features = (weights[..., None] * rays_features).sum(dim=-2) - opacities = 1.0 - torch.prod(1.0 - rays_densities, dim=-1, keepdim=True) - - return torch.cat((features, opacities), dim=-1) - - -class AbsorptionOnlyRaymarcher(torch.nn.Module): - """ - Raymarch using the Absorption-Only (AO) algorithm. - - The algorithm independently renders each ray by analyzing density and - feature values sampled at (typically uniformly) spaced 3D locations along - each ray. The density values `rays_densities` are of shape - `(..., n_points_per_ray, 1)`, their values should range between [0, 1], and - represent the opaqueness of each point (the higher the less transparent). - The algorithm only measures the total amount of light absorbed along each ray - and, besides outputting per-ray `opacity` values of shape `(...,)`, - does not produce any feature renderings. - - The algorithm simply computes `total_transmission = prod(1 - rays_densities)` - of shape `(..., 1)` which, for each ray, measures the total amount of light - that passed through the volume. - It then returns `opacities = 1 - total_transmission`. - """ - - def __init__(self) -> None: - super().__init__() - - def forward( - self, rays_densities: torch.Tensor, **kwargs - ) -> Union[None, torch.Tensor]: - """ - Args: - rays_densities: Per-ray density values represented with a tensor - of shape `(..., n_points_per_ray)` whose values range in [0, 1]. - - Returns: - opacities: A tensor of per-ray opacity values of shape `(..., 1)`. - Its values range between [0, 1] and denote the total amount - of light that has been absorbed for each ray. E.g. a value - of 0 corresponds to the ray completely passing through a volume. - """ - - _check_raymarcher_inputs( - rays_densities, - None, - None, - features_can_be_none=True, - z_can_be_none=True, - density_1d=True, - ) - rays_densities = rays_densities[..., 0] - _check_density_bounds(rays_densities) - total_transmission = torch.prod(1 - rays_densities, dim=-1, keepdim=True) - opacities = 1.0 - total_transmission - return opacities - - -def _shifted_cumprod(x, shift: int = 1): - """ - Computes `torch.cumprod(x, dim=-1)` and prepends `shift` number of - ones and removes `shift` trailing elements to/from the last dimension - of the result. 
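A small worked example of why the shift matters for the emission-absorption weights, using a toy binary density ray (values chosen only for illustration):

import torch

d = torch.tensor([0.0, 1.0, 1.0, 0.0])             # densities along one ray
plain = torch.cumprod(1.0 - d, dim=-1)              # tensor([1., 0., 0., 0.])
print(plain * d)                                    # tensor([0., 0., 0., 0.])  (no weight anywhere)

shifted = torch.cat([torch.ones(1), plain[:-1]])    # tensor([1., 1., 0., 0.])
print(shifted * d)                                  # tensor([0., 1., 0., 0.])  (unit weight at the first surface point)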
- """ - x_cumprod = torch.cumprod(x, dim=-1) - x_cumprod_shift = torch.cat( - [torch.ones_like(x_cumprod[..., :shift]), x_cumprod[..., :-shift]], dim=-1 - ) - return x_cumprod_shift - - -def _check_density_bounds( - rays_densities: torch.Tensor, bounds: Tuple[float, float] = (0.0, 1.0) -) -> None: - """ - Checks whether the elements of `rays_densities` range within `bounds`. - If not issues a warning. - """ - with torch.no_grad(): - if (rays_densities.max() > bounds[1]) or (rays_densities.min() < bounds[0]): - warnings.warn( - "One or more elements of rays_densities are outside of valid" - + f"range {str(bounds)}" - ) - - -def _check_raymarcher_inputs( - rays_densities: torch.Tensor, - rays_features: Optional[torch.Tensor], - rays_z: Optional[torch.Tensor], - features_can_be_none: bool = False, - z_can_be_none: bool = False, - density_1d: bool = True, -) -> None: - """ - Checks the validity of the inputs to raymarching algorithms. - """ - if not torch.is_tensor(rays_densities): - raise ValueError("rays_densities has to be an instance of torch.Tensor.") - - if not z_can_be_none and not torch.is_tensor(rays_z): - raise ValueError("rays_z has to be an instance of torch.Tensor.") - - if not features_can_be_none and not torch.is_tensor(rays_features): - raise ValueError("rays_features has to be an instance of torch.Tensor.") - - if rays_densities.ndim < 1: - raise ValueError("rays_densities have to have at least one dimension.") - - if density_1d and rays_densities.shape[-1] != 1: - raise ValueError( - "The size of the last dimension of rays_densities has to be one." - + f" Got shape {rays_densities.shape}." - ) - - rays_shape = rays_densities.shape[:-1] - - # pyre-fixme[16]: `Optional` has no attribute `shape`. - if not z_can_be_none and rays_z.shape != rays_shape: - raise ValueError("rays_z have to be of the same shape as rays_densities.") - - if not features_can_be_none and rays_features.shape[:-1] != rays_shape: - raise ValueError( - "The first to previous to last dimensions of rays_features" - " have to be the same as all dimensions of rays_densities." - ) diff --git a/pytorch3d/pytorch3d/renderer/implicit/raysampling.py b/pytorch3d/pytorch3d/renderer/implicit/raysampling.py deleted file mode 100644 index c81178afe8fd869049d9b4306b0b3d915a5a01ad..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/raysampling.py +++ /dev/null @@ -1,794 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import warnings -from typing import Optional, Tuple, Union - -import torch -from pytorch3d.common.compat import meshgrid_ij -from pytorch3d.ops import padded_to_packed -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle, RayBundle -from torch.nn import functional as F - - -""" -This file defines three raysampling techniques: - - MultinomialRaysampler which can be used to sample rays from pixels of an image grid - - NDCMultinomialRaysampler which can be used to sample rays from pixels of an image grid, - which follows the pytorch3d convention for image grid coordinates - - MonteCarloRaysampler which randomly selects real-valued locations in the image plane - and emits rays from them -""" - - -class MultinomialRaysampler(torch.nn.Module): - """ - Samples a fixed number of points along rays which are regularly distributed - in a batch of rectangular image grids. Points along each ray - have uniformly-spaced z-coordinates between a predefined - minimum and maximum depth. - - The raysampler first generates a 3D coordinate grid of the following form:: - - / min_x, min_y, max_depth -------------- / max_x, min_y, max_depth - / /| - / / | ^ - / min_depth min_depth / | | - min_x ----------------------------- max_x | | image - min_y min_y | | height - | | | | - | | | v - | | | - | | / max_x, max_y, ^ - | | / max_depth / - min_x max_y / / n_pts_per_ray - max_y ----------------------------- max_x/ min_depth v - < --- image_width --- > - - In order to generate ray points, `MultinomialRaysampler` takes each 3D point of - the grid (with coordinates `[x, y, depth]`) and unprojects it - with `cameras.unproject_points([x, y, depth])`, where `cameras` are an - additional input to the `forward` function. - - Note that this is a generic implementation that can support any image grid - coordinate convention. For a raysampler which follows the PyTorch3D - coordinate conventions please refer to `NDCMultinomialRaysampler`. - As such, `NDCMultinomialRaysampler` is a special case of `MultinomialRaysampler`. - - Attributes: - min_x: The leftmost x-coordinate of each ray's source pixel's center. - max_x: The rightmost x-coordinate of each ray's source pixel's center. - min_y: The topmost y-coordinate of each ray's source pixel's center. - max_y: The bottommost y-coordinate of each ray's source pixel's center. - """ - - def __init__( - self, - *, - min_x: float, - max_x: float, - min_y: float, - max_y: float, - image_width: int, - image_height: int, - n_pts_per_ray: int, - min_depth: float, - max_depth: float, - n_rays_per_image: Optional[int] = None, - n_rays_total: Optional[int] = None, - unit_directions: bool = False, - stratified_sampling: bool = False, - ) -> None: - """ - Args: - min_x: The leftmost x-coordinate of each ray's source pixel's center. - max_x: The rightmost x-coordinate of each ray's source pixel's center. - min_y: The topmost y-coordinate of each ray's source pixel's center. - max_y: The bottommost y-coordinate of each ray's source pixel's center. - image_width: The horizontal size of the image grid. - image_height: The vertical size of the image grid. - n_pts_per_ray: The number of points sampled along each ray. - min_depth: The minimum depth of a ray-point. - max_depth: The maximum depth of a ray-point. - n_rays_per_image: If given, this amount of rays are sampled from the grid. - `n_rays_per_image` and `n_rays_total` cannot both be defined. - n_rays_total: How many rays in total to sample from the cameras provided. 
The result - is as if `n_rays_total_training` cameras were sampled with replacement from the - cameras provided and for every camera one ray was sampled. If set returns the - HeterogeneousRayBundle with batch_size=n_rays_total. - `n_rays_per_image` and `n_rays_total` cannot both be defined. - unit_directions: whether to normalize direction vectors in ray bundle. - stratified_sampling: if True, performs stratified random sampling - along the ray; otherwise takes ray points at deterministic offsets. - """ - super().__init__() - self._n_pts_per_ray = n_pts_per_ray - self._min_depth = min_depth - self._max_depth = max_depth - self._n_rays_per_image = n_rays_per_image - self._n_rays_total = n_rays_total - self._unit_directions = unit_directions - self._stratified_sampling = stratified_sampling - self.min_x, self.max_x = min_x, max_x - self.min_y, self.max_y = min_y, max_y - # get the initial grid of image xy coords - y, x = meshgrid_ij( - torch.linspace(min_y, max_y, image_height, dtype=torch.float32), - torch.linspace(min_x, max_x, image_width, dtype=torch.float32), - ) - _xy_grid = torch.stack([x, y], dim=-1) - - self.register_buffer("_xy_grid", _xy_grid, persistent=False) - - def forward( - self, - cameras: CamerasBase, - *, - mask: Optional[torch.Tensor] = None, - min_depth: Optional[float] = None, - max_depth: Optional[float] = None, - n_rays_per_image: Optional[int] = None, - n_pts_per_ray: Optional[int] = None, - stratified_sampling: Optional[bool] = None, - n_rays_total: Optional[int] = None, - **kwargs, - ) -> Union[RayBundle, HeterogeneousRayBundle]: - """ - Args: - cameras: A batch of `batch_size` cameras from which the rays are emitted. - mask: if given, the rays are sampled from the mask. Should be of size - (batch_size, image_height, image_width). - min_depth: The minimum depth of a ray-point. - max_depth: The maximum depth of a ray-point. - n_rays_per_image: If given, this amount of rays are sampled from the grid. - `n_rays_per_image` and `n_rays_total` cannot both be defined. - n_pts_per_ray: The number of points sampled along each ray. - stratified_sampling: if set, overrides stratified_sampling provided - in __init__. - n_rays_total: How many rays in total to sample from the cameras provided. The result - is as if `n_rays_total_training` cameras were sampled with replacement from the - cameras provided and for every camera one ray was sampled. If set returns the - HeterogeneousRayBundle with batch_size=n_rays_total. - `n_rays_per_image` and `n_rays_total` cannot both be defined. - Returns: - A named tuple RayBundle or dataclass HeterogeneousRayBundle with the - following fields: - - origins: A tensor of shape - `(batch_size, s1, s2, 3)` - denoting the locations of ray origins in the world coordinates. - directions: A tensor of shape - `(batch_size, s1, s2, 3)` - denoting the directions of each ray in the world coordinates. - lengths: A tensor of shape - `(batch_size, s1, s2, n_pts_per_ray)` - containing the z-coordinate (=depth) of each ray in world units. - xys: A tensor of shape - `(batch_size, s1, s2, 2)` - containing the 2D image coordinates of each ray or, - if mask is given, `(batch_size, n, 1, 2)` - Here `s1, s2` refer to spatial dimensions. 
-            `(s1, s2)` refer to (highest priority first):
-                - `(1, 1)` if `n_rays_total` is provided, (batch_size=n_rays_total)
-                - `(n_rays_per_image, 1)` if `n_rays_per_image` is provided,
-                - `(n, 1)` where n is the minimum cardinality of the mask
-                        in the batch if `mask` is provided
-                - `(image_height, image_width)` if nothing from above is satisfied
-
-            `HeterogeneousRayBundle` has additional members:
-                - camera_ids: tensor of shape (M,), where `M` is the number of unique sampled
-                    cameras. It represents unique ids of sampled cameras.
-                - camera_counts: tensor of shape (M,), where `M` is the number of unique sampled
-                    cameras. Represents how many times each camera from `camera_ids` was sampled
-
-            `HeterogeneousRayBundle` is returned if `n_rays_total` is provided else `RayBundle`
-            is returned.
-        """
-        n_rays_total = n_rays_total or self._n_rays_total
-        n_rays_per_image = n_rays_per_image or self._n_rays_per_image
-        if (n_rays_total is not None) and (n_rays_per_image is not None):
-            raise ValueError(
-                "`n_rays_total` and `n_rays_per_image` cannot both be defined."
-            )
-        if n_rays_total:
-            (
-                cameras,
-                mask,
-                camera_ids,  # unique ids of sampled cameras
-                camera_counts,  # number of times unique camera id was sampled
-                # `n_rays_per_image` is equal to the max number of times a single camera
-                # was sampled. We sample all cameras at `camera_ids` `n_rays_per_image` times
-                # and then discard the unneeded rays.
-                # pyre-ignore[9]
-                n_rays_per_image,
-            ) = _sample_cameras_and_masks(n_rays_total, cameras, mask)
-        else:
-            # pyre-ignore[9]
-            camera_ids: torch.LongTensor = torch.arange(len(cameras), dtype=torch.long)
-
-        batch_size = cameras.R.shape[0]
-        device = cameras.device
-
-        # expand the (H, W, 2) grid batch_size-times to (B, H, W, 2)
-        xy_grid = self._xy_grid.to(device).expand(batch_size, -1, -1, -1)
-
-        if mask is not None and n_rays_per_image is None:
-            # if num rays not given, sample according to the smallest mask
-            n_rays_per_image = (
-                n_rays_per_image or mask.sum(dim=(1, 2)).min().int().item()
-            )
-
-        if n_rays_per_image is not None:
-            if mask is not None:
-                assert mask.shape == xy_grid.shape[:3]
-                weights = mask.reshape(batch_size, -1)
-            else:
-                # it is probably more efficient to use torch.randperm
-                # for uniform weights but it is unlikely given that randperm
-                # is not batched and does not support partial permutation
-                _, width, height, _ = xy_grid.shape
-                weights = xy_grid.new_ones(batch_size, width * height)
-            # pyre-fixme[6]: For 2nd param expected `int` but got `Union[bool,
-            #  float, int]`.
- rays_idx = _safe_multinomial(weights, n_rays_per_image)[..., None].expand( - -1, -1, 2 - ) - - xy_grid = torch.gather(xy_grid.reshape(batch_size, -1, 2), 1, rays_idx)[ - :, :, None - ] - - min_depth = min_depth if min_depth is not None else self._min_depth - max_depth = max_depth if max_depth is not None else self._max_depth - n_pts_per_ray = ( - n_pts_per_ray if n_pts_per_ray is not None else self._n_pts_per_ray - ) - stratified_sampling = ( - stratified_sampling - if stratified_sampling is not None - else self._stratified_sampling - ) - - ray_bundle = _xy_to_ray_bundle( - cameras, - xy_grid, - min_depth, - max_depth, - n_pts_per_ray, - self._unit_directions, - stratified_sampling, - ) - - return ( - # pyre-ignore[61] - _pack_ray_bundle(ray_bundle, camera_ids, camera_counts) - if n_rays_total - else ray_bundle - ) - - -class NDCMultinomialRaysampler(MultinomialRaysampler): - """ - Samples a fixed number of points along rays which are regularly distributed - in a batch of rectangular image grids. Points along each ray - have uniformly-spaced z-coordinates between a predefined minimum and maximum depth. - - `NDCMultinomialRaysampler` follows the screen conventions of the `Meshes` and `Pointclouds` - renderers. I.e. the pixel coordinates are in [-1, 1]x[-u, u] or [-u, u]x[-1, 1] - where u > 1 is the aspect ratio of the image. - - For the description of arguments, see the documentation to MultinomialRaysampler. - """ - - def __init__( - self, - *, - image_width: int, - image_height: int, - n_pts_per_ray: int, - min_depth: float, - max_depth: float, - n_rays_per_image: Optional[int] = None, - n_rays_total: Optional[int] = None, - unit_directions: bool = False, - stratified_sampling: bool = False, - ) -> None: - if image_width >= image_height: - range_x = image_width / image_height - range_y = 1.0 - else: - range_x = 1.0 - range_y = image_height / image_width - - half_pix_width = range_x / image_width - half_pix_height = range_y / image_height - super().__init__( - min_x=range_x - half_pix_width, - max_x=-range_x + half_pix_width, - min_y=range_y - half_pix_height, - max_y=-range_y + half_pix_height, - image_width=image_width, - image_height=image_height, - n_pts_per_ray=n_pts_per_ray, - min_depth=min_depth, - max_depth=max_depth, - n_rays_per_image=n_rays_per_image, - n_rays_total=n_rays_total, - unit_directions=unit_directions, - stratified_sampling=stratified_sampling, - ) - - -class MonteCarloRaysampler(torch.nn.Module): - """ - Samples a fixed number of pixels within denoted xy bounds uniformly at random. - For each pixel, a fixed number of points is sampled along its ray at uniformly-spaced - z-coordinates such that the z-coordinates range between a predefined minimum - and maximum depth. - - For practical purposes, this is similar to MultinomialRaysampler without a mask, - however sampling at real-valued locations bypassing replacement checks may be faster. - """ - - def __init__( - self, - min_x: float, - max_x: float, - min_y: float, - max_y: float, - n_rays_per_image: int, - n_pts_per_ray: int, - min_depth: float, - max_depth: float, - *, - n_rays_total: Optional[int] = None, - unit_directions: bool = False, - stratified_sampling: bool = False, - ) -> None: - """ - Args: - min_x: The smallest x-coordinate of each ray's source pixel. - max_x: The largest x-coordinate of each ray's source pixel. - min_y: The smallest y-coordinate of each ray's source pixel. - max_y: The largest y-coordinate of each ray's source pixel. 
- n_rays_per_image: The number of rays randomly sampled in each camera. - `n_rays_per_image` and `n_rays_total` cannot both be defined. - n_pts_per_ray: The number of points sampled along each ray. - min_depth: The minimum depth of each ray-point. - max_depth: The maximum depth of each ray-point. - n_rays_total: How many rays in total to sample from the cameras provided. The result - is as if `n_rays_total_training` cameras were sampled with replacement from the - cameras provided and for every camera one ray was sampled. If set returns the - HeterogeneousRayBundle with batch_size=n_rays_total. - `n_rays_per_image` and `n_rays_total` cannot both be defined. - unit_directions: whether to normalize direction vectors in ray bundle. - stratified_sampling: if True, performs stratified sampling in n_pts_per_ray - bins for each ray; otherwise takes n_pts_per_ray deterministic points - on each ray with uniform offsets. - """ - super().__init__() - self._min_x = min_x - self._max_x = max_x - self._min_y = min_y - self._max_y = max_y - self._n_rays_per_image = n_rays_per_image - self._n_pts_per_ray = n_pts_per_ray - self._min_depth = min_depth - self._max_depth = max_depth - self._n_rays_total = n_rays_total - self._unit_directions = unit_directions - self._stratified_sampling = stratified_sampling - - def forward( - self, - cameras: CamerasBase, - *, - stratified_sampling: Optional[bool] = None, - **kwargs, - ) -> Union[RayBundle, HeterogeneousRayBundle]: - """ - Args: - cameras: A batch of `batch_size` cameras from which the rays are emitted. - stratified_sampling: if set, overrides stratified_sampling provided - in __init__. - Returns: - A named tuple `RayBundle` or dataclass `HeterogeneousRayBundle` with the - following fields: - - origins: A tensor of shape - `(batch_size, n_rays_per_image, 3)` - denoting the locations of ray origins in the world coordinates. - directions: A tensor of shape - `(batch_size, n_rays_per_image, 3)` - denoting the directions of each ray in the world coordinates. - lengths: A tensor of shape - `(batch_size, n_rays_per_image, n_pts_per_ray)` - containing the z-coordinate (=depth) of each ray in world units. - xys: A tensor of shape - `(batch_size, n_rays_per_image, 2)` - containing the 2D image coordinates of each ray. - If `n_rays_total` is provided `batch_size=n_rays_total`and - `n_rays_per_image=1` and `HeterogeneousRayBundle` is returned else `RayBundle` - is returned. - - `HeterogeneousRayBundle` has additional members: - - camera_ids: tensor of shape (M,), where `M` is the number of unique sampled - cameras. It represents unique ids of sampled cameras. - - camera_counts: tensor of shape (M,), where `M` is the number of unique sampled - cameras. Represents how many times each camera from `camera_ids` was sampled - """ - if ( - sum(x is not None for x in [self._n_rays_total, self._n_rays_per_image]) - != 1 - ): - raise ValueError( - "Exactly one of `self.n_rays_total` and `self.n_rays_per_image` " - "must be given." 
- ) - - if self._n_rays_total: - ( - cameras, - _, - camera_ids, - camera_counts, - n_rays_per_image, - ) = _sample_cameras_and_masks(self._n_rays_total, cameras, None) - else: - # pyre-ignore[9] - camera_ids: torch.LongTensor = torch.arange(len(cameras), dtype=torch.long) - n_rays_per_image = self._n_rays_per_image - - batch_size = cameras.R.shape[0] - - device = cameras.device - - # get the initial grid of image xy coords - # of shape (batch_size, n_rays_per_image, 2) - rays_xy = torch.cat( - [ - torch.rand( - size=(batch_size, n_rays_per_image, 1), - dtype=torch.float32, - device=device, - ) - * (high - low) - + low - for low, high in ( - (self._min_x, self._max_x), - (self._min_y, self._max_y), - ) - ], - dim=2, - ) - - stratified_sampling = ( - stratified_sampling - if stratified_sampling is not None - else self._stratified_sampling - ) - - ray_bundle = _xy_to_ray_bundle( - cameras, - rays_xy, - self._min_depth, - self._max_depth, - self._n_pts_per_ray, - self._unit_directions, - stratified_sampling, - ) - - return ( - # pyre-ignore[61] - _pack_ray_bundle(ray_bundle, camera_ids, camera_counts) - if self._n_rays_total - else ray_bundle - ) - - -# Settings for backwards compatibility -def GridRaysampler( - min_x: float, - max_x: float, - min_y: float, - max_y: float, - image_width: int, - image_height: int, - n_pts_per_ray: int, - min_depth: float, - max_depth: float, -) -> "MultinomialRaysampler": - """ - GridRaysampler has been DEPRECATED. Use MultinomialRaysampler instead. - Preserving GridRaysampler for backward compatibility. - """ - - warnings.warn( - """GridRaysampler is deprecated, - Use MultinomialRaysampler instead. - GridRaysampler will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return MultinomialRaysampler( - min_x=min_x, - max_x=max_x, - min_y=min_y, - max_y=max_y, - image_width=image_width, - image_height=image_height, - n_pts_per_ray=n_pts_per_ray, - min_depth=min_depth, - max_depth=max_depth, - ) - - -# Settings for backwards compatibility -def NDCGridRaysampler( - image_width: int, - image_height: int, - n_pts_per_ray: int, - min_depth: float, - max_depth: float, -) -> "NDCMultinomialRaysampler": - """ - NDCGridRaysampler has been DEPRECATED. Use NDCMultinomialRaysampler instead. - Preserving NDCGridRaysampler for backward compatibility. - """ - - warnings.warn( - """NDCGridRaysampler is deprecated, - Use NDCMultinomialRaysampler instead. - NDCGridRaysampler will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return NDCMultinomialRaysampler( - image_width=image_width, - image_height=image_height, - n_pts_per_ray=n_pts_per_ray, - min_depth=min_depth, - max_depth=max_depth, - ) - - -def _safe_multinomial(input: torch.Tensor, num_samples: int) -> torch.Tensor: - """ - Wrapper around torch.multinomial that attempts sampling without replacement - when possible, otherwise resorts to sampling with replacement. - - Args: - input: tensor of shape [B, n] containing non-negative values; - rows are interpreted as unnormalized event probabilities - in categorical distributions. - num_samples: number of samples to take. - - Returns: - LongTensor of shape [B, num_samples] containing - values from {0, ..., n - 1} where the elements [i, :] of row i make - (1) if there are num_samples or more non-zero values in input[i], - a random subset of the indices of those values, with - probabilities proportional to the values in input[i, :]. 
- - (2) if not, a random sample with replacement of the indices of - those values, with probabilities proportional to them. - This sample might not contain all the indices of the - non-zero values. - Behavior undetermined if there are no non-zero values in a whole row - or if there are negative values. - """ - try: - res = torch.multinomial(input, num_samples, replacement=False) - except RuntimeError: - # this is probably rare, so we don't mind sampling twice - res = torch.multinomial(input, num_samples, replacement=True) - no_repl = (input > 0.0).sum(dim=-1) >= num_samples - res[no_repl] = torch.multinomial(input[no_repl], num_samples, replacement=False) - return res - - # in some versions of Pytorch, zero probabilty samples can be drawn without an error - # due to this bug: https://github.com/pytorch/pytorch/issues/50034. Handle this case: - repl = (input > 0.0).sum(dim=-1) < num_samples - if repl.any(): - res[repl] = torch.multinomial(input[repl], num_samples, replacement=True) - - return res - - -def _xy_to_ray_bundle( - cameras: CamerasBase, - xy_grid: torch.Tensor, - min_depth: float, - max_depth: float, - n_pts_per_ray: int, - unit_directions: bool, - stratified_sampling: bool = False, -) -> RayBundle: - """ - Extends the `xy_grid` input of shape `(batch_size, ..., 2)` to rays. - This adds to each xy location in the grid a vector of `n_pts_per_ray` depths - uniformly spaced between `min_depth` and `max_depth`. - - The extended grid is then unprojected with `cameras` to yield - ray origins, directions and depths. - - Args: - cameras: cameras object representing a batch of cameras. - xy_grid: torch.tensor grid of image xy coords. - min_depth: The minimum depth of each ray-point. - max_depth: The maximum depth of each ray-point. - n_pts_per_ray: The number of points sampled along each ray. - unit_directions: whether to normalize direction vectors in ray bundle. - stratified_sampling: if True, performs stratified sampling in n_pts_per_ray - bins for each ray; otherwise takes n_pts_per_ray deterministic points - on each ray with uniform offsets. 
- """ - batch_size = xy_grid.shape[0] - spatial_size = xy_grid.shape[1:-1] - n_rays_per_image = spatial_size.numel() - - # ray z-coords - rays_zs = xy_grid.new_empty((0,)) - if n_pts_per_ray > 0: - depths = torch.linspace( - min_depth, - max_depth, - n_pts_per_ray, - dtype=xy_grid.dtype, - device=xy_grid.device, - ) - rays_zs = depths[None, None].expand(batch_size, n_rays_per_image, n_pts_per_ray) - - if stratified_sampling: - rays_zs = _jiggle_within_stratas(rays_zs) - - # make two sets of points at a constant depth=1 and 2 - to_unproject = torch.cat( - ( - xy_grid.view(batch_size, 1, n_rays_per_image, 2) - .expand(batch_size, 2, n_rays_per_image, 2) - .reshape(batch_size, n_rays_per_image * 2, 2), - torch.cat( - ( - xy_grid.new_ones(batch_size, n_rays_per_image, 1), - 2.0 * xy_grid.new_ones(batch_size, n_rays_per_image, 1), - ), - dim=1, - ), - ), - dim=-1, - ) - - # unproject the points - unprojected = cameras.unproject_points(to_unproject, from_ndc=True) - - # split the two planes back - rays_plane_1_world = unprojected[:, :n_rays_per_image] - rays_plane_2_world = unprojected[:, n_rays_per_image:] - - # directions are the differences between the two planes of points - rays_directions_world = rays_plane_2_world - rays_plane_1_world - - # origins are given by subtracting the ray directions from the first plane - rays_origins_world = rays_plane_1_world - rays_directions_world - - if unit_directions: - rays_directions_world = F.normalize(rays_directions_world, dim=-1) - - return RayBundle( - rays_origins_world.view(batch_size, *spatial_size, 3), - rays_directions_world.view(batch_size, *spatial_size, 3), - rays_zs.view(batch_size, *spatial_size, n_pts_per_ray), - xy_grid, - ) - - -def _jiggle_within_stratas(bin_centers: torch.Tensor) -> torch.Tensor: - """ - Performs sampling of 1 point per bin given the bin centers. - - More specifically, it replaces each point's value `z` - with a sample from a uniform random distribution on - `[z - delta_-, z + delta_+]`, where `delta_-` is half of the difference - between `z` and the previous point, and `delta_+` is half of the difference - between the next point and `z`. For the first and last items, the - corresponding boundary deltas are assumed zero. - - Args: - `bin_centers`: The input points of size (..., N); the result is broadcast - along all but the last dimension (the rows). Each row should be - sorted in ascending order. - - Returns: - a tensor of size (..., N) with the locations jiggled within stratas/bins. - """ - # Get intervals between bin centers. - mids = 0.5 * (bin_centers[..., 1:] + bin_centers[..., :-1]) - upper = torch.cat((mids, bin_centers[..., -1:]), dim=-1) - lower = torch.cat((bin_centers[..., :1], mids), dim=-1) - # Samples in those intervals. - jiggled = lower + (upper - lower) * torch.rand_like(lower) - return jiggled - - -def _sample_cameras_and_masks( - n_samples: int, cameras: CamerasBase, mask: Optional[torch.Tensor] = None -) -> Tuple[ - CamerasBase, - Optional[torch.Tensor], - torch.LongTensor, - torch.LongTensor, - torch.LongTensor, -]: - """ - Samples n_rays_total cameras and masks and returns them in a form - (camera_idx, count), where count represents number of times the same camera - has been sampled. - - Args: - n_samples: how many camera and mask pairs to sample - cameras: A batch of `batch_size` cameras from which the rays are emitted. - mask: Optional. Should be of size (batch_size, image_height, image_width). 
- Returns: - tuple of a form (sampled_cameras, sampled_masks, unique_sampled_camera_ids, - number_of_times_each_sampled_camera_has_been_sampled, - max_number_of_times_camera_has_been_sampled, - ) - """ - sampled_ids = torch.randint( - 0, - len(cameras), - size=(n_samples,), - dtype=torch.long, - ) - unique_ids, counts = torch.unique(sampled_ids, return_counts=True) - # pyre-ignore[7] - return ( - cameras[unique_ids], - mask[unique_ids] if mask is not None else None, - unique_ids, - counts, - torch.max(counts), - ) - - -# TODO: this function can be unified with ImplicitronRayBundle.get_padded_xys -def _pack_ray_bundle( - ray_bundle: RayBundle, camera_ids: torch.LongTensor, camera_counts: torch.LongTensor -) -> HeterogeneousRayBundle: - """ - Pack the raybundle from [n_cameras, max(rays_per_camera), ...] to - [total_num_rays, 1, ...] - - Args: - ray_bundle: A ray_bundle to pack - camera_ids: Unique ids of cameras that were sampled - camera_counts: how many of which camera to pack, each count coresponds to - one 'row' of the ray_bundle and says how many rays wll be taken - from it and packed. - Returns: - HeterogeneousRayBundle where batch_size=sum(camera_counts) and n_rays_per_image=1 - """ - # pyre-ignore[9] - camera_counts = camera_counts.to(ray_bundle.origins.device) - cumsum = torch.cumsum(camera_counts, dim=0, dtype=torch.long) - # pyre-ignore[9] - first_idxs: torch.LongTensor = torch.cat( - (camera_counts.new_zeros((1,), dtype=torch.long), cumsum[:-1]) - ) - num_inputs = int(camera_counts.sum()) - - return HeterogeneousRayBundle( - origins=padded_to_packed(ray_bundle.origins, first_idxs, num_inputs)[:, None], - directions=padded_to_packed(ray_bundle.directions, first_idxs, num_inputs)[ - :, None - ], - lengths=padded_to_packed(ray_bundle.lengths, first_idxs, num_inputs)[:, None], - xys=padded_to_packed(ray_bundle.xys, first_idxs, num_inputs)[:, None], - camera_ids=camera_ids, - camera_counts=camera_counts, - ) diff --git a/pytorch3d/pytorch3d/renderer/implicit/renderer.py b/pytorch3d/pytorch3d/renderer/implicit/renderer.py deleted file mode 100644 index ffd7578e4d31fb938e9fd30bf1bc96344155c909..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/renderer.py +++ /dev/null @@ -1,413 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Callable, Tuple, Union - -import torch - -from ...ops.utils import eyes -from ...structures import Volumes -from ...transforms import Transform3d -from ..cameras import CamerasBase -from .raysampling import HeterogeneousRayBundle, RayBundle -from .utils import _validate_ray_bundle_variables, ray_bundle_variables_to_ray_points - - -# The implicit renderer class should be initialized with a -# function for raysampling and a function for raymarching. - -# During the forward pass: -# 1) The raysampler: -# - samples rays from input cameras -# - transforms the rays to world coordinates -# 2) The volumetric_function (which is a callable argument of the forward pass) -# evaluates ray_densities and ray_features at the sampled ray-points. -# 3) The raymarcher takes ray_densities and ray_features and uses a raymarching -# algorithm to render each ray. - - -class ImplicitRenderer(torch.nn.Module): - """ - A class for rendering a batch of implicit surfaces. 
The class should
-    be initialized with a raysampler and raymarcher class which both have
-    to be a `Callable`.
-
-    VOLUMETRIC_FUNCTION
-
-    The `forward` function of the renderer accepts as input the rendering cameras
-    as well as the `volumetric_function` `Callable`, which defines a field of opacity
-    and feature vectors over the 3D domain of the scene.
-
-    A standard `volumetric_function` has the following signature::
-
-        def volumetric_function(
-            ray_bundle: Union[RayBundle, HeterogeneousRayBundle],
-            **kwargs,
-        ) -> Tuple[torch.Tensor, torch.Tensor]
-
-    With the following arguments:
-        `ray_bundle`: A RayBundle or HeterogeneousRayBundle object
-            containing the following variables:
-
-            `origins`: A tensor of shape `(minibatch, ..., 3)` denoting
-                the origins of the rendering rays.
-            `directions`: A tensor of shape `(minibatch, ..., 3)`
-                containing the direction vectors of rendering rays.
-            `lengths`: A tensor of shape
-                `(minibatch, ..., num_points_per_ray)` containing the
-                lengths at which the ray points are sampled.
-            `xys`: A tensor of shape
-                `(minibatch, ..., 2)` containing the
-                xy locations of each ray's pixel in the screen space.
-    Calling `volumetric_function` then returns the following:
-        `rays_densities`: A tensor of shape
-            `(minibatch, ..., num_points_per_ray, opacity_dim)` containing
-            an opacity vector for each ray point.
-        `rays_features`: A tensor of shape
-            `(minibatch, ..., num_points_per_ray, feature_dim)` containing
-            a feature vector for each ray point.
-
-    Note that, in order to increase flexibility of the API, we allow multiple
-    other arguments to enter the volumetric function via additional
-    (optional) keyword arguments `**kwargs`.
-    A typical use-case is passing a `CamerasBase` object as an additional
-    keyword argument, which can allow the volumetric function to adjust its
-    outputs based on the directions of the projection rays.
-
-    Example:
-        A simple volumetric function of a 0-centered
-        RGB sphere with a unit diameter is defined as follows::
-
-        def volumetric_function(
-            ray_bundle: Union[RayBundle, HeterogeneousRayBundle],
-            **kwargs,
-        ) -> Tuple[torch.Tensor, torch.Tensor]:
-
-            # first convert the ray origins, directions and lengths
-            # to 3D ray point locations in world coords
-            rays_points_world = ray_bundle_to_ray_points(ray_bundle)
-
-            # set the densities as an inverse sigmoid of the
-            # ray point distance from the sphere centroid
-            rays_densities = torch.sigmoid(
-                -100.0 * rays_points_world.norm(dim=-1, keepdim=True)
-            )
-
-            # set the ray features to RGB colors proportional
-            # to the 3D location of the projection of ray points
-            # on the sphere surface
-            rays_features = torch.nn.functional.normalize(
-                rays_points_world, dim=-1
-            ) * 0.5 + 0.5
-
-            return rays_densities, rays_features
-
-    """
-
-    def __init__(self, raysampler: Callable, raymarcher: Callable) -> None:
-        """
-        Args:
-            raysampler: A `Callable` that takes as input scene cameras
-                (an instance of `CamerasBase`) and returns a
-                RayBundle or HeterogeneousRayBundle, that
-                describes the rays emitted from the cameras.
-            raymarcher: A `Callable` that receives the response of the
-                `volumetric_function` (an input to `self.forward`) evaluated
-                along the sampled rays, and renders the rays with a
-                ray-marching algorithm.
-        """
-        super().__init__()
-
-        if not callable(raysampler):
-            raise ValueError('"raysampler" has to be a "Callable" object.')
-        if not callable(raymarcher):
-            raise ValueError('"raymarcher" has to be a "Callable" object.')
-
-        self.raysampler = raysampler
-        self.raymarcher = raymarcher
-
-    def forward(
-        self, cameras: CamerasBase, volumetric_function: Callable, **kwargs
-    ) -> Tuple[torch.Tensor, Union[RayBundle, HeterogeneousRayBundle]]:
-        """
-        Render a batch of images using a volumetric function
-        represented as a callable (e.g. a PyTorch module).
-
-        Args:
-            cameras: A batch of cameras that render the scene. A `self.raysampler`
-                takes the cameras as input and samples rays that pass through the
-                domain of the volumetric function.
-            volumetric_function: A `Callable` that accepts the parametrizations
-                of the rendering rays and returns the densities and features
-                at the respective 3D points of the rendering rays. Please refer to
-                the main class documentation for details.
-
-        Returns:
-            images: A tensor of shape `(minibatch, ..., feature_dim + opacity_dim)`
-                containing the result of the rendering.
-            ray_bundle: A `Union[RayBundle, HeterogeneousRayBundle]` containing
-                the parametrizations of the sampled rendering rays.
-        """
-
-        if not callable(volumetric_function):
-            raise ValueError('"volumetric_function" has to be a "Callable" object.')
-
-        # first call the ray sampler that returns the RayBundle or HeterogeneousRayBundle
-        # parametrizing the rendering rays.
-        ray_bundle = self.raysampler(
-            cameras=cameras, volumetric_function=volumetric_function, **kwargs
-        )
-        # ray_bundle.origins - minibatch x ... x 3
-        # ray_bundle.directions - minibatch x ... x 3
-        # ray_bundle.lengths - minibatch x ... x n_pts_per_ray
-        # ray_bundle.xys - minibatch x ... x 2
-
-        # given sampled rays, call the volumetric function that
-        # evaluates the densities and features at the locations of the
-        # ray points
-        # pyre-fixme[23]: Unable to unpack `object` into 2 values.
-        rays_densities, rays_features = volumetric_function(
-            ray_bundle=ray_bundle, cameras=cameras, **kwargs
-        )
-        # ray_densities - minibatch x ... x n_pts_per_ray x density_dim
-        # ray_features - minibatch x ... x n_pts_per_ray x feature_dim
-
-        # finally, march along the sampled rays to obtain the renders
-        images = self.raymarcher(
-            rays_densities=rays_densities,
-            rays_features=rays_features,
-            ray_bundle=ray_bundle,
-            **kwargs,
-        )
-        # images - minibatch x ... x (feature_dim + opacity_dim)
-
-        return images, ray_bundle
-
-
-# The volume renderer class should be initialized with a
-# function for raysampling and a function for raymarching.
-
-# During the forward pass:
-# 1) The raysampler:
-#    - samples rays from input cameras
-#    - transforms the rays to world coordinates
-# 2) The scene volumes (which are an argument of the forward function)
-#    are then sampled at the locations of the ray-points to generate
-#    ray_densities and ray_features.
-# 3) The raymarcher takes ray_densities and ray_features and uses a raymarching
-#    algorithm to render each ray.
-
-
-class VolumeRenderer(torch.nn.Module):
-    """
-    A class for rendering a batch of Volumes. The class should
-    be initialized with a raysampler and a raymarcher class which both have
-    to be a `Callable`.
- """ - - def __init__( - self, raysampler: Callable, raymarcher: Callable, sample_mode: str = "bilinear" - ) -> None: - """ - Args: - raysampler: A `Callable` that takes as input scene cameras - (an instance of `CamerasBase`) and returns a - `Union[RayBundle, HeterogeneousRayBundle],` that - describes the rays emitted from the cameras. - raymarcher: A `Callable` that receives the `volumes` - (an instance of `Volumes` input to `self.forward`) - sampled at the ray-points, and renders the rays with a - ray-marching algorithm. - sample_mode: Defines the algorithm used to sample the volumetric - voxel grid. Can be either "bilinear" or "nearest". - """ - super().__init__() - - self.renderer = ImplicitRenderer(raysampler, raymarcher) - self._sample_mode = sample_mode - - def forward( - self, cameras: CamerasBase, volumes: Volumes, **kwargs - ) -> Tuple[torch.Tensor, Union[RayBundle, HeterogeneousRayBundle]]: - """ - Render a batch of images using raymarching over rays cast through - input `Volumes`. - - Args: - cameras: A batch of cameras that render the scene. A `self.raysampler` - takes the cameras as input and samples rays that pass through the - domain of the volumetric function. - volumes: An instance of the `Volumes` class representing a - batch of volumes that are being rendered. - - Returns: - images: A tensor of shape `(minibatch, ..., (feature_dim + opacity_dim)` - containing the result of the rendering. - ray_bundle: A `RayBundle` or `HeterogeneousRayBundle` containing the - parametrizations of the sampled rendering rays. - """ - volumetric_function = VolumeSampler(volumes, sample_mode=self._sample_mode) - return self.renderer( - cameras=cameras, volumetric_function=volumetric_function, **kwargs - ) - - -class VolumeSampler(torch.nn.Module): - """ - A module to sample a batch of volumes `Volumes` - at 3D points sampled along projection rays. - """ - - def __init__( - self, - volumes: Volumes, - sample_mode: str = "bilinear", - padding_mode: str = "zeros", - ) -> None: - """ - Args: - volumes: An instance of the `Volumes` class representing a - batch of volumes that are being rendered. - sample_mode: Defines the algorithm used to sample the volumetric - voxel grid. Can be either "bilinear" or "nearest". - padding_mode: How to handle values outside of the volume. - One of: zeros, border, reflection - See torch.nn.functional.grid_sample for more information. - """ - super().__init__() - if not isinstance(volumes, Volumes): - raise ValueError("'volumes' have to be an instance of the 'Volumes' class.") - self._volumes = volumes - self._sample_mode = sample_mode - self._padding_mode = padding_mode - - def _get_ray_directions_transform(self): - """ - Compose the ray-directions transform by removing the translation component - from the volume global-to-local coords transform. - """ - world2local = self._volumes.get_world_to_local_coords_transform().get_matrix() - directions_transform_matrix = eyes( - 4, - N=world2local.shape[0], - device=world2local.device, - dtype=world2local.dtype, - ) - directions_transform_matrix[:, :3, :3] = world2local[:, :3, :3] - directions_transform = Transform3d(matrix=directions_transform_matrix) - return directions_transform - - def forward( - self, ray_bundle: Union[RayBundle, HeterogeneousRayBundle], **kwargs - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Given an input ray parametrization, the forward function samples - `self._volumes` at the respective 3D ray-points. - Can also accept ImplicitronRayBundle as argument for ray_bundle. 
- - Args: - ray_bundle: A RayBundle or HeterogeneousRayBundle object with the following fields: - rays_origins_world: A tensor of shape `(minibatch, ..., 3)` denoting the - origins of the sampling rays in world coords. - rays_directions_world: A tensor of shape `(minibatch, ..., 3)` - containing the direction vectors of sampling rays in world coords. - rays_lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)` - containing the lengths at which the rays are sampled. - - Returns: - rays_densities: A tensor of shape - `(minibatch, ..., num_points_per_ray, opacity_dim)` containing the - density vectors sampled from the volume at the locations of - the ray points. - rays_features: A tensor of shape - `(minibatch, ..., num_points_per_ray, feature_dim)` containing the - feature vectors sampled from the volume at the locations of - the ray points. - """ - - # take out the interesting parts of ray_bundle - rays_origins_world = ray_bundle.origins - rays_directions_world = ray_bundle.directions - rays_lengths = ray_bundle.lengths - - # validate the inputs - _validate_ray_bundle_variables( - rays_origins_world, rays_directions_world, rays_lengths - ) - if self._volumes.densities().shape[0] != rays_origins_world.shape[0]: - raise ValueError("Input volumes have to have the same batch size as rays.") - - ######################################################### - # 1) convert the origins/directions to the local coords # - ######################################################### - - # origins are mapped with the world_to_local transform of the volumes - rays_origins_local = self._volumes.world_to_local_coords(rays_origins_world) - - # obtain the Transform3d object that transforms ray directions to local coords - directions_transform = self._get_ray_directions_transform() - - # transform the directions to the local coords - rays_directions_local = directions_transform.transform_points( - rays_directions_world.view(rays_lengths.shape[0], -1, 3) - ).view(rays_directions_world.shape) - - ############################ - # 2) obtain the ray points # - ############################ - - # this op produces a fairly big tensor (minibatch, ..., n_samples_per_ray, 3) - rays_points_local = ray_bundle_variables_to_ray_points( - rays_origins_local, rays_directions_local, rays_lengths - ) - - ######################## - # 3) sample the volume # - ######################## - - # generate the tensor for sampling - volumes_densities = self._volumes.densities() - dim_density = volumes_densities.shape[1] - volumes_features = self._volumes.features() - - # reshape to a size which grid_sample likes - rays_points_local_flat = rays_points_local.view( - rays_points_local.shape[0], -1, 1, 1, 3 - ) - - # run the grid sampler on the volumes densities - rays_densities = torch.nn.functional.grid_sample( - volumes_densities, - rays_points_local_flat, - mode=self._sample_mode, - padding_mode=self._padding_mode, - align_corners=self._volumes.get_align_corners(), - ) - - # permute the dimensions & reshape densities after sampling - rays_densities = rays_densities.permute(0, 2, 3, 4, 1).view( - *rays_points_local.shape[:-1], volumes_densities.shape[1] - ) - - # if features exist, run grid sampler again on the features densities - if volumes_features is None: - dim_feature = 0 - _, rays_features = rays_densities.split([dim_density, dim_feature], dim=-1) - else: - rays_features = torch.nn.functional.grid_sample( - volumes_features, - rays_points_local_flat, - mode=self._sample_mode, - padding_mode=self._padding_mode, - 
align_corners=self._volumes.get_align_corners(), - ) - - # permute the dimensions & reshape features after sampling - rays_features = rays_features.permute(0, 2, 3, 4, 1).view( - *rays_points_local.shape[:-1], volumes_features.shape[1] - ) - - return rays_densities, rays_features diff --git a/pytorch3d/pytorch3d/renderer/implicit/sample_pdf.py b/pytorch3d/pytorch3d/renderer/implicit/sample_pdf.py deleted file mode 100644 index c2387e5b503d4f3ec8efb6e07dabd95dd4ff0eba..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/sample_pdf.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -from pytorch3d import _C - - -def sample_pdf( - bins: torch.Tensor, - weights: torch.Tensor, - n_samples: int, - det: bool = False, - eps: float = 1e-5, -) -> torch.Tensor: - """ - Samples probability density functions defined by bin edges `bins` and - the non-negative per-bin probabilities `weights`. - - Args: - bins: Tensor of shape `(..., n_bins+1)` denoting the edges of the sampling bins. - weights: Tensor of shape `(..., n_bins)` containing non-negative numbers - representing the probability of sampling the corresponding bin. - n_samples: The number of samples to draw from each set of bins. - det: If `False`, the sampling is random. `True` yields deterministic - uniformly-spaced sampling from the inverse cumulative density function. - eps: A constant preventing division by zero in case empty bins are present. - - Returns: - samples: Tensor of shape `(..., n_samples)` containing `n_samples` samples - drawn from each probability distribution. - - Refs: - [1] https://github.com/bmild/nerf/blob/55d8b00244d7b5178f4d003526ab6667683c9da9/run_nerf_helpers.py#L183 # noqa E501 - """ - if torch.is_grad_enabled() and (bins.requires_grad or weights.requires_grad): - raise NotImplementedError("sample_pdf differentiability.") - if weights.min() <= -eps: - raise ValueError("Negative weights provided.") - batch_shape = bins.shape[:-1] - n_bins = weights.shape[-1] - if n_bins + 1 != bins.shape[-1] or weights.shape[:-1] != batch_shape: - shapes = f"{bins.shape}{weights.shape}" - raise ValueError("Inconsistent shapes of bins and weights: " + shapes) - output_shape = batch_shape + (n_samples,) - - if det: - u = torch.linspace(0.0, 1.0, n_samples, device=bins.device, dtype=torch.float32) - output = u.expand(output_shape).contiguous() - else: - output = torch.rand(output_shape, dtype=torch.float32, device=bins.device) - - # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`. - _C.sample_pdf( - bins.reshape(-1, n_bins + 1), - weights.reshape(-1, n_bins), - output.reshape(-1, n_samples), - eps, - ) - - return output - - -def sample_pdf_python( - bins: torch.Tensor, - weights: torch.Tensor, - N_samples: int, - det: bool = False, - eps: float = 1e-5, -) -> torch.Tensor: - """ - This is a pure python implementation of the `sample_pdf` function. - It may be faster than sample_pdf when the number of bins is very large, - because it behaves as O(batchsize * [n_bins + log(n_bins) * n_samples] ) - whereas sample_pdf behaves as O(batchsize * n_bins * n_samples). - For 64 bins sample_pdf is much faster. - - Samples probability density functions defined by bin edges `bins` and - the non-negative per-bin probabilities `weights`. 
- - Note: This is a direct conversion of the TensorFlow function from the original - release [1] to PyTorch. It requires PyTorch 1.6 or greater due to the use of - torch.searchsorted. - - Args: - bins: Tensor of shape `(..., n_bins+1)` denoting the edges of the sampling bins. - weights: Tensor of shape `(..., n_bins)` containing non-negative numbers - representing the probability of sampling the corresponding bin. - N_samples: The number of samples to draw from each set of bins. - det: If `False`, the sampling is random. `True` yields deterministic - uniformly-spaced sampling from the inverse cumulative density function. - eps: A constant preventing division by zero in case empty bins are present. - - Returns: - samples: Tensor of shape `(..., N_samples)` containing `N_samples` samples - drawn from each probability distribution. - - Refs: - [1] https://github.com/bmild/nerf/blob/55d8b00244d7b5178f4d003526ab6667683c9da9/run_nerf_helpers.py#L183 # noqa E501 - """ - - # Get pdf - weights = weights + eps # prevent nans - if weights.min() <= 0: - raise ValueError("Negative weights provided.") - pdf = weights / weights.sum(dim=-1, keepdim=True) - cdf = torch.cumsum(pdf, -1) - cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], -1) - - # Take uniform samples u of shape (..., N_samples) - if det: - u = torch.linspace(0.0, 1.0, N_samples, device=cdf.device, dtype=cdf.dtype) - u = u.expand(list(cdf.shape[:-1]) + [N_samples]).contiguous() - else: - u = torch.rand( - list(cdf.shape[:-1]) + [N_samples], device=cdf.device, dtype=cdf.dtype - ) - - # Invert CDF - inds = torch.searchsorted(cdf, u, right=True) - # inds has shape (..., N_samples) identifying the bin of each sample. - below = (inds - 1).clamp(0) - above = inds.clamp(max=cdf.shape[-1] - 1) - # Below and above are of shape (..., N_samples), identifying the bin - # edges surrounding each sample. - - inds_g = torch.stack([below, above], -1).view( - *below.shape[:-1], below.shape[-1] * 2 - ) - cdf_g = torch.gather(cdf, -1, inds_g).view(*below.shape, 2) - bins_g = torch.gather(bins, -1, inds_g).view(*below.shape, 2) - # cdf_g and bins_g are of shape (..., N_samples, 2) and identify - # the cdf and the index of the two bin edges surrounding each sample. - - denom = cdf_g[..., 1] - cdf_g[..., 0] - denom = torch.where(denom < eps, torch.ones_like(denom), denom) - t = (u - cdf_g[..., 0]) / denom - # t is of shape (..., N_samples) and identifies how far through - # each sample is in its bin. - - samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0]) - - return samples diff --git a/pytorch3d/pytorch3d/renderer/implicit/utils.py b/pytorch3d/pytorch3d/renderer/implicit/utils.py deleted file mode 100644 index d73c8583b04985a2dda3cd4ecf0856bacc8bd4f6..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/implicit/utils.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import dataclasses -from typing import NamedTuple, Optional, Union - -import torch - - -class RayBundle(NamedTuple): - """ - Parametrizes points along projection rays by storing: - - origins: A tensor of shape `(..., 3)` denoting the - origins of the sampling rays in world coords. - directions: A tensor of shape `(..., 3)` containing the direction - vectors of sampling rays in world coords. 
They don't have to be normalized;
-            they define unit vectors in the respective 1D coordinate systems; see
-            documentation for :func:`ray_bundle_to_ray_points` for the conversion formula.
-        lengths: A tensor of shape `(..., num_points_per_ray)`
-            containing the lengths at which the rays are sampled.
-        xys: A tensor of shape `(..., 2)`, the xy-locations (`xys`) of the ray pixels
-    """
-
-    origins: torch.Tensor
-    directions: torch.Tensor
-    lengths: torch.Tensor
-    xys: torch.Tensor
-
-
-@dataclasses.dataclass
-class HeterogeneousRayBundle:
-    """
-    Members:
-        origins: A tensor of shape `(..., 3)` denoting the
-            origins of the sampling rays in world coords.
-        directions: A tensor of shape `(..., 3)` containing the direction
-            vectors of sampling rays in world coords. They don't have to be normalized;
-            they define unit vectors in the respective 1D coordinate systems; see
-            documentation for :func:`ray_bundle_to_ray_points` for the conversion formula.
-        lengths: A tensor of shape `(..., num_points_per_ray)`
-            containing the lengths at which the rays are sampled.
-        xys: A tensor of shape `(..., 2)`, the xy-locations (`xys`) of the ray pixels
-        camera_ids: A tensor of shape (N, ) which indicates which camera
-            was used to sample the rays. `N` is the number of unique sampled cameras.
-        camera_counts: A tensor of shape (N, ) which indicates how many times the
-            corresponding camera in `camera_ids` was sampled.
-            `sum(camera_counts)==total_number_of_rays`
-
-        If we sample cameras of ids [0, 3, 5, 3, 1, 0, 0] that would be
-        stored as camera_ids=[1, 3, 5, 0] and camera_counts=[1, 2, 1, 3]. `camera_ids` is a
-        set-like object with no particular ordering of elements. The ith element of
-        `camera_ids` corresponds to the ith element of `camera_counts`.
-    """
-
-    origins: torch.Tensor
-    directions: torch.Tensor
-    lengths: torch.Tensor
-    xys: torch.Tensor
-    camera_ids: Optional[torch.LongTensor] = None
-    camera_counts: Optional[torch.LongTensor] = None
-
-
-def ray_bundle_to_ray_points(
-    ray_bundle: Union[RayBundle, HeterogeneousRayBundle]
-) -> torch.Tensor:
-    """
-    Converts rays parametrized with a `ray_bundle` (an instance of the `RayBundle`
-    named tuple or HeterogeneousRayBundle dataclass) to 3D points by
-    extending each ray according to the corresponding length.
-
-    E.g. for 2 dimensional tensors `ray_bundle.origins`, `ray_bundle.directions`
-    and `ray_bundle.lengths`, the ray point at position `[i, j]` is::
-
-        ray_bundle.points[i, j, :] = (
-            ray_bundle.origins[i, :]
-            + ray_bundle.directions[i, :] * ray_bundle.lengths[i, j]
-        )
-
-    Note that both the directions and magnitudes of the vectors in
-    `ray_bundle.directions` matter.
-
-    Args:
-        ray_bundle: A `RayBundle` or `HeterogeneousRayBundle` object with fields:
-            origins: A tensor of shape `(..., 3)`
-            directions: A tensor of shape `(..., 3)`
-            lengths: A tensor of shape `(..., num_points_per_ray)`
-
-    Returns:
-        rays_points: A tensor of shape `(..., num_points_per_ray, 3)`
-            containing the points sampled along each ray.
-    """
-    return ray_bundle_variables_to_ray_points(
-        ray_bundle.origins, ray_bundle.directions, ray_bundle.lengths
-    )
-
-
-def ray_bundle_variables_to_ray_points(
-    rays_origins: torch.Tensor,
-    rays_directions: torch.Tensor,
-    rays_lengths: torch.Tensor,
-) -> torch.Tensor:
-    """
-    Converts rays parametrized with origins and directions
-    to 3D points by extending each ray according to the corresponding
-    ray length:
-
-    E.g.
for 2 dimensional input tensors `rays_origins`, `rays_directions` - and `rays_lengths`, the ray point at position `[i, j]` is:: - - rays_points[i, j, :] = ( - rays_origins[i, :] - + rays_directions[i, :] * rays_lengths[i, j] - ) - - Note that both the directions and magnitudes of the vectors in - `rays_directions` matter. - - Args: - rays_origins: A tensor of shape `(..., 3)` - rays_directions: A tensor of shape `(..., 3)` - rays_lengths: A tensor of shape `(..., num_points_per_ray)` - - Returns: - rays_points: A tensor of shape `(..., num_points_per_ray, 3)` - containing the points sampled along each ray. - """ - rays_points = ( - rays_origins[..., None, :] - + rays_lengths[..., :, None] * rays_directions[..., None, :] - ) - return rays_points - - -def _validate_ray_bundle_variables( - rays_origins: torch.Tensor, - rays_directions: torch.Tensor, - rays_lengths: torch.Tensor, -) -> None: - """ - Validate the shapes of RayBundle variables - `rays_origins`, `rays_directions`, and `rays_lengths`. - """ - ndim = rays_origins.ndim - if any(r.ndim != ndim for r in (rays_directions, rays_lengths)): - raise ValueError( - "rays_origins, rays_directions and rays_lengths" - + " have to have the same number of dimensions." - ) - - if ndim <= 2: - raise ValueError( - "rays_origins, rays_directions and rays_lengths" - + " have to have at least 3 dimensions." - ) - - spatial_size = rays_origins.shape[:-1] - if any(spatial_size != r.shape[:-1] for r in (rays_directions, rays_lengths)): - raise ValueError( - "The shapes of rays_origins, rays_directions and rays_lengths" - + " may differ only in the last dimension." - ) - - if any(r.shape[-1] != 3 for r in (rays_origins, rays_directions)): - raise ValueError( - "The size of the last dimension of rays_origins/rays_directions" - + "has to be 3." - ) diff --git a/pytorch3d/pytorch3d/renderer/lighting.py b/pytorch3d/pytorch3d/renderer/lighting.py deleted file mode 100644 index ab4f5fd3a7f131778fd9b3dfb303fbe498dd3cfa..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/lighting.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -import torch.nn.functional as F - -from ..common.datatypes import Device -from .utils import convert_to_tensors_and_broadcast, TensorProperties - - -def diffuse(normals, color, direction) -> torch.Tensor: - """ - Calculate the diffuse component of light reflection using Lambert's - cosine law. - - Args: - normals: (N, ..., 3) xyz normal vectors. Normals and points are - expected to have the same shape. - color: (1, 3) or (N, 3) RGB color of the diffuse component of the light. - direction: (x,y,z) direction of the light - - Returns: - colors: (N, ..., 3), same shape as the input points. - - The normals and light direction should be in the same coordinate frame - i.e. if the points have been transformed from world -> view space then - the normals and direction should also be in view space. - - NOTE: to use with the packed vertices (i.e. no batch dimension) reformat the - inputs in the following way. - - .. code-block:: python - - Args: - normals: (P, 3) - color: (N, 3)[batch_idx, :] -> (P, 3) - direction: (N, 3)[batch_idx, :] -> (P, 3) - - Returns: - colors: (P, 3) - - where batch_idx is of shape (P). 
For meshes, batch_idx can be: - meshes.verts_packed_to_mesh_idx() or meshes.faces_packed_to_mesh_idx() - depending on whether points refers to the vertex coordinates or - average/interpolated face coordinates. - """ - # TODO: handle multiple directional lights per batch element. - # TODO: handle attenuation. - - # Ensure color and location have same batch dimension as normals - normals, color, direction = convert_to_tensors_and_broadcast( - normals, color, direction, device=normals.device - ) - - # Reshape direction and color so they have all the arbitrary intermediate - # dimensions as normals. Assume first dim = batch dim and last dim = 3. - points_dims = normals.shape[1:-1] - expand_dims = (-1,) + (1,) * len(points_dims) + (3,) - if direction.shape != normals.shape: - direction = direction.view(expand_dims) - if color.shape != normals.shape: - color = color.view(expand_dims) - - # Renormalize the normals in case they have been interpolated. - # We tried to replace the following with F.cosine_similarity, but it wasn't faster. - normals = F.normalize(normals, p=2, dim=-1, eps=1e-6) - direction = F.normalize(direction, p=2, dim=-1, eps=1e-6) - angle = F.relu(torch.sum(normals * direction, dim=-1)) - return color * angle[..., None] - - -def specular( - points, normals, direction, color, camera_position, shininess -) -> torch.Tensor: - """ - Calculate the specular component of light reflection. - - Args: - points: (N, ..., 3) xyz coordinates of the points. - normals: (N, ..., 3) xyz normal vectors for each point. - color: (N, 3) RGB color of the specular component of the light. - direction: (N, 3) vector direction of the light. - camera_position: (N, 3) The xyz position of the camera. - shininess: (N) The specular exponent of the material. - - Returns: - colors: (N, ..., 3), same shape as the input points. - - The points, normals, camera_position, and direction should be in the same - coordinate frame i.e. if the points have been transformed from - world -> view space then the normals, camera_position, and light direction - should also be in view space. - - To use with a batch of packed points reindex in the following way. - .. code-block:: python:: - - Args: - points: (P, 3) - normals: (P, 3) - color: (N, 3)[batch_idx] -> (P, 3) - direction: (N, 3)[batch_idx] -> (P, 3) - camera_position: (N, 3)[batch_idx] -> (P, 3) - shininess: (N)[batch_idx] -> (P) - Returns: - colors: (P, 3) - - where batch_idx is of shape (P). For meshes batch_idx can be: - meshes.verts_packed_to_mesh_idx() or meshes.faces_packed_to_mesh_idx(). - """ - # TODO: handle multiple directional lights - # TODO: attenuate based on inverse squared distance to the light source - - if points.shape != normals.shape: - msg = "Expected points and normals to have the same shape: got %r, %r" - raise ValueError(msg % (points.shape, normals.shape)) - - # Ensure all inputs have same batch dimension as points - matched_tensors = convert_to_tensors_and_broadcast( - points, color, direction, camera_position, shininess, device=points.device - ) - _, color, direction, camera_position, shininess = matched_tensors - - # Reshape direction and color so they have all the arbitrary intermediate - # dimensions as points. Assume first dim = batch dim and last dim = 3. 
- points_dims = points.shape[1:-1] - expand_dims = (-1,) + (1,) * len(points_dims) - if direction.shape != normals.shape: - direction = direction.view(expand_dims + (3,)) - if color.shape != normals.shape: - color = color.view(expand_dims + (3,)) - if camera_position.shape != normals.shape: - camera_position = camera_position.view(expand_dims + (3,)) - if shininess.shape != normals.shape: - shininess = shininess.view(expand_dims) - - # Renormalize the normals in case they have been interpolated. - # We tried a version that uses F.cosine_similarity instead of renormalizing, - # but it was slower. - normals = F.normalize(normals, p=2, dim=-1, eps=1e-6) - direction = F.normalize(direction, p=2, dim=-1, eps=1e-6) - cos_angle = torch.sum(normals * direction, dim=-1) - # No specular highlights if angle is less than 0. - mask = (cos_angle > 0).to(torch.float32) - - # Calculate the specular reflection. - view_direction = camera_position - points - view_direction = F.normalize(view_direction, p=2, dim=-1, eps=1e-6) - reflect_direction = -direction + 2 * (cos_angle[..., None] * normals) - - # Cosine of the angle between the reflected light ray and the viewer - alpha = F.relu(torch.sum(view_direction * reflect_direction, dim=-1)) * mask - return color * torch.pow(alpha, shininess)[..., None] - - -class DirectionalLights(TensorProperties): - def __init__( - self, - ambient_color=((0.5, 0.5, 0.5),), - diffuse_color=((0.3, 0.3, 0.3),), - specular_color=((0.2, 0.2, 0.2),), - direction=((0, 1, 0),), - device: Device = "cpu", - ) -> None: - """ - Args: - ambient_color: RGB color of the ambient component. - diffuse_color: RGB color of the diffuse component. - specular_color: RGB color of the specular component. - direction: (x, y, z) direction vector of the light. - device: Device (as str or torch.device) on which the tensors should be located - - The inputs can each be - - 3 element tuple/list or list of lists - - torch tensor of shape (1, 3) - - torch tensor of shape (N, 3) - The inputs are broadcast against each other so they all have batch - dimension N. - """ - super().__init__( - device=device, - ambient_color=ambient_color, - diffuse_color=diffuse_color, - specular_color=specular_color, - direction=direction, - ) - _validate_light_properties(self) - if self.direction.shape[-1] != 3: - msg = "Expected direction to have shape (N, 3); got %r" - raise ValueError(msg % repr(self.direction.shape)) - - def clone(self): - other = self.__class__(device=self.device) - return super().clone(other) - - def diffuse(self, normals, points=None) -> torch.Tensor: - # NOTE: Points is not used but is kept in the args so that the API is - # the same for directional and point lights. The call sites should not - # need to know the light type. 
- return diffuse( - normals=normals, - color=self.diffuse_color, - direction=self.direction, - ) - - def specular(self, normals, points, camera_position, shininess) -> torch.Tensor: - return specular( - points=points, - normals=normals, - color=self.specular_color, - direction=self.direction, - camera_position=camera_position, - shininess=shininess, - ) - - -class PointLights(TensorProperties): - def __init__( - self, - ambient_color=((0.5, 0.5, 0.5),), - diffuse_color=((0.3, 0.3, 0.3),), - specular_color=((0.2, 0.2, 0.2),), - location=((0, 1, 0),), - device: Device = "cpu", - ) -> None: - """ - Args: - ambient_color: RGB color of the ambient component - diffuse_color: RGB color of the diffuse component - specular_color: RGB color of the specular component - location: xyz position of the light. - device: Device (as str or torch.device) on which the tensors should be located - - The inputs can each be - - 3 element tuple/list or list of lists - - torch tensor of shape (1, 3) - - torch tensor of shape (N, 3) - The inputs are broadcast against each other so they all have batch - dimension N. - """ - super().__init__( - device=device, - ambient_color=ambient_color, - diffuse_color=diffuse_color, - specular_color=specular_color, - location=location, - ) - _validate_light_properties(self) - if self.location.shape[-1] != 3: - msg = "Expected location to have shape (N, 3); got %r" - raise ValueError(msg % repr(self.location.shape)) - - def clone(self): - other = self.__class__(device=self.device) - return super().clone(other) - - def reshape_location(self, points) -> torch.Tensor: - """ - Reshape the location tensor to have dimensions - compatible with the points which can either be of - shape (P, 3) or (N, H, W, K, 3). - """ - if self.location.ndim == points.ndim: - return self.location - return self.location[:, None, None, None, :] - - def diffuse(self, normals, points) -> torch.Tensor: - location = self.reshape_location(points) - direction = location - points - return diffuse(normals=normals, color=self.diffuse_color, direction=direction) - - def specular(self, normals, points, camera_position, shininess) -> torch.Tensor: - location = self.reshape_location(points) - direction = location - points - return specular( - points=points, - normals=normals, - color=self.specular_color, - direction=direction, - camera_position=camera_position, - shininess=shininess, - ) - - -class AmbientLights(TensorProperties): - """ - A light object representing the same color of light everywhere. - By default, this is white, which effectively means lighting is - not used in rendering. - - Unlike other lights this supports an arbitrary number of channels, not just 3 for RGB. - The ambient_color input determines the number of channels. - """ - - def __init__(self, *, ambient_color=None, device: Device = "cpu") -> None: - """ - If ambient_color is provided, it should be a sequence of - triples of floats. - - Args: - ambient_color: RGB color - device: Device (as str or torch.device) on which the tensors should be located - - The ambient_color if provided, should be - - tuple/list of C-element tuples of floats - - torch tensor of shape (1, C) - - torch tensor of shape (N, C) - where C is the number of channels and N is batch size. - For RGB, C is 3. 
- """ - if ambient_color is None: - ambient_color = ((1.0, 1.0, 1.0),) - super().__init__(ambient_color=ambient_color, device=device) - - def clone(self): - other = self.__class__(device=self.device) - return super().clone(other) - - def diffuse(self, normals, points) -> torch.Tensor: - return self._zeros_channels(points) - - def specular(self, normals, points, camera_position, shininess) -> torch.Tensor: - return self._zeros_channels(points) - - def _zeros_channels(self, points: torch.Tensor) -> torch.Tensor: - ch = self.ambient_color.shape[-1] - return torch.zeros(*points.shape[:-1], ch, device=points.device) - - -def _validate_light_properties(obj) -> None: - props = ("ambient_color", "diffuse_color", "specular_color") - for n in props: - t = getattr(obj, n) - if t.shape[-1] != 3: - msg = "Expected %s to have shape (N, 3); got %r" - raise ValueError(msg % (n, t.shape)) diff --git a/pytorch3d/pytorch3d/renderer/materials.py b/pytorch3d/pytorch3d/renderer/materials.py deleted file mode 100644 index 27558ed8a66f82d43e702e6e96f7734ba6ce803f..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/materials.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch - -from ..common.datatypes import Device -from .utils import TensorProperties - - -class Materials(TensorProperties): - """ - A class for storing a batch of material properties. Currently only one - material per batch element is supported. - """ - - def __init__( - self, - ambient_color=((1, 1, 1),), - diffuse_color=((1, 1, 1),), - specular_color=((1, 1, 1),), - shininess=64, - device: Device = "cpu", - ) -> None: - """ - Args: - ambient_color: ambient reflectivity of the material - diffuse_color: diffuse reflectivity of the material - specular_color: specular reflectivity of the material - shininess: The specular exponent for the material. This defines - the focus of the specular highlight with a high value - resulting in a concentrated highlight. Shininess values - can range from 0-1000. - device: Device (as str or torch.device) on which the tensors should be located - - ambient_color, diffuse_color and specular_color can be of shape - (1, C) or (N, C) where C is typically 3 (for RGB). shininess can be of shape (1,) - or (N,). - - The colors and shininess are broadcast against each other so need to - have either the same batch dimension or batch dimension = 1. 
- """ - super().__init__( - device=device, - diffuse_color=diffuse_color, - ambient_color=ambient_color, - specular_color=specular_color, - shininess=shininess, - ) - C = self.ambient_color.shape[-1] - for n in ["ambient_color", "diffuse_color", "specular_color"]: - t = getattr(self, n) - if t.shape[-1] != C: - msg = "Expected %s to have shape (N, %d); got %r" - raise ValueError(msg % (n, C, t.shape)) - if self.shininess.shape != torch.Size([self._N]): - msg = "shininess should have shape (N); got %r" - raise ValueError(msg % repr(self.shininess.shape)) - - def clone(self): - other = Materials(device=self.device) - return super().clone(other) diff --git a/pytorch3d/pytorch3d/renderer/mesh/__init__.py b/pytorch3d/pytorch3d/renderer/mesh/__init__.py deleted file mode 100644 index f6bda3f77477f6ea1de5b7d31f0b619b05027f92..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .clip import ( - clip_faces, - ClipFrustum, - ClippedFaces, - convert_clipped_rasterization_to_original_faces, -) -from .rasterize_meshes import rasterize_meshes -from .rasterizer import MeshRasterizer, RasterizationSettings -from .renderer import MeshRenderer, MeshRendererWithFragments -from .shader import ( # DEPRECATED - BlendParams, - HardFlatShader, - HardGouraudShader, - HardPhongShader, - SoftGouraudShader, - SoftPhongShader, - SoftSilhouetteShader, - SplatterPhongShader, - TexturedSoftPhongShader, -) -from .shading import gouraud_shading, phong_shading -from .textures import ( # DEPRECATED - Textures, - TexturesAtlas, - TexturesBase, - TexturesUV, - TexturesVertex, -) - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/renderer/mesh/clip.py b/pytorch3d/pytorch3d/renderer/mesh/clip.py deleted file mode 100644 index 6261f9c5e2fbd50533c388cfa372b438003c7958..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/clip.py +++ /dev/null @@ -1,724 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Any, List, Optional, Tuple - -import torch - - -""" -Mesh clipping is done before rasterization and is implemented using 4 cases -(these will be referred to throughout the functions below) - -Case 1: the triangle is completely in front of the clipping plane (it is left - unchanged) -Case 2: the triangle is completely behind the clipping plane (it is culled) -Case 3: the triangle has exactly two vertices behind the clipping plane (it is - clipped into a smaller triangle) -Case 4: the triangle has exactly one vertex behind the clipping plane (it is clipped - into a smaller quadrilateral and divided into two triangular faces) - -After rasterization, the Fragments from the clipped/modified triangles -are mapped back to the triangles in the original mesh. The indices, -barycentric coordinates and distances are all relative to original mesh triangles. - -NOTE: It is assumed that all z-coordinates are in world coordinates (not NDC -coordinates), while x and y coordinates may be in NDC/screen coordinates -(i.e after applying a projective transform e.g. cameras.transform_points(points)). 
-""" - - -class ClippedFaces: - """ - Helper class to store the data for the clipped version of a Meshes object - (face_verts, mesh_to_face_first_idx, num_faces_per_mesh) along with - conversion information (faces_clipped_to_unclipped_idx, barycentric_conversion, - faces_clipped_to_conversion_idx, clipped_faces_neighbor_idx) required to convert - barycentric coordinates from rasterization of the clipped Meshes to barycentric - coordinates in terms of the unclipped Meshes. - - Args: - face_verts: FloatTensor of shape (F_clipped, 3, 3) giving the verts of - each of the clipped faces - mesh_to_face_first_idx: an tensor of shape (N,), where N is the number of meshes - in the batch. The ith element stores the index into face_verts - of the first face of the ith mesh. - num_faces_per_mesh: a tensor of shape (N,) storing the number of faces in each mesh. - faces_clipped_to_unclipped_idx: (F_clipped,) shaped LongTensor mapping each clipped - face back to the face in faces_unclipped (i.e. the faces in the original meshes - obtained using meshes.faces_packed()) - barycentric_conversion: (T, 3, 3) FloatTensor, where barycentric_conversion[i, :, k] - stores the barycentric weights in terms of the world coordinates of the original - (big) unclipped triangle for the kth vertex in the clipped (small) triangle. - If the rasterizer then expresses some NDC coordinate in terms of barycentric - world coordinates for the clipped (small) triangle as alpha_clipped[i,:], - alpha_unclipped[i, :] = barycentric_conversion[i, :, :]*alpha_clipped[i, :] - faces_clipped_to_conversion_idx: (F_clipped,) shaped LongTensor mapping each clipped - face to the applicable row of barycentric_conversion (or set to -1 if conversion is - not needed). - clipped_faces_neighbor_idx: LongTensor of shape (F_clipped,) giving the index of the - neighboring face for each case 4 triangle. e.g. for a case 4 face with f split - into two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx. - Faces which are not clipped and subdivided are set to -1 (i.e cases 1/2/3). - """ - - __slots__ = [ - "face_verts", - "mesh_to_face_first_idx", - "num_faces_per_mesh", - "faces_clipped_to_unclipped_idx", - "barycentric_conversion", - "faces_clipped_to_conversion_idx", - "clipped_faces_neighbor_idx", - ] - - def __init__( - self, - face_verts: torch.Tensor, - mesh_to_face_first_idx: torch.Tensor, - num_faces_per_mesh: torch.Tensor, - faces_clipped_to_unclipped_idx: Optional[torch.Tensor] = None, - barycentric_conversion: Optional[torch.Tensor] = None, - faces_clipped_to_conversion_idx: Optional[torch.Tensor] = None, - clipped_faces_neighbor_idx: Optional[torch.Tensor] = None, - ) -> None: - self.face_verts = face_verts - self.mesh_to_face_first_idx = mesh_to_face_first_idx - self.num_faces_per_mesh = num_faces_per_mesh - self.faces_clipped_to_unclipped_idx = faces_clipped_to_unclipped_idx - self.barycentric_conversion = barycentric_conversion - self.faces_clipped_to_conversion_idx = faces_clipped_to_conversion_idx - self.clipped_faces_neighbor_idx = clipped_faces_neighbor_idx - - -class ClipFrustum: - """ - Helper class to store the information needed to represent a view frustum - (left, right, top, bottom, znear, zfar), which is used to clip or cull triangles. - Values left as None mean that culling should not be performed for that axis. - The parameters perspective_correct, cull, and z_clip_value are used to define - behavior for clipping triangles to the frustum. 
- - Args: - left: NDC coordinate of the left clipping plane (along x axis) - right: NDC coordinate of the right clipping plane (along x axis) - top: NDC coordinate of the top clipping plane (along y axis) - bottom: NDC coordinate of the bottom clipping plane (along y axis) - znear: world space z coordinate of the near clipping plane - zfar: world space z coordinate of the far clipping plane - perspective_correct: should be set to True for a perspective camera - cull: if True, triangles outside the frustum should be culled - z_clip_value: if not None, then triangles should be clipped (possibly into - smaller triangles) such that z >= z_clip_value. This avoids projections - that go to infinity as z->0 - """ - - __slots__ = [ - "left", - "right", - "top", - "bottom", - "znear", - "zfar", - "perspective_correct", - "cull", - "z_clip_value", - ] - - def __init__( - self, - left: Optional[float] = None, - right: Optional[float] = None, - top: Optional[float] = None, - bottom: Optional[float] = None, - znear: Optional[float] = None, - zfar: Optional[float] = None, - perspective_correct: bool = False, - cull: bool = True, - z_clip_value: Optional[float] = None, - ) -> None: - self.left = left - self.right = right - self.top = top - self.bottom = bottom - self.znear = znear - self.zfar = zfar - self.perspective_correct = perspective_correct - self.cull = cull - self.z_clip_value = z_clip_value - - -def _get_culled_faces(face_verts: torch.Tensor, frustum: ClipFrustum) -> torch.Tensor: - """ - Helper function used to find all the faces in Meshes which are - fully outside the view frustum. A face is culled if all 3 vertices are outside - the same axis of the view frustum. - - Args: - face_verts: An (F,3,3) tensor, where F is the number of faces in - the packed representation of Meshes. The 2nd dimension represents the 3 vertices - of a triangle, and the 3rd dimension stores the xyz locations of each - vertex. - frustum: An instance of the ClipFrustum class with the information on the - position of the clipping planes. - - Returns: - faces_culled: An boolean tensor of size F specifying whether or not each face should be - culled. - """ - clipping_planes = ( - (frustum.left, 0, "<"), - (frustum.right, 0, ">"), - (frustum.top, 1, "<"), - (frustum.bottom, 1, ">"), - (frustum.znear, 2, "<"), - (frustum.zfar, 2, ">"), - ) - faces_culled = torch.zeros( - [face_verts.shape[0]], dtype=torch.bool, device=face_verts.device - ) - for plane in clipping_planes: - clip_value, axis, op = plane - # If clip_value is None then don't clip along that plane - if frustum.cull and clip_value is not None: - if op == "<": - verts_clipped = face_verts[:, axis] < clip_value - else: - verts_clipped = face_verts[:, axis] > clip_value - - # If all verts are clipped then face is outside the frustum - faces_culled |= verts_clipped.sum(1) == 3 - - return faces_culled - - -def _find_verts_intersecting_clipping_plane( - face_verts: torch.Tensor, - p1_face_ind: torch.Tensor, - clip_value: float, - perspective_correct: bool, -) -> Tuple[Tuple[Any, Any, Any, Any, Any], List[Any]]: - r""" - Helper function to find the vertices used to form a new triangle for case 3/case 4 faces. - - Given a list of triangles that are already known to intersect the clipping plane, - solve for the two vertices p4 and p5 where the edges of the triangle intersects the - clipping plane. 
- - p1 - /\ - / \ - / t \ - _____________p4/______\p5__________ clip_value - / \ - /____ \ - p2 ---____\p3 - - Args: - face_verts: An (F,3,3) tensor, where F is the number of faces in - the packed representation of the Meshes, the 2nd dimension represents - the 3 vertices of the face, and the 3rd dimension stores the xyz locations of each - vertex. The z-coordinates must be represented in world coordinates, while - the xy-coordinates may be in NDC/screen coordinates (i.e. after projection). - p1_face_ind: A tensor of shape (N,) with values in the range of 0 to 2. In each - case 3/case 4 triangle, two vertices are on the same side of the - clipping plane and the 3rd is on the other side. p1_face_ind stores the index of - the vertex that is not on the same side as any other vertex in the triangle. - clip_value: Float, the z-value defining where to clip the triangle. - perspective_correct: Bool, Should be set to true if a perspective camera was - used and xy-coordinates of face_verts_unclipped are in NDC/screen coordinates. - - Returns: - A 2-tuple - p: (p1, p2, p3, p4, p5)) - p_barycentric (p1_bary, p2_bary, p3_bary, p4_bary, p5_bary) - - Each of p1...p5 is an (F,3) tensor of the xyz locations of the 5 points in the - diagram above for case 3/case 4 faces. Each p1_bary...p5_bary is an (F, 3) tensor - storing the barycentric weights used to encode p1...p5 in terms of the the original - unclipped triangle. - """ - - # Let T be number of triangles in face_verts (note that these correspond to the subset - # of case 1 or case 2 triangles). p1_face_ind, p2_face_ind, and p3_face_ind are (T) - # tensors with values in the range of 0 to 2. p1_face_ind stores the index of the - # vertex that is not on the same side as any other vertex in the triangle, and - # p2_face_ind and p3_face_ind are the indices of the other two vertices preserving - # the same counterclockwise or clockwise ordering - T = face_verts.shape[0] - p2_face_ind = torch.remainder(p1_face_ind + 1, 3) - p3_face_ind = torch.remainder(p1_face_ind + 2, 3) - - # p1, p2, p3 are (T, 3) tensors storing the corresponding (x, y, z) coordinates - # of p1_face_ind, p2_face_ind, p3_face_ind - p1 = face_verts.gather(1, p1_face_ind[:, None, None].expand(-1, -1, 3)).squeeze(1) - p2 = face_verts.gather(1, p2_face_ind[:, None, None].expand(-1, -1, 3)).squeeze(1) - p3 = face_verts.gather(1, p3_face_ind[:, None, None].expand(-1, -1, 3)).squeeze(1) - - ################################## - # Solve for intersection point p4 - ################################## - - # p4 is a (T, 3) tensor is the point on the segment between p1 and p2 that - # intersects the clipping plane. - # Solve for the weight w2 such that p1.z*(1-w2) + p2.z*w2 = clip_value. - # Then interpolate p4 = p1*(1-w2) + p2*w2 where it is assumed that z-coordinates - # are expressed in world coordinates (since we want to clip z in world coordinates). - w2 = (p1[:, 2] - clip_value) / (p1[:, 2] - p2[:, 2]) - p4 = p1 * (1 - w2[:, None]) + p2 * w2[:, None] - if perspective_correct: - # It is assumed that all z-coordinates are in world coordinates (not NDC - # coordinates), while x and y coordinates may be in NDC/screen coordinates. - # If x and y are in NDC/screen coordinates and a projective transform was used - # in a perspective camera, then we effectively want to: - # 1. Convert back to world coordinates (by multiplying by z) - # 2. Interpolate using w2 - # 3. 
Convert back to NDC/screen coordinates (by dividing by the new z=clip_value) - p1_world = p1[:, :2] * p1[:, 2:3] - p2_world = p2[:, :2] * p2[:, 2:3] - p4[:, :2] = (p1_world * (1 - w2[:, None]) + p2_world * w2[:, None]) / clip_value - - ################################## - # Solve for intersection point p5 - ################################## - - # p5 is a (T, 3) tensor representing the point on the segment between p1 and p3 that - # intersects the clipping plane. - # Solve for the weight w3 such that p1.z * (1-w3) + p2.z * w3 = clip_value, - # and then interpolate p5 = p1 * (1-w3) + p3 * w3 - w3 = (p1[:, 2] - clip_value) / (p1[:, 2] - p3[:, 2]) - w3 = w3.detach() - p5 = p1 * (1 - w3[:, None]) + p3 * w3[:, None] - if perspective_correct: - # Again if using a perspective camera, convert back to world coordinates - # interpolate and convert back - p1_world = p1[:, :2] * p1[:, 2:3] - p3_world = p3[:, :2] * p3[:, 2:3] - p5[:, :2] = (p1_world * (1 - w3[:, None]) + p3_world * w3[:, None]) / clip_value - - # Set the barycentric coordinates of p1,p2,p3,p4,p5 in terms of the original - # unclipped triangle in face_verts. - T_idx = torch.arange(T, device=face_verts.device) - p_barycentric = [torch.zeros((T, 3), device=face_verts.device) for i in range(5)] - p_barycentric[0][(T_idx, p1_face_ind)] = 1 - p_barycentric[1][(T_idx, p2_face_ind)] = 1 - p_barycentric[2][(T_idx, p3_face_ind)] = 1 - p_barycentric[3][(T_idx, p1_face_ind)] = 1 - w2 - p_barycentric[3][(T_idx, p2_face_ind)] = w2 - p_barycentric[4][(T_idx, p1_face_ind)] = 1 - w3 - p_barycentric[4][(T_idx, p3_face_ind)] = w3 - - p = (p1, p2, p3, p4, p5) - - return p, p_barycentric - - -################### -# Main Entry point -################### -def clip_faces( - face_verts_unclipped: torch.Tensor, - mesh_to_face_first_idx: torch.Tensor, - num_faces_per_mesh: torch.Tensor, - frustum: ClipFrustum, -) -> ClippedFaces: - """ - Clip a mesh to the portion contained within a view frustum and with z > z_clip_value. - - There are two types of clipping: - 1) Cull triangles that are completely outside the view frustum. This is purely - to save computation by reducing the number of triangles that need to be - rasterized. - 2) Clip triangles into the portion of the triangle where z > z_clip_value. The - clipped region may be a quadrilateral, which results in splitting a triangle - into two triangles. This does not save computation, but is necessary to - correctly rasterize using perspective cameras for triangles that pass through - z <= 0, because NDC/screen coordinates go to infinity at z=0. - - Args: - face_verts_unclipped: An (F, 3, 3) tensor, where F is the number of faces in - the packed representation of Meshes, the 2nd dimension represents the 3 vertices - of the triangle, and the 3rd dimension stores the xyz locations of each - vertex. The z-coordinates must be represented in world coordinates, while - the xy-coordinates may be in NDC/screen coordinates - mesh_to_face_first_idx: an tensor of shape (N,), where N is the number of meshes - in the batch. The ith element stores the index into face_verts_unclipped - of the first face of the ith mesh. - num_faces_per_mesh: a tensor of shape (N,) storing the number of faces in each mesh. - frustum: a ClipFrustum object defining the frustum used to cull faces. 
- - Returns: - clipped_faces: ClippedFaces object storing a clipped version of the Meshes - along with tensors that can be used to convert barycentric coordinates - returned by rasterization of the clipped meshes into a barycentric - coordinates for the unclipped meshes. - """ - F = face_verts_unclipped.shape[0] - device = face_verts_unclipped.device - - # Triangles completely outside the view frustum will be culled - # faces_culled is of shape (F, ) - faces_culled = _get_culled_faces(face_verts_unclipped, frustum) - - # Triangles that are partially behind the z clipping plane will be clipped to - # smaller triangles - z_clip_value = frustum.z_clip_value - perspective_correct = frustum.perspective_correct - if z_clip_value is not None: - # (F, 3) tensor (where F is the number of triangles) marking whether each vertex - # in a triangle is behind the clipping plane - faces_clipped_verts = face_verts_unclipped[:, :, 2] < z_clip_value - - # (F) dim tensor containing the number of clipped vertices in each triangle - faces_num_clipped_verts = faces_clipped_verts.sum(1) - else: - faces_num_clipped_verts = torch.zeros([F], device=device) - - # If no triangles need to be clipped or culled, avoid unnecessary computation - # and return early - if faces_num_clipped_verts.sum().item() == 0 and faces_culled.sum().item() == 0: - return ClippedFaces( - face_verts=face_verts_unclipped, - mesh_to_face_first_idx=mesh_to_face_first_idx, - num_faces_per_mesh=num_faces_per_mesh, - ) - - ##################################################################################### - # Classify faces into the 4 relevant cases: - # 1) The triangle is completely in front of the clipping plane (it is left - # unchanged) - # 2) The triangle is completely behind the clipping plane (it is culled) - # 3) The triangle has exactly two vertices behind the clipping plane (it is - # clipped into a smaller triangle) - # 4) The triangle has exactly one vertex behind the clipping plane (it is clipped - # into a smaller quadrilateral and split into two triangles) - ##################################################################################### - - faces_unculled = ~faces_culled - # Case 1: no clipped verts or culled faces - cases1_unclipped = (faces_num_clipped_verts == 0) & faces_unculled - case1_unclipped_idx = cases1_unclipped.nonzero(as_tuple=True)[0] - # Case 2: all verts clipped - case2_unclipped = (faces_num_clipped_verts == 3) | faces_culled - # Case 3: two verts clipped - case3_unclipped = (faces_num_clipped_verts == 2) & faces_unculled - case3_unclipped_idx = case3_unclipped.nonzero(as_tuple=True)[0] - # Case 4: one vert clipped - case4_unclipped = (faces_num_clipped_verts == 1) & faces_unculled - case4_unclipped_idx = case4_unclipped.nonzero(as_tuple=True)[0] - - # faces_unclipped_to_clipped_idx is an (F) dim tensor storing the index of each - # face to the corresponding face in face_verts_clipped. - # Each case 2 triangle will be culled (deleted from face_verts_clipped), - # while each case 4 triangle will be split into two smaller triangles - # (replaced by two consecutive triangles in face_verts_clipped) - - # case2_unclipped is an (F,) dim 0/1 tensor of all the case2 faces - # case4_unclipped is an (F,) dim 0/1 tensor of all the case4 faces - faces_delta = case4_unclipped.int() - case2_unclipped.int() - # faces_delta_cum gives the per face change in index. 
Faces which are - # clipped in the original mesh are mapped to the closest non clipped face - # in face_verts_clipped (this doesn't matter as they are not used - # during rasterization anyway). - faces_delta_cum = faces_delta.cumsum(0) - faces_delta - delta = 1 + case4_unclipped.int() - case2_unclipped.int() - faces_unclipped_to_clipped_idx = delta.cumsum(0) - delta - - ########################################### - # Allocate tensors for the output Meshes. - # These will then be filled in for each case. - ########################################### - F_clipped = ( - F - # pyre-fixme[58]: `+` is not supported for operand types `int` and - # `Union[bool, float, int]`. - + faces_delta_cum[-1].item() - # pyre-fixme[58]: `+` is not supported for operand types `int` and - # `Union[bool, float, int]`. - + faces_delta[-1].item() - ) # Total number of faces in the new Meshes - face_verts_clipped = torch.zeros( - (F_clipped, 3, 3), dtype=face_verts_unclipped.dtype, device=device - ) - faces_clipped_to_unclipped_idx = torch.zeros( - [F_clipped], dtype=torch.int64, device=device - ) - - # Update version of mesh_to_face_first_idx and num_faces_per_mesh applicable to - # face_verts_clipped - mesh_to_face_first_idx_clipped = faces_unclipped_to_clipped_idx[ - mesh_to_face_first_idx - ] - F_clipped_t = torch.full([1], F_clipped, dtype=torch.int64, device=device) - num_faces_next = torch.cat((mesh_to_face_first_idx_clipped[1:], F_clipped_t)) - num_faces_per_mesh_clipped = num_faces_next - mesh_to_face_first_idx_clipped - - ################# Start Case 1 ######################################## - - # Case 1: Triangles are fully visible, copy unchanged triangles into the - # appropriate position in the new list of faces - case1_clipped_idx = faces_unclipped_to_clipped_idx[case1_unclipped_idx] - face_verts_clipped[case1_clipped_idx] = face_verts_unclipped[case1_unclipped_idx] - faces_clipped_to_unclipped_idx[case1_clipped_idx] = case1_unclipped_idx - - # If no triangles need to be clipped but some triangles were culled, avoid - # unnecessary clipping computation - if case3_unclipped_idx.shape[0] + case4_unclipped_idx.shape[0] == 0: - return ClippedFaces( - face_verts=face_verts_clipped, - mesh_to_face_first_idx=mesh_to_face_first_idx_clipped, - num_faces_per_mesh=num_faces_per_mesh_clipped, - faces_clipped_to_unclipped_idx=faces_clipped_to_unclipped_idx, - ) - - ################# End Case 1 ########################################## - - ################# Start Case 3 ######################################## - - # Case 3: exactly two vertices are behind the camera, clipping the triangle into a - # triangle. In the diagram below, we clip the bottom part of the triangle, and add - # new vertices p4 and p5 by intersecting with the clipping plane. 
The updated - # triangle is the triangle between p4, p1, p5 - # - # p1 (unclipped vertex) - # /\ - # / \ - # / t \ - # _____________p4/______\p5__________ clip_value - # xxxxxxxxxxxxxx/ \xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxx/____ \xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - # xxxxxxxxxx p2 xxxx---____\p3 xxxxxxxxxxxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - faces_case3 = face_verts_unclipped[case3_unclipped_idx] - - # index (0, 1, or 2) of the vertex in front of the clipping plane - p1_face_ind = torch.where(~faces_clipped_verts[case3_unclipped_idx])[1] - - # Solve for the points p4, p5 that intersect the clipping plane - p, p_barycentric = _find_verts_intersecting_clipping_plane( - faces_case3, p1_face_ind, z_clip_value, perspective_correct - ) - - p1, _, _, p4, p5 = p - p1_barycentric, _, _, p4_barycentric, p5_barycentric = p_barycentric - - # Store clipped triangle - case3_clipped_idx = faces_unclipped_to_clipped_idx[case3_unclipped_idx] - t_barycentric = torch.stack((p4_barycentric, p5_barycentric, p1_barycentric), 2) - face_verts_clipped[case3_clipped_idx] = torch.stack((p4, p5, p1), 1) - faces_clipped_to_unclipped_idx[case3_clipped_idx] = case3_unclipped_idx - - ################# End Case 3 ########################################## - - ################# Start Case 4 ######################################## - - # Case 4: exactly one vertex is behind the camera, clip the triangle into a - # quadrilateral. In the diagram below, we clip the bottom part of the triangle, - # and add new vertices p4 and p5 by intersecting with the cliiping plane. The - # unclipped region is a quadrilateral, which is split into two triangles: - # t1: p4, p2, p5 - # t2: p5, p2, p3 - # - # p3_____________________p2 - # \ __--/ - # \ t2 __-- / - # \ __-- t1 / - # ______________p5\__--_________/p4_________clip_value - # xxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxxxxx - # xxxxxxxxxxxxxxxxxxxxxx\ /xxxxxxxxxxxxxxxxxxxxx - # p1 (clipped vertex) - - faces_case4 = face_verts_unclipped[case4_unclipped_idx] - - # index (0, 1, or 2) of the vertex behind the clipping plane - p1_face_ind = torch.where(faces_clipped_verts[case4_unclipped_idx])[1] - - # Solve for the points p4, p5 that intersect the clipping plane - p, p_barycentric = _find_verts_intersecting_clipping_plane( - faces_case4, p1_face_ind, z_clip_value, perspective_correct - ) - _, p2, p3, p4, p5 = p - _, p2_barycentric, p3_barycentric, p4_barycentric, p5_barycentric = p_barycentric - - # Store clipped triangles - case4_clipped_idx = faces_unclipped_to_clipped_idx[case4_unclipped_idx] - face_verts_clipped[case4_clipped_idx] = torch.stack((p4, p2, p5), 1) - face_verts_clipped[case4_clipped_idx + 1] = torch.stack((p5, p2, p3), 1) - t1_barycentric = torch.stack((p4_barycentric, p2_barycentric, p5_barycentric), 2) - t2_barycentric = torch.stack((p5_barycentric, p2_barycentric, p3_barycentric), 2) - faces_clipped_to_unclipped_idx[case4_clipped_idx] = case4_unclipped_idx - faces_clipped_to_unclipped_idx[case4_clipped_idx + 1] = case4_unclipped_idx - - ##################### End Case 4 ######################### - - # Triangles that were clipped (case 3 & case 4) will require conversion of - # barycentric coordinates from being in terms of the smaller clipped triangle to in terms - # of the original big triangle. 
If there are T clipped triangles, - # barycentric_conversion is a (T, 3, 3) tensor, where barycentric_conversion[i, :, k] - # stores the barycentric weights in terms of the world coordinates of the original - # (big) triangle for the kth vertex in the clipped (small) triangle. If our - # rasterizer then expresses some NDC coordinate in terms of barycentric - # world coordinates for the clipped (small) triangle as alpha_clipped[i,:], - # alpha_unclipped[i, :] = barycentric_conversion[i, :, :]*alpha_clipped[i, :] - barycentric_conversion = torch.cat((t_barycentric, t1_barycentric, t2_barycentric)) - - # faces_clipped_to_conversion_idx is an (F_clipped,) shape tensor mapping each output - # face to the applicable row of barycentric_conversion (or set to -1 if conversion is - # not needed) - faces_to_convert_idx = torch.cat( - (case3_clipped_idx, case4_clipped_idx, case4_clipped_idx + 1), 0 - ) - barycentric_idx = torch.arange( - barycentric_conversion.shape[0], dtype=torch.int64, device=device - ) - faces_clipped_to_conversion_idx = torch.full( - [F_clipped], -1, dtype=torch.int64, device=device - ) - faces_clipped_to_conversion_idx[faces_to_convert_idx] = barycentric_idx - - # clipped_faces_quadrilateral_ind is an (F_clipped) dim tensor - # For case 4 clipped triangles (where a big triangle is split in two smaller triangles), - # store the index of the neighboring clipped triangle. - # This will be needed because if the soft rasterizer includes both - # triangles in the list of top K nearest triangles, we - # should only use the one with the smaller distance. - clipped_faces_neighbor_idx = torch.full( - [F_clipped], -1, dtype=torch.int64, device=device - ) - clipped_faces_neighbor_idx[case4_clipped_idx] = case4_clipped_idx + 1 - clipped_faces_neighbor_idx[case4_clipped_idx + 1] = case4_clipped_idx - - clipped_faces = ClippedFaces( - face_verts=face_verts_clipped, - mesh_to_face_first_idx=mesh_to_face_first_idx_clipped, - num_faces_per_mesh=num_faces_per_mesh_clipped, - faces_clipped_to_unclipped_idx=faces_clipped_to_unclipped_idx, - barycentric_conversion=barycentric_conversion, - faces_clipped_to_conversion_idx=faces_clipped_to_conversion_idx, - clipped_faces_neighbor_idx=clipped_faces_neighbor_idx, - ) - return clipped_faces - - -def convert_clipped_rasterization_to_original_faces( - pix_to_face_clipped, bary_coords_clipped, clipped_faces: ClippedFaces -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Convert rasterization Fragments (expressed as pix_to_face_clipped, - bary_coords_clipped, dists_clipped) of clipped Meshes computed using clip_faces() - to the corresponding rasterization Fragments where barycentric coordinates and - face indices are in terms of the original unclipped Meshes. The distances are - handled in the rasterizer C++/CUDA kernels (i.e. for Cases 1/3 the distance - can be used directly and for Case 4 triangles the distance of the pixel to - the closest of the two subdivided triangles is used). - - Args: - pix_to_face_clipped: LongTensor of shape (N, image_size, image_size, - faces_per_pixel) giving the indices of the nearest faces at each pixel, - sorted in ascending z-order. Concretely - ``pix_to_face_clipped[n, y, x, k] = f`` means that ``faces_verts_clipped[f]`` - is the kth closest face (in the z-direction) to pixel (y, x). Pixels that - are hit by fewer than faces_per_pixel are padded with -1. 
- bary_coords_clipped: FloatTensor of shape - (N, image_size, image_size, faces_per_pixel, 3) giving the barycentric - coordinates in world coordinates of the nearest faces at each pixel, sorted - in ascending z-order. Concretely, if ``pix_to_face_clipped[n, y, x, k] = f`` - then ``[w0, w1, w2] = bary_coords_clipped[n, y, x, k]`` gives the - barycentric coords for pixel (y, x) relative to the face defined by - ``unproject(face_verts_clipped[f])``. Pixels hit by fewer than - faces_per_pixel are padded with -1. - clipped_faces: an instance of ClippedFaces class giving the auxillary variables - for converting rasterization outputs from clipped to unclipped Meshes. - - Returns: - 3-tuple: (pix_to_face_unclipped, bary_coords_unclipped, dists_unclipped) that - have the same definition as (pix_to_face_clipped, bary_coords_clipped, - dists_clipped) except that they pertain to faces_verts_unclipped instead of - faces_verts_clipped (i.e the original meshes as opposed to the modified meshes) - """ - faces_clipped_to_unclipped_idx = clipped_faces.faces_clipped_to_unclipped_idx - - # If no clipping then return inputs - if ( - faces_clipped_to_unclipped_idx is None - or faces_clipped_to_unclipped_idx.numel() == 0 - ): - return pix_to_face_clipped, bary_coords_clipped - - device = pix_to_face_clipped.device - - # Convert pix_to_face indices to now refer to the faces in the unclipped Meshes. - # Init empty tensor to fill in all the background values which have pix_to_face=-1. - empty = torch.full(pix_to_face_clipped.shape, -1, device=device, dtype=torch.int64) - pix_to_face_unclipped = torch.where( - pix_to_face_clipped != -1, - faces_clipped_to_unclipped_idx[pix_to_face_clipped], - empty, - ) - - # For triangles that were clipped into smaller triangle(s), convert barycentric - # coordinates from being in terms of the clipped triangle to being in terms of the - # original unclipped triangle. - - # barycentric_conversion is a (T, 3, 3) tensor such that - # alpha_unclipped[i, :] = barycentric_conversion[i, :, :]*alpha_clipped[i, :] - barycentric_conversion = clipped_faces.barycentric_conversion - - # faces_clipped_to_conversion_idx is an (F_clipped,) shape tensor mapping each output - # face to the applicable row of barycentric_conversion (or set to -1 if conversion is - # not needed) - faces_clipped_to_conversion_idx = clipped_faces.faces_clipped_to_conversion_idx - - if barycentric_conversion is not None: - bary_coords_unclipped = bary_coords_clipped.clone() - - # Select the subset of faces that require conversion, where N is the sum - # number of case3/case4 triangles that are in the closest k triangles to some - # rasterized pixel. 
- pix_to_conversion_idx = torch.where( - pix_to_face_clipped != -1, - faces_clipped_to_conversion_idx[pix_to_face_clipped], - empty, - ) - faces_to_convert_mask = pix_to_conversion_idx != -1 - N = faces_to_convert_mask.sum().item() - - # Expand to (N, H, W, K, 3) to be the same shape as barycentric coordinates - faces_to_convert_mask_expanded = faces_to_convert_mask[:, :, :, :, None].expand( - -1, -1, -1, -1, 3 - ) - - # An (N,) dim tensor of indices into barycentric_conversion - conversion_idx_subset = pix_to_conversion_idx[faces_to_convert_mask] - - # An (N, 3, 1) tensor of barycentric coordinates in terms of the clipped triangles - bary_coords_clipped_subset = bary_coords_clipped[faces_to_convert_mask_expanded] - bary_coords_clipped_subset = bary_coords_clipped_subset.reshape((N, 3, 1)) - - # An (N, 3, 3) tensor storing matrices to convert from clipped to unclipped - # barycentric coordinates - bary_conversion_subset = barycentric_conversion[conversion_idx_subset] - - # An (N, 3, 1) tensor of barycentric coordinates in terms of the unclipped triangle - bary_coords_unclipped_subset = bary_conversion_subset.bmm( - bary_coords_clipped_subset - ) - - bary_coords_unclipped_subset = bary_coords_unclipped_subset.reshape([N * 3]) - bary_coords_unclipped[ - faces_to_convert_mask_expanded - ] = bary_coords_unclipped_subset - - # dists for case 4 faces will be handled in the rasterizer - # so no need to modify them here. - else: - bary_coords_unclipped = bary_coords_clipped - - return pix_to_face_unclipped, bary_coords_unclipped diff --git a/pytorch3d/pytorch3d/renderer/mesh/rasterize_meshes.py b/pytorch3d/pytorch3d/renderer/mesh/rasterize_meshes.py deleted file mode 100644 index afcd7496253ced111584ef11a34b750f7f1b3840..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/rasterize_meshes.py +++ /dev/null @@ -1,763 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import List, Optional, Tuple, Union - -import numpy as np -import torch -from pytorch3d import _C - -from ..utils import parse_image_size - -from .clip import ( - clip_faces, - ClipFrustum, - convert_clipped_rasterization_to_original_faces, -) - - -# TODO make the epsilon user configurable -kEpsilon = 1e-8 - -# Maximum number of faces per bins for -# coarse-to-fine rasterization -kMaxFacesPerBin = 22 - - -def rasterize_meshes( - meshes, - image_size: Union[int, List[int], Tuple[int, int]] = 256, - blur_radius: float = 0.0, - faces_per_pixel: int = 8, - bin_size: Optional[int] = None, - max_faces_per_bin: Optional[int] = None, - perspective_correct: bool = False, - clip_barycentric_coords: bool = False, - cull_backfaces: bool = False, - z_clip_value: Optional[float] = None, - cull_to_frustum: bool = False, -): - """ - Rasterize a batch of meshes given the shape of the desired output image. - Each mesh is rasterized onto a separate image of shape - (H, W) if `image_size` is a tuple or (image_size, image_size) if it - is an int. - - If the desired image size is non square (i.e. a tuple of (H, W) where H != W) - the aspect ratio needs special consideration. There are two aspect ratios - to be aware of: - - the aspect ratio of each pixel - - the aspect ratio of the output image - The camera can be used to set the pixel aspect ratio. 
In the rasterizer, - we assume square pixels, but variable image aspect ratio (i.e rectangle images). - - In most cases you will want to set the camera aspect ratio to - 1.0 (i.e. square pixels) and only vary the - `image_size` (i.e. the output image dimensions in pixels). - - Args: - meshes: A Meshes object representing a batch of meshes, batch size N. - image_size: Size in pixels of the output image to be rasterized. - Can optionally be a tuple of (H, W) in the case of non square images. - blur_radius: Float distance in the range [0, 2] used to expand the face - bounding boxes for rasterization. Setting blur radius - results in blurred edges around the shape instead of a - hard boundary. Set to 0 for no blur. - faces_per_pixel (Optional): Number of faces to save per pixel, returning - the nearest faces_per_pixel points along the z-axis. - bin_size: Size of bins to use for coarse-to-fine rasterization. Setting - bin_size=0 uses naive rasterization; setting bin_size=None attempts to - set it heuristically based on the shape of the input. This should not - affect the output, but can affect the speed of the forward pass. - max_faces_per_bin: Only applicable when using coarse-to-fine rasterization - (bin_size > 0); this is the maximum number of faces allowed within each - bin. This should not affect the output values, but can affect - the memory usage in the forward pass. - perspective_correct: Bool, Whether to apply perspective correction when computing - barycentric coordinates for pixels. This should be set to True if a perspective - camera is used. - clip_barycentric_coords: Whether, after any perspective correction is applied - but before the depth is calculated (e.g. for z clipping), - to "correct" a location outside the face (i.e. with a negative - barycentric coordinate) to a position on the edge of the face. - cull_backfaces: Bool, Whether to only rasterize mesh faces which are - visible to the camera. This assumes that vertices of - front-facing triangles are ordered in an anti-clockwise - fashion, and triangles that face away from the camera are - in a clockwise order relative to the current view - direction. NOTE: This will only work if the mesh faces are - consistently defined with counter-clockwise ordering when - viewed from the outside. - z_clip_value: if not None, then triangles will be clipped (and possibly - subdivided into smaller triangles) such that z >= z_clip_value. - This avoids camera projections that go to infinity as z->0. - Default is None as clipping affects rasterization speed and - should only be turned on if explicitly needed. - See clip.py for all the extra computation that is required. - cull_to_frustum: if True, triangles outside the view frustum will be culled. - Culling involves removing all faces which fall outside view frustum. - Default is False so that it is turned on only when needed. - - Returns: - 4-element tuple containing - - - **pix_to_face**: LongTensor of shape - (N, image_size, image_size, faces_per_pixel) - giving the indices of the nearest faces at each pixel, - sorted in ascending z-order. - Concretely ``pix_to_face[n, y, x, k] = f`` means that - ``faces_verts[f]`` is the kth closest face (in the z-direction) - to pixel (y, x). Pixels that are hit by fewer than - faces_per_pixel are padded with -1. - - **zbuf**: FloatTensor of shape (N, image_size, image_size, faces_per_pixel) - giving the NDC z-coordinates of the nearest faces at each pixel, - sorted in ascending z-order. 
- Concretely, if ``pix_to_face[n, y, x, k] = f`` then - ``zbuf[n, y, x, k] = face_verts[f, 2]``. Pixels hit by fewer than - faces_per_pixel are padded with -1. - - **barycentric**: FloatTensor of shape - (N, image_size, image_size, faces_per_pixel, 3) - giving the barycentric coordinates in NDC units of the - nearest faces at each pixel, sorted in ascending z-order. - Concretely, if ``pix_to_face[n, y, x, k] = f`` then - ``[w0, w1, w2] = barycentric[n, y, x, k]`` gives - the barycentric coords for pixel (y, x) relative to the face - defined by ``face_verts[f]``. Pixels hit by fewer than - faces_per_pixel are padded with -1. - - **pix_dists**: FloatTensor of shape - (N, image_size, image_size, faces_per_pixel) - giving the signed Euclidean distance (in NDC units) in the - x/y plane of each point closest to the pixel. Concretely if - ``pix_to_face[n, y, x, k] = f`` then ``pix_dists[n, y, x, k]`` is the - squared distance between the pixel (y, x) and the face given - by vertices ``face_verts[f]``. Pixels hit with fewer than - ``faces_per_pixel`` are padded with -1. - - In the case that image_size is a tuple of (H, W) then the outputs - will be of shape `(N, H, W, ...)`. - """ - verts_packed = meshes.verts_packed() - faces_packed = meshes.faces_packed() - face_verts = verts_packed[faces_packed] - mesh_to_face_first_idx = meshes.mesh_to_faces_packed_first_idx() - num_faces_per_mesh = meshes.num_faces_per_mesh() - - # In the case that H != W use the max image size to set the bin_size - # to accommodate the num bins constraint in the coarse rasterizer. - # If the ratio of H:W is large this might cause issues as the smaller - # dimension will have fewer bins. - # TODO: consider a better way of setting the bin size. - im_size = parse_image_size(image_size) - max_image_size = max(*im_size) - - clipped_faces_neighbor_idx = None - - if z_clip_value is not None or cull_to_frustum: - # Cull faces outside the view frustum, and clip faces that are partially - # behind the camera into the portion of the triangle in front of the - # camera. This may change the number of faces - frustum = ClipFrustum( - left=-1, - right=1, - top=-1, - bottom=1, - perspective_correct=perspective_correct, - z_clip_value=z_clip_value, - cull=cull_to_frustum, - ) - clipped_faces = clip_faces( - face_verts, mesh_to_face_first_idx, num_faces_per_mesh, frustum=frustum - ) - face_verts = clipped_faces.face_verts - mesh_to_face_first_idx = clipped_faces.mesh_to_face_first_idx - num_faces_per_mesh = clipped_faces.num_faces_per_mesh - - # For case 4 clipped triangles (where a big triangle is split in two smaller triangles), - # need the index of the neighboring clipped triangle as only one can be in - # in the top K closest faces in the rasterization step. - clipped_faces_neighbor_idx = clipped_faces.clipped_faces_neighbor_idx - - if clipped_faces_neighbor_idx is None: - # Set to the default which is all -1s. - clipped_faces_neighbor_idx = torch.full( - size=(face_verts.shape[0],), - fill_value=-1, - device=meshes.device, - dtype=torch.int64, - ) - - # TODO: Choose naive vs coarse-to-fine based on mesh size and image size. - if bin_size is None: - if not verts_packed.is_cuda: - # Binned CPU rasterization is not supported. - bin_size = 0 - else: - # TODO better heuristics for bin size. 
- if max_image_size <= 64: - bin_size = 8 - else: - # Heuristic based formula maps max_image_size -> bin_size as follows: - # max_image_size < 64 -> 8 - # 16 < max_image_size < 256 -> 16 - # 256 < max_image_size < 512 -> 32 - # 512 < max_image_size < 1024 -> 64 - # 1024 < max_image_size < 2048 -> 128 - bin_size = int(2 ** max(np.ceil(np.log2(max_image_size)) - 4, 4)) - - if bin_size != 0: - # There is a limit on the number of faces per bin in the cuda kernel. - faces_per_bin = 1 + (max_image_size - 1) // bin_size - if faces_per_bin >= kMaxFacesPerBin: - raise ValueError( - "bin_size too small, number of faces per bin must be less than %d; got %d" - % (kMaxFacesPerBin, faces_per_bin) - ) - - if max_faces_per_bin is None: - max_faces_per_bin = int(max(10000, meshes._F / 5)) - - pix_to_face, zbuf, barycentric_coords, dists = _RasterizeFaceVerts.apply( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - clipped_faces_neighbor_idx, - im_size, - blur_radius, - faces_per_pixel, - bin_size, - max_faces_per_bin, - perspective_correct, - clip_barycentric_coords, - cull_backfaces, - ) - - if z_clip_value is not None or cull_to_frustum: - # If faces were clipped, map the rasterization result to be in terms of the - # original unclipped faces. This may involve converting barycentric - # coordinates - outputs = convert_clipped_rasterization_to_original_faces( - pix_to_face, - barycentric_coords, - # pyre-fixme[61]: `clipped_faces` may not be initialized here. - clipped_faces, - ) - pix_to_face, barycentric_coords = outputs - - return pix_to_face, zbuf, barycentric_coords, dists - - -class _RasterizeFaceVerts(torch.autograd.Function): - """ - Torch autograd wrapper for forward and backward pass of rasterize_meshes - implemented in C++/CUDA. - - Args: - face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions - for faces in all the meshes in the batch. Concretely, - face_verts[f, i] = [x, y, z] gives the coordinates for the - ith vertex of the fth face. These vertices are expected to - be in NDC coordinates in the range [-1, 1]. - mesh_to_face_first_idx: LongTensor of shape (N) giving the index in - faces_verts of the first face in each mesh in - the batch. - num_faces_per_mesh: LongTensor of shape (N) giving the number of faces - for each mesh in the batch. - image_size, blur_radius, faces_per_pixel: same as rasterize_meshes. - perspective_correct: same as rasterize_meshes. - cull_backfaces: same as rasterize_meshes. - - Returns: - same as rasterize_meshes function. - """ - - @staticmethod - # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently. - def forward( - ctx, - face_verts: torch.Tensor, - mesh_to_face_first_idx: torch.Tensor, - num_faces_per_mesh: torch.Tensor, - clipped_faces_neighbor_idx: torch.Tensor, - image_size: Union[List[int], Tuple[int, int]] = (256, 256), - blur_radius: float = 0.01, - faces_per_pixel: int = 0, - bin_size: int = 0, - max_faces_per_bin: int = 0, - perspective_correct: bool = False, - clip_barycentric_coords: bool = False, - cull_backfaces: bool = False, - z_clip_value: Optional[float] = None, - cull_to_frustum: bool = True, - ): - # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`. 
- pix_to_face, zbuf, barycentric_coords, dists = _C.rasterize_meshes( - face_verts, - mesh_to_face_first_idx, - num_faces_per_mesh, - clipped_faces_neighbor_idx, - image_size, - blur_radius, - faces_per_pixel, - bin_size, - max_faces_per_bin, - perspective_correct, - clip_barycentric_coords, - cull_backfaces, - ) - - ctx.save_for_backward(face_verts, pix_to_face) - ctx.mark_non_differentiable(pix_to_face) - ctx.perspective_correct = perspective_correct - ctx.clip_barycentric_coords = clip_barycentric_coords - return pix_to_face, zbuf, barycentric_coords, dists - - @staticmethod - def backward(ctx, grad_pix_to_face, grad_zbuf, grad_barycentric_coords, grad_dists): - grad_face_verts = None - grad_mesh_to_face_first_idx = None - grad_num_faces_per_mesh = None - grad_clipped_faces_neighbor_idx = None - grad_image_size = None - grad_radius = None - grad_faces_per_pixel = None - grad_bin_size = None - grad_max_faces_per_bin = None - grad_perspective_correct = None - grad_clip_barycentric_coords = None - grad_cull_backfaces = None - face_verts, pix_to_face = ctx.saved_tensors - grad_face_verts = _C.rasterize_meshes_backward( - face_verts, - pix_to_face, - grad_zbuf, - grad_barycentric_coords, - grad_dists, - ctx.perspective_correct, - ctx.clip_barycentric_coords, - ) - grads = ( - grad_face_verts, - grad_mesh_to_face_first_idx, - grad_num_faces_per_mesh, - grad_clipped_faces_neighbor_idx, - grad_image_size, - grad_radius, - grad_faces_per_pixel, - grad_bin_size, - grad_max_faces_per_bin, - grad_perspective_correct, - grad_clip_barycentric_coords, - grad_cull_backfaces, - ) - return grads - - -def non_square_ndc_range(S1, S2): - """ - In the case of non square images, we scale the NDC range - to maintain the aspect ratio. The smaller dimension has NDC - range of 2.0. - - Args: - S1: dimension along with the NDC range is needed - S2: the other image dimension - - Returns: - ndc_range: NDC range for dimension S1 - """ - ndc_range = 2.0 - if S1 > S2: - ndc_range = (S1 / S2) * ndc_range - return ndc_range - - -def pix_to_non_square_ndc(i, S1, S2): - """ - The default value of the NDC range is [-1, 1]. - However in the case of non square images, we scale the NDC range - to maintain the aspect ratio. The smaller dimension has NDC - range from [-1, 1] and the other dimension is scaled by - the ratio of H:W. - e.g. for image size (H, W) = (64, 128) - Height NDC range: [-1, 1] - Width NDC range: [-2, 2] - - Args: - i: pixel position on axes S1 - S1: dimension along with i is given - S2: the other image dimension - - Returns: - pixel: NDC coordinate of point i for dimension S1 - """ - # NDC: x-offset + (i * pixel_width + half_pixel_width) - ndc_range = non_square_ndc_range(S1, S2) - offset = ndc_range / 2.0 - return -offset + (ndc_range * i + offset) / S1 - - -def rasterize_meshes_python( # noqa: C901 - meshes, - image_size: Union[int, Tuple[int, int]] = 256, - blur_radius: float = 0.0, - faces_per_pixel: int = 8, - perspective_correct: bool = False, - clip_barycentric_coords: bool = False, - cull_backfaces: bool = False, - z_clip_value: Optional[float] = None, - cull_to_frustum: bool = True, - clipped_faces_neighbor_idx: Optional[torch.Tensor] = None, -): - """ - Naive PyTorch implementation of mesh rasterization with the same inputs and - outputs as the rasterize_meshes function. - - This function is not optimized and is implemented as a comparison for the - C++/CUDA implementations. 
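    As an illustrative sketch (not part of the original source), the pixel-to-NDC
    mapping used by the loops below (via pix_to_non_square_ndc) scales the longer
    image dimension, so an image of size (H, W) = (64, 128) keeps the height range
    [-1, 1] but stretches the width range to [-2, 2]; pixel centres then map as:

    .. code-block:: python

        def pix_to_ndc(i, S1, S2):
            # Same formula as pix_to_non_square_ndc / non_square_ndc_range above.
            ndc_range = 2.0 * S1 / S2 if S1 > S2 else 2.0
            offset = ndc_range / 2.0
            return -offset + (ndc_range * i + offset) / S1

        assert pix_to_ndc(0, 128, 64) == -1.984375    # centre of the first column
        assert pix_to_ndc(127, 128, 64) == 1.984375   # centre of the last column
        assert pix_to_ndc(0, 64, 128) == -0.984375    # centre of the first row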
- """ - N = len(meshes) - H, W = image_size if isinstance(image_size, tuple) else (image_size, image_size) - - K = faces_per_pixel - device = meshes.device - - verts_packed = meshes.verts_packed() - faces_packed = meshes.faces_packed() - faces_verts = verts_packed[faces_packed] - mesh_to_face_first_idx = meshes.mesh_to_faces_packed_first_idx() - num_faces_per_mesh = meshes.num_faces_per_mesh() - - if z_clip_value is not None or cull_to_frustum: - # Cull faces outside the view frustum, and clip faces that are partially - # behind the camera into the portion of the triangle in front of the - # camera. This may change the number of faces - frustum = ClipFrustum( - left=-1, - right=1, - top=-1, - bottom=1, - perspective_correct=perspective_correct, - z_clip_value=z_clip_value, - cull=cull_to_frustum, - ) - clipped_faces = clip_faces( - faces_verts, mesh_to_face_first_idx, num_faces_per_mesh, frustum=frustum - ) - faces_verts = clipped_faces.face_verts - mesh_to_face_first_idx = clipped_faces.mesh_to_face_first_idx - num_faces_per_mesh = clipped_faces.num_faces_per_mesh - - # Initialize output tensors. - face_idxs = torch.full( - (N, H, W, K), fill_value=-1, dtype=torch.int64, device=device - ) - zbuf = torch.full((N, H, W, K), fill_value=-1, dtype=torch.float32, device=device) - bary_coords = torch.full( - (N, H, W, K, 3), fill_value=-1, dtype=torch.float32, device=device - ) - pix_dists = torch.full( - (N, H, W, K), fill_value=-1, dtype=torch.float32, device=device - ) - - # Calculate all face bounding boxes. - x_mins = torch.min(faces_verts[:, :, 0], dim=1, keepdim=True).values - x_maxs = torch.max(faces_verts[:, :, 0], dim=1, keepdim=True).values - y_mins = torch.min(faces_verts[:, :, 1], dim=1, keepdim=True).values - y_maxs = torch.max(faces_verts[:, :, 1], dim=1, keepdim=True).values - z_mins = torch.min(faces_verts[:, :, 2], dim=1, keepdim=True).values - - # Expand by blur radius. - x_mins = x_mins - np.sqrt(blur_radius) - kEpsilon - x_maxs = x_maxs + np.sqrt(blur_radius) + kEpsilon - y_mins = y_mins - np.sqrt(blur_radius) - kEpsilon - y_maxs = y_maxs + np.sqrt(blur_radius) + kEpsilon - - # Loop through meshes in the batch. - for n in range(N): - face_start_idx = mesh_to_face_first_idx[n] - face_stop_idx = face_start_idx + num_faces_per_mesh[n] - - # Iterate through the horizontal lines of the image from top to bottom. - for yi in range(H): - # Y coordinate of one end of the image. Reverse the ordering - # of yi so that +Y is pointing up in the image. - yfix = H - 1 - yi - yf = pix_to_non_square_ndc(yfix, H, W) - - # Iterate through pixels on this horizontal line, left to right. - for xi in range(W): - # X coordinate of one end of the image. Reverse the ordering - # of xi so that +X is pointing to the left in the image. - xfix = W - 1 - xi - xf = pix_to_non_square_ndc(xfix, W, H) - top_k_points = [] - - # Check whether each face in the mesh affects this pixel. - for f in range(face_start_idx, face_stop_idx): - face = faces_verts[f].squeeze() - v0, v1, v2 = face.unbind(0) - - face_area = edge_function(v0, v1, v2) - - # Ignore triangles facing away from the camera. - back_face = face_area < 0 - if cull_backfaces and back_face: - continue - - # Ignore faces which have zero area. 
- if face_area == 0.0: - continue - - outside_bbox = ( - xf < x_mins[f] - or xf > x_maxs[f] - or yf < y_mins[f] - or yf > y_maxs[f] - ) - - # Faces with at least one vertex behind the camera won't - # render correctly and should be removed or clipped before - # calling the rasterizer - if z_mins[f] < kEpsilon: - continue - - # Check if pixel is outside of face bbox. - if outside_bbox: - continue - - # Compute barycentric coordinates and pixel z distance. - pxy = torch.tensor([xf, yf], dtype=torch.float32, device=device) - - bary = barycentric_coordinates(pxy, v0[:2], v1[:2], v2[:2]) - if perspective_correct: - z0, z1, z2 = v0[2], v1[2], v2[2] - l0, l1, l2 = bary[0], bary[1], bary[2] - top0 = l0 * z1 * z2 - top1 = z0 * l1 * z2 - top2 = z0 * z1 * l2 - bot = top0 + top1 + top2 - bary = torch.stack([top0 / bot, top1 / bot, top2 / bot]) - - # Check if inside before clipping - inside = all(x > 0.0 for x in bary) - - # Barycentric clipping - if clip_barycentric_coords: - bary = barycentric_coordinates_clip(bary) - # use clipped barycentric coords to calculate the z value - pz = bary[0] * v0[2] + bary[1] * v1[2] + bary[2] * v2[2] - - # Check if point is behind the image. - if pz < 0: - continue - - # Calculate signed 2D distance from point to face. - # Points inside the triangle have negative distance. - dist = point_triangle_distance(pxy, v0[:2], v1[:2], v2[:2]) - - # Add an epsilon to prevent errors when comparing distance - # to blur radius. - if not inside and dist >= blur_radius: - continue - - # Handle the case where a face (f) partially behind the image plane is - # clipped to a quadrilateral and then split into two faces (t1, t2). - top_k_idx = -1 - if ( - clipped_faces_neighbor_idx is not None - and clipped_faces_neighbor_idx[f] != -1 - ): - neighbor_idx = clipped_faces_neighbor_idx[f] - # See if neighbor_idx is in top_k and find index - top_k_idx = [ - i - for i, val in enumerate(top_k_points) - if val[1] == neighbor_idx - ] - top_k_idx = top_k_idx[0] if len(top_k_idx) > 0 else -1 - - if top_k_idx != -1 and dist < top_k_points[top_k_idx][3]: - # Overwrite the neighbor with current face info - top_k_points[top_k_idx] = (pz, f, bary, dist, inside) - else: - # Handle as a normal face - top_k_points.append((pz, f, bary, dist, inside)) - - top_k_points.sort() - if len(top_k_points) > K: - top_k_points = top_k_points[:K] - - # Save to output tensors. - for k, (pz, f, bary, dist, inside) in enumerate(top_k_points): - zbuf[n, yi, xi, k] = pz - face_idxs[n, yi, xi, k] = f - bary_coords[n, yi, xi, k, 0] = bary[0] - bary_coords[n, yi, xi, k, 1] = bary[1] - bary_coords[n, yi, xi, k, 2] = bary[2] - # Write the signed distance - pix_dists[n, yi, xi, k] = -dist if inside else dist - - if z_clip_value is not None or cull_to_frustum: - # If faces were clipped, map the rasterization result to be in terms of the - # original unclipped faces. This may involve converting barycentric - # coordinates - (face_idxs, bary_coords,) = convert_clipped_rasterization_to_original_faces( - face_idxs, - bary_coords, - # pyre-fixme[61]: `clipped_faces` may not be initialized here. - clipped_faces, - ) - - return face_idxs, zbuf, bary_coords, pix_dists - - -def edge_function(p, v0, v1): - r""" - Determines whether a point p is on the right side of a 2D line segment - given by the end points v0, v1. - - Args: - p: (x, y) Coordinates of a point. - v0, v1: (x, y) Coordinates of the end points of the edge. - - Returns: - area: The signed area of the parallelogram given by the vectors - - .. 
code-block:: python - - B = p - v0 - A = v1 - v0 - - v1 ________ - /\ / - A / \ / - / \ / - v0 /______\/ - B p - - The area can also be interpreted as the cross product A x B. - If the sign of the area is positive, the point p is on the - right side of the edge. Negative area indicates the point is on - the left side of the edge. i.e. for an edge v1 - v0 - - .. code-block:: python - - v1 - / - / - - / + - / - / - v0 - """ - return (p[0] - v0[0]) * (v1[1] - v0[1]) - (p[1] - v0[1]) * (v1[0] - v0[0]) - - -def barycentric_coordinates_clip(bary): - """ - Clip negative barycentric coordinates to 0.0 and renormalize so - the barycentric coordinates for a point sum to 1. When the blur_radius - is greater than 0, a face will still be recorded as overlapping a pixel - if the pixel is outside the face. In this case at least one of the - barycentric coordinates for the pixel relative to the face will be negative. - Clipping will ensure that the texture and z buffer are interpolated correctly. - - Args: - bary: tuple of barycentric coordinates - - Returns - bary_clip: (w0, w1, w2) barycentric coordinates with no negative values. - """ - # Only negative values are clamped to 0.0. - w0_clip = torch.clamp(bary[0], min=0.0) - w1_clip = torch.clamp(bary[1], min=0.0) - w2_clip = torch.clamp(bary[2], min=0.0) - bary_sum = torch.clamp(w0_clip + w1_clip + w2_clip, min=1e-5) - w0_clip = w0_clip / bary_sum - w1_clip = w1_clip / bary_sum - w2_clip = w2_clip / bary_sum - - return (w0_clip, w1_clip, w2_clip) - - -def barycentric_coordinates(p, v0, v1, v2): - """ - Compute the barycentric coordinates of a point relative to a triangle. - - Args: - p: Coordinates of a point. - v0, v1, v2: Coordinates of the triangle vertices. - - Returns - bary: (w0, w1, w2) barycentric coordinates in the range [0, 1]. - """ - area = edge_function(v2, v0, v1) + kEpsilon # 2 x face area. - w0 = edge_function(p, v1, v2) / area - w1 = edge_function(p, v2, v0) / area - w2 = edge_function(p, v0, v1) / area - return (w0, w1, w2) - - -def point_line_distance(p, v0, v1): - """ - Return minimum distance between line segment (v1 - v0) and point p. - - Args: - p: Coordinates of a point. - v0, v1: Coordinates of the end points of the line segment. - - Returns: - non-square distance to the boundary of the triangle. - - Consider the line extending the segment - this can be parameterized as - ``v0 + t (v1 - v0)``. - - First find the projection of point p onto the line. It falls where - ``t = [(p - v0) . (v1 - v0)] / |v1 - v0|^2`` - where . is the dot product. - - The parameter t is clamped from [0, 1] to handle points outside the - segment (v1 - v0). - - Once the projection of the point on the segment is known, the distance from - p to the projection gives the minimum distance to the segment. - """ - if p.shape != v0.shape != v1.shape: - raise ValueError("All points must have the same number of coordinates") - - v1v0 = v1 - v0 - l2 = v1v0.dot(v1v0) # |v1 - v0|^2 - if l2 <= kEpsilon: - return (p - v1).dot(p - v1) # v0 == v1 - - t = v1v0.dot(p - v0) / l2 - t = torch.clamp(t, min=0.0, max=1.0) - p_proj = v0 + t * v1v0 - delta_p = p_proj - p - return delta_p.dot(delta_p) - - -def point_triangle_distance(p, v0, v1, v2): - """ - Return shortest distance between a point and a triangle. - - Args: - p: Coordinates of a point. - v0, v1, v2: Coordinates of the three triangle vertices. - - Returns: - shortest absolute distance from the point to the triangle. 
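(For orientation while reviewing this deletion: the edge-function / barycentric math above can be sanity-checked with a few lines of plain PyTorch. This is a hedged, self-contained sketch; `edge_fn`, the triangle vertices and the query point are illustrative and simply mirror `edge_function` and `barycentric_coordinates` as defined in this file.)

.. code-block:: python

    import torch

    def edge_fn(p, v0, v1):
        # Signed parallelogram area spanned by (v1 - v0) and (p - v0);
        # the sign says on which side of the edge p lies.
        return (p[0] - v0[0]) * (v1[1] - v0[1]) - (p[1] - v0[1]) * (v1[0] - v0[0])

    v0 = torch.tensor([0.0, 0.0])
    v1 = torch.tensor([1.0, 0.0])
    v2 = torch.tensor([0.0, 1.0])
    p = torch.tensor([0.25, 0.25])

    area = edge_fn(v2, v0, v1)            # twice the signed triangle area
    w0 = edge_fn(p, v1, v2) / area
    w1 = edge_fn(p, v2, v0) / area
    w2 = edge_fn(p, v0, v1) / area
    # w0, w1, w2 == 0.5, 0.25, 0.25 and sum to 1, and
    # w0 * v0 + w1 * v1 + w2 * v2 recovers p, so p lies inside the triangle.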
- """ - if p.shape != v0.shape != v1.shape != v2.shape: - raise ValueError("All points must have the same number of coordinates") - - e01_dist = point_line_distance(p, v0, v1) - e02_dist = point_line_distance(p, v0, v2) - e12_dist = point_line_distance(p, v1, v2) - edge_dists_min = torch.min(torch.min(e01_dist, e02_dist), e12_dist) - - return edge_dists_min diff --git a/pytorch3d/pytorch3d/renderer/mesh/rasterizer.py b/pytorch3d/pytorch3d/renderer/mesh/rasterizer.py deleted file mode 100644 index 5ef2760393bdebcf42f34a6bd3972cbafe2383f2..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/rasterizer.py +++ /dev/null @@ -1,271 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from dataclasses import dataclass -from typing import Optional, Tuple, Union - -import torch -import torch.nn as nn -from pytorch3d.renderer.cameras import try_get_projection_transform - -from .rasterize_meshes import rasterize_meshes - - -@dataclass(frozen=True) -class Fragments: - """ - A dataclass representing the outputs of a rasterizer. Can be detached from the - computational graph in order to stop the gradients from flowing through the - rasterizer. - - Members: - pix_to_face: - LongTensor of shape (N, image_size, image_size, faces_per_pixel) giving - the indices of the nearest faces at each pixel, sorted in ascending - z-order. Concretely ``pix_to_face[n, y, x, k] = f`` means that - ``faces_verts[f]`` is the kth closest face (in the z-direction) to pixel - (y, x). Pixels that are hit by fewer than faces_per_pixel are padded with - -1. - - zbuf: - FloatTensor of shape (N, image_size, image_size, faces_per_pixel) giving - the NDC z-coordinates of the nearest faces at each pixel, sorted in - ascending z-order. Concretely, if ``pix_to_face[n, y, x, k] = f`` then - ``zbuf[n, y, x, k] = face_verts[f, 2]``. Pixels hit by fewer than - faces_per_pixel are padded with -1. - - bary_coords: - FloatTensor of shape (N, image_size, image_size, faces_per_pixel, 3) - giving the barycentric coordinates in NDC units of the nearest faces at - each pixel, sorted in ascending z-order. Concretely, if ``pix_to_face[n, - y, x, k] = f`` then ``[w0, w1, w2] = barycentric[n, y, x, k]`` gives the - barycentric coords for pixel (y, x) relative to the face defined by - ``face_verts[f]``. Pixels hit by fewer than faces_per_pixel are padded - with -1. - - dists: - FloatTensor of shape (N, image_size, image_size, faces_per_pixel) giving - the signed Euclidean distance (in NDC units) in the x/y plane of each - point closest to the pixel. Concretely if ``pix_to_face[n, y, x, k] = f`` - then ``pix_dists[n, y, x, k]`` is the squared distance between the pixel - (y, x) and the face given by vertices ``face_verts[f]``. Pixels hit with - fewer than ``faces_per_pixel`` are padded with -1. - """ - - pix_to_face: torch.Tensor - zbuf: torch.Tensor - bary_coords: torch.Tensor - dists: Optional[torch.Tensor] - - def detach(self) -> "Fragments": - return Fragments( - pix_to_face=self.pix_to_face, - zbuf=self.zbuf.detach(), - bary_coords=self.bary_coords.detach(), - dists=self.dists.detach() if self.dists is not None else self.dists, - ) - - -@dataclass -class RasterizationSettings: - """ - Class to store the mesh rasterization params with defaults - - Members: - image_size: Either common height and width or (height, width), in pixels. 
- blur_radius: Float distance in the range [0, 2] used to expand the face - bounding boxes for rasterization. Setting blur radius - results in blurred edges around the shape instead of a - hard boundary. Set to 0 for no blur. - faces_per_pixel: (int) Number of faces to keep track of per pixel. - We return the nearest faces_per_pixel faces along the z-axis. - bin_size: Size of bins to use for coarse-to-fine rasterization. Setting - bin_size=0 uses naive rasterization; setting bin_size=None attempts - to set it heuristically based on the shape of the input. This should - not affect the output, but can affect the speed of the forward pass. - max_faces_opengl: Max number of faces in any mesh we will rasterize. Used only by - MeshRasterizerOpenGL to pre-allocate OpenGL memory. - max_faces_per_bin: Only applicable when using coarse-to-fine - rasterization (bin_size != 0); this is the maximum number of faces - allowed within each bin. This should not affect the output values, - but can affect the memory usage in the forward pass. - Setting max_faces_per_bin=None attempts to set with a heuristic. - perspective_correct: Whether to apply perspective correction when - computing barycentric coordinates for pixels. - None (default) means make correction if the camera uses perspective. - clip_barycentric_coords: Whether, after any perspective correction - is applied but before the depth is calculated (e.g. for - z clipping), to "correct" a location outside the face (i.e. with - a negative barycentric coordinate) to a position on the edge of the - face. None (default) means clip if blur_radius > 0, which is a condition - under which such outside-face-points are likely. - cull_backfaces: Whether to only rasterize mesh faces which are - visible to the camera. This assumes that vertices of - front-facing triangles are ordered in an anti-clockwise - fashion, and triangles that face away from the camera are - in a clockwise order relative to the current view - direction. NOTE: This will only work if the mesh faces are - consistently defined with counter-clockwise ordering when - viewed from the outside. - z_clip_value: if not None, then triangles will be clipped (and possibly - subdivided into smaller triangles) such that z >= z_clip_value. - This avoids camera projections that go to infinity as z->0. - Default is None as clipping affects rasterization speed and - should only be turned on if explicitly needed. - See clip.py for all the extra computation that is required. - cull_to_frustum: Whether to cull triangles outside the view frustum. - Culling involves removing all faces which fall outside view frustum. - Default is False for performance as often not needed. - """ - - image_size: Union[int, Tuple[int, int]] = 256 - blur_radius: float = 0.0 - faces_per_pixel: int = 1 - bin_size: Optional[int] = None - max_faces_opengl: int = 10_000_000 - max_faces_per_bin: Optional[int] = None - perspective_correct: Optional[bool] = None - clip_barycentric_coords: Optional[bool] = None - cull_backfaces: bool = False - z_clip_value: Optional[float] = None - cull_to_frustum: bool = False - - -class MeshRasterizer(nn.Module): - """ - This class implements methods for rasterizing a batch of heterogeneous - Meshes. - """ - - def __init__(self, cameras=None, raster_settings=None) -> None: - """ - Args: - cameras: A cameras object which has a `transform_points` method - which returns the transformed points after applying the - world-to-view and view-to-ndc transformations. - raster_settings: the parameters for rasterization. 
This should be a - named tuple. - - All these initial settings can be overridden by passing keyword - arguments to the forward function. - """ - super().__init__() - if raster_settings is None: - raster_settings = RasterizationSettings() - - self.cameras = cameras - self.raster_settings = raster_settings - - def to(self, device): - # Manually move to device cameras as it is not a subclass of nn.Module - if self.cameras is not None: - self.cameras = self.cameras.to(device) - return self - - def transform(self, meshes_world, **kwargs) -> torch.Tensor: - """ - Args: - meshes_world: a Meshes object representing a batch of meshes with - vertex coordinates in world space. - - Returns: - meshes_proj: a Meshes object with the vertex positions projected - in NDC space - - NOTE: keeping this as a separate function for readability but it could - be moved into forward. - """ - cameras = kwargs.get("cameras", self.cameras) - if cameras is None: - msg = "Cameras must be specified either at initialization \ - or in the forward pass of MeshRasterizer" - raise ValueError(msg) - - n_cameras = len(cameras) - if n_cameras != 1 and n_cameras != len(meshes_world): - msg = "Wrong number (%r) of cameras for %r meshes" - raise ValueError(msg % (n_cameras, len(meshes_world))) - - verts_world = meshes_world.verts_padded() - - # NOTE: Retaining view space z coordinate for now. - # TODO: Revisit whether or not to transform z coordinate to [-1, 1] or - # [0, 1] range. - eps = kwargs.get("eps", None) - verts_view = cameras.get_world_to_view_transform(**kwargs).transform_points( - verts_world, eps=eps - ) - to_ndc_transform = cameras.get_ndc_camera_transform(**kwargs) - projection_transform = try_get_projection_transform(cameras, kwargs) - if projection_transform is not None: - projection_transform = projection_transform.compose(to_ndc_transform) - verts_ndc = projection_transform.transform_points(verts_view, eps=eps) - else: - # Call transform_points instead of explicitly composing transforms to handle - # the case, where camera class does not have a projection matrix form. - verts_proj = cameras.transform_points(verts_world, eps=eps) - verts_ndc = to_ndc_transform.transform_points(verts_proj, eps=eps) - - verts_ndc[..., 2] = verts_view[..., 2] - meshes_ndc = meshes_world.update_padded(new_verts_padded=verts_ndc) - return meshes_ndc - - def forward(self, meshes_world, **kwargs) -> Fragments: - """ - Args: - meshes_world: a Meshes object representing a batch of meshes with - coordinates in world space. - Returns: - Fragments: Rasterization outputs as a named tuple. - """ - meshes_proj = self.transform(meshes_world, **kwargs) - raster_settings = kwargs.get("raster_settings", self.raster_settings) - - # By default, turn on clip_barycentric_coords if blur_radius > 0. - # When blur_radius > 0, a face can be matched to a pixel that is outside the - # face, resulting in negative barycentric coordinates. 
- clip_barycentric_coords = raster_settings.clip_barycentric_coords - if clip_barycentric_coords is None: - clip_barycentric_coords = raster_settings.blur_radius > 0.0 - - # If not specified, infer perspective_correct and z_clip_value from the camera - cameras = kwargs.get("cameras", self.cameras) - if raster_settings.perspective_correct is not None: - perspective_correct = raster_settings.perspective_correct - else: - perspective_correct = cameras.is_perspective() - if raster_settings.z_clip_value is not None: - z_clip = raster_settings.z_clip_value - else: - znear = cameras.get_znear() - if isinstance(znear, torch.Tensor): - znear = znear.min().item() - z_clip = None if not perspective_correct or znear is None else znear / 2 - - # By default, turn on clip_barycentric_coords if blur_radius > 0. - # When blur_radius > 0, a face can be matched to a pixel that is outside the - # face, resulting in negative barycentric coordinates. - - pix_to_face, zbuf, bary_coords, dists = rasterize_meshes( - meshes_proj, - image_size=raster_settings.image_size, - blur_radius=raster_settings.blur_radius, - faces_per_pixel=raster_settings.faces_per_pixel, - bin_size=raster_settings.bin_size, - max_faces_per_bin=raster_settings.max_faces_per_bin, - clip_barycentric_coords=clip_barycentric_coords, - perspective_correct=perspective_correct, - cull_backfaces=raster_settings.cull_backfaces, - z_clip_value=z_clip, - cull_to_frustum=raster_settings.cull_to_frustum, - ) - - return Fragments( - pix_to_face=pix_to_face, - zbuf=zbuf, - bary_coords=bary_coords, - dists=dists, - ) diff --git a/pytorch3d/pytorch3d/renderer/mesh/renderer.py b/pytorch3d/pytorch3d/renderer/mesh/renderer.py deleted file mode 100644 index 98576a9fcc6797090b493cfbd05e2234e79ba66b..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/renderer.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Tuple - -import torch -import torch.nn as nn - -from ...structures.meshes import Meshes - -# A renderer class should be initialized with a -# function for rasterization and a function for shading. -# The rasterizer should: -# - transform inputs from world -> screen space -# - rasterize inputs -# - return fragments -# The shader can take fragments as input along with any other properties of -# the scene and generate images. - -# E.g. rasterize inputs and then shade -# -# fragments = self.rasterize(meshes) -# images = self.shader(fragments, meshes) -# return images - - -class MeshRenderer(nn.Module): - """ - A class for rendering a batch of heterogeneous meshes. The class should - be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL) - and shader class which each have a forward function. - """ - - def __init__(self, rasterizer, shader) -> None: - super().__init__() - self.rasterizer = rasterizer - self.shader = shader - - def to(self, device): - # Rasterizer and shader have submodules which are not of type nn.Module - self.rasterizer.to(device) - self.shader.to(device) - return self - - def forward(self, meshes_world: Meshes, **kwargs) -> torch.Tensor: - """ - Render a batch of images from a batch of meshes by rasterizing and then - shading. - - NOTE: If the blur radius for rasterization is > 0.0, some pixels can - have one or more barycentric coordinates lying outside the range [0, 1]. 
- For a pixel with out of bounds barycentric coordinates with respect to a - face f, clipping is required before interpolating the texture uv - coordinates and z buffer so that the colors and depths are limited to - the range for the corresponding face. - For this set rasterizer.raster_settings.clip_barycentric_coords=True - """ - fragments = self.rasterizer(meshes_world, **kwargs) - images = self.shader(fragments, meshes_world, **kwargs) - - return images - - -class MeshRendererWithFragments(nn.Module): - """ - A class for rendering a batch of heterogeneous meshes. The class should - be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL) - and shader class which each have a forward function. - - In the forward pass this class returns the `fragments` from which intermediate - values such as the depth map can be easily extracted e.g. - - .. code-block:: python - images, fragments = renderer(meshes) - depth = fragments.zbuf - """ - - def __init__(self, rasterizer, shader) -> None: - super().__init__() - self.rasterizer = rasterizer - self.shader = shader - - def to(self, device): - # Rasterizer and shader have submodules which are not of type nn.Module - self.rasterizer.to(device) - self.shader.to(device) - return self - - def forward( - self, meshes_world: Meshes, **kwargs - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Render a batch of images from a batch of meshes by rasterizing and then - shading. - - NOTE: If the blur radius for rasterization is > 0.0, some pixels can - have one or more barycentric coordinates lying outside the range [0, 1]. - For a pixel with out of bounds barycentric coordinates with respect to a - face f, clipping is required before interpolating the texture uv - coordinates and z buffer so that the colors and depths are limited to - the range for the corresponding face. - For this set rasterizer.raster_settings.clip_barycentric_coords=True - """ - fragments = self.rasterizer(meshes_world, **kwargs) - images = self.shader(fragments, meshes_world, **kwargs) - - return images, fragments diff --git a/pytorch3d/pytorch3d/renderer/mesh/shader.py b/pytorch3d/pytorch3d/renderer/mesh/shader.py deleted file mode 100644 index 40e9cd17d0e12c6cff0d03a48e89a0b151a228de..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/shader.py +++ /dev/null @@ -1,442 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from typing import Optional - -import torch -import torch.nn as nn - -from ...common.datatypes import Device -from ...structures.meshes import Meshes -from ..blending import ( - BlendParams, - hard_rgb_blend, - sigmoid_alpha_blend, - softmax_rgb_blend, -) -from ..lighting import PointLights -from ..materials import Materials -from ..splatter_blend import SplatterBlender -from ..utils import TensorProperties -from .rasterizer import Fragments -from .shading import ( - _phong_shading_with_pixels, - flat_shading, - gouraud_shading, - phong_shading, -) - - -# A Shader should take as input fragments from the output of rasterization -# along with scene params and output images. A shader could perform operations -# such as: -# - interpolate vertex attributes for all the fragments -# - sample colors from a texture map -# - apply per pixel lighting -# - blend colors across top K faces per pixel. 
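(A hedged usage sketch of how the rasterizer, a shader and MeshRenderer above compose into an end-to-end pipeline. The class names come from the pytorch3d API documented in the files deleted here; the ico_sphere mesh, camera placement and light position are illustrative assumptions.)

.. code-block:: python

    import torch
    from pytorch3d.renderer import (
        FoVPerspectiveCameras, MeshRasterizer, MeshRenderer, PointLights,
        RasterizationSettings, SoftPhongShader, TexturesVertex,
        look_at_view_transform,
    )
    from pytorch3d.structures import Meshes
    from pytorch3d.utils import ico_sphere

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # A white sphere so the shader has per-vertex colors to interpolate.
    sphere = ico_sphere(level=2, device=device)
    verts, faces = sphere.verts_padded(), sphere.faces_padded()
    mesh = Meshes(verts=verts, faces=faces,
                  textures=TexturesVertex(verts_features=torch.ones_like(verts)))

    R, T = look_at_view_transform(dist=3.0, elev=10.0, azim=0.0)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
    raster_settings = RasterizationSettings(
        image_size=256, blur_radius=0.0, faces_per_pixel=1
    )

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=SoftPhongShader(device=device, cameras=cameras, lights=lights),
    )
    images = renderer(mesh)  # (1, 256, 256, 4) RGBA

Swapping the shader (HardPhongShader, SoftSilhouetteShader, etc.) changes only the shading/blending step; the rasterization settings and camera handling stay the same.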
-class ShaderBase(nn.Module): - def __init__( - self, - device: Device = "cpu", - cameras: Optional[TensorProperties] = None, - lights: Optional[TensorProperties] = None, - materials: Optional[Materials] = None, - blend_params: Optional[BlendParams] = None, - ) -> None: - super().__init__() - self.lights = lights if lights is not None else PointLights(device=device) - self.materials = ( - materials if materials is not None else Materials(device=device) - ) - self.cameras = cameras - self.blend_params = blend_params if blend_params is not None else BlendParams() - - def _get_cameras(self, **kwargs): - cameras = kwargs.get("cameras", self.cameras) - if cameras is None: - msg = "Cameras must be specified either at initialization \ - or in the forward pass of the shader." - raise ValueError(msg) - - return cameras - - # pyre-fixme[14]: `to` overrides method defined in `Module` inconsistently. - def to(self, device: Device): - # Manually move to device modules which are not subclasses of nn.Module - cameras = self.cameras - if cameras is not None: - self.cameras = cameras.to(device) - self.materials = self.materials.to(device) - self.lights = self.lights.to(device) - return self - - -class HardPhongShader(ShaderBase): - """ - Per pixel lighting - the lighting model is applied using the interpolated - coordinates and normals for each pixel. The blending function hard assigns - the color of the closest face for each pixel. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = HardPhongShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - texels = meshes.sample_textures(fragments) - lights = kwargs.get("lights", self.lights) - materials = kwargs.get("materials", self.materials) - blend_params = kwargs.get("blend_params", self.blend_params) - colors = phong_shading( - meshes=meshes, - fragments=fragments, - texels=texels, - lights=lights, - cameras=cameras, - materials=materials, - ) - images = hard_rgb_blend(colors, fragments, blend_params) - return images - - -class SoftPhongShader(ShaderBase): - """ - Per pixel lighting - the lighting model is applied using the interpolated - coordinates and normals for each pixel. The blending function returns the - soft aggregated color using all the faces per pixel. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = SoftPhongShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - texels = meshes.sample_textures(fragments) - lights = kwargs.get("lights", self.lights) - materials = kwargs.get("materials", self.materials) - blend_params = kwargs.get("blend_params", self.blend_params) - colors = phong_shading( - meshes=meshes, - fragments=fragments, - texels=texels, - lights=lights, - cameras=cameras, - materials=materials, - ) - znear = kwargs.get("znear", getattr(cameras, "znear", 1.0)) - zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0)) - images = softmax_rgb_blend( - colors, fragments, blend_params, znear=znear, zfar=zfar - ) - return images - - -class HardGouraudShader(ShaderBase): - """ - Per vertex lighting - the lighting model is applied to the vertex colors and - the colors are then interpolated using the barycentric coordinates to - obtain the colors for each pixel. 
The blending function hard assigns - the color of the closest face for each pixel. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = HardGouraudShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - lights = kwargs.get("lights", self.lights) - materials = kwargs.get("materials", self.materials) - blend_params = kwargs.get("blend_params", self.blend_params) - - # As Gouraud shading applies the illumination to the vertex - # colors, the interpolated pixel texture is calculated in the - # shading step. In comparison, for Phong shading, the pixel - # textures are computed first after which the illumination is - # applied. - pixel_colors = gouraud_shading( - meshes=meshes, - fragments=fragments, - lights=lights, - cameras=cameras, - materials=materials, - ) - images = hard_rgb_blend(pixel_colors, fragments, blend_params) - return images - - -class SoftGouraudShader(ShaderBase): - """ - Per vertex lighting - the lighting model is applied to the vertex colors and - the colors are then interpolated using the barycentric coordinates to - obtain the colors for each pixel. The blending function returns the - soft aggregated color using all the faces per pixel. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = SoftGouraudShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - lights = kwargs.get("lights", self.lights) - materials = kwargs.get("materials", self.materials) - pixel_colors = gouraud_shading( - meshes=meshes, - fragments=fragments, - lights=lights, - cameras=cameras, - materials=materials, - ) - znear = kwargs.get("znear", getattr(cameras, "znear", 1.0)) - zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0)) - images = softmax_rgb_blend( - pixel_colors, fragments, self.blend_params, znear=znear, zfar=zfar - ) - return images - - -def TexturedSoftPhongShader( - device: Device = "cpu", - cameras: Optional[TensorProperties] = None, - lights: Optional[TensorProperties] = None, - materials: Optional[Materials] = None, - blend_params: Optional[BlendParams] = None, -) -> SoftPhongShader: - """ - TexturedSoftPhongShader class has been DEPRECATED. Use SoftPhongShader instead. - Preserving TexturedSoftPhongShader as a function for backwards compatibility. - """ - warnings.warn( - """TexturedSoftPhongShader is now deprecated; - use SoftPhongShader instead.""", - PendingDeprecationWarning, - ) - return SoftPhongShader( - device=device, - cameras=cameras, - lights=lights, - materials=materials, - blend_params=blend_params, - ) - - -class HardFlatShader(ShaderBase): - """ - Per face lighting - the lighting model is applied using the average face - position and the face normal. The blending function hard assigns - the color of the closest face for each pixel. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. 
code-block:: - - shader = HardFlatShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - texels = meshes.sample_textures(fragments) - lights = kwargs.get("lights", self.lights) - materials = kwargs.get("materials", self.materials) - blend_params = kwargs.get("blend_params", self.blend_params) - colors = flat_shading( - meshes=meshes, - fragments=fragments, - texels=texels, - lights=lights, - cameras=cameras, - materials=materials, - ) - images = hard_rgb_blend(colors, fragments, blend_params) - return images - - -class SoftSilhouetteShader(nn.Module): - """ - Calculate the silhouette by blending the top K faces for each pixel based - on the 2d euclidean distance of the center of the pixel to the mesh face. - - Use this shader for generating silhouettes similar to SoftRasterizer [0]. - - .. note:: - - To be consistent with SoftRasterizer, initialize the - RasterizationSettings for the rasterizer with - `blur_radius = np.log(1. / 1e-4 - 1.) * blend_params.sigma` - - [0] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based - 3D Reasoning', ICCV 2019 - """ - - def __init__(self, blend_params: Optional[BlendParams] = None) -> None: - super().__init__() - self.blend_params = blend_params if blend_params is not None else BlendParams() - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - """ - Only want to render the silhouette so RGB values can be ones. - There is no need for lighting or texturing - """ - colors = torch.ones_like(fragments.bary_coords) - blend_params = kwargs.get("blend_params", self.blend_params) - images = sigmoid_alpha_blend(colors, fragments, blend_params) - return images - - -class SplatterPhongShader(ShaderBase): - """ - Per pixel lighting - the lighting model is applied using the interpolated - coordinates and normals for each pixel. The blending function returns the - color aggregated using splats from surrounding pixels (see [0]). - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = SplatterPhongShader(device=torch.device("cuda:0")) - - [0] Cole, F. et al., "Differentiable Surface Rendering via Non-differentiable - Sampling". - """ - - def __init__(self, **kwargs): - self.splatter_blender = None - super().__init__(**kwargs) - - def to(self, device: Device): - if self.splatter_blender: - self.splatter_blender.to(device) - return super().to(device) - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - texels = meshes.sample_textures(fragments) - lights = kwargs.get("lights", self.lights) - materials = kwargs.get("materials", self.materials) - - colors, pixel_coords_cameras = _phong_shading_with_pixels( - meshes=meshes, - fragments=fragments.detach(), - texels=texels, - lights=lights, - cameras=cameras, - materials=materials, - ) - - if not self.splatter_blender: - # Init only once, to avoid re-computing constants. 
- N, H, W, K, _ = colors.shape - self.splatter_blender = SplatterBlender((N, H, W, K), colors.device) - - blend_params = kwargs.get("blend_params", self.blend_params) - self.check_blend_params(blend_params) - - images = self.splatter_blender( - colors, - pixel_coords_cameras, - cameras, - fragments.pix_to_face < 0, - kwargs.get("blend_params", self.blend_params), - ) - - return images - - def check_blend_params(self, blend_params): - if blend_params.sigma != 0.5: - warnings.warn( - f"SplatterPhongShader received sigma={blend_params.sigma}. sigma is " - "defined in pixel units, and any value other than 0.5 is highly " - "unexpected. Only use other values if you know what you are doing. " - ) - - -class HardDepthShader(ShaderBase): - """ - Renders the Z distances of the closest face for each pixel. If no face is - found it returns the zfar value of the camera. - - Output from this shader is [N, H, W, 1] since it's only depth. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = HardDepthShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - cameras = super()._get_cameras(**kwargs) - - zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0)) - mask = fragments.pix_to_face[..., 0:1] < 0 - - zbuf = fragments.zbuf[..., 0:1].clone() - zbuf[mask] = zfar - return zbuf - - -class SoftDepthShader(ShaderBase): - """ - Renders the Z distances using an aggregate of the distances of each face - based off of the point distance. If no face is found it returns the zfar - value of the camera. - - Output from this shader is [N, H, W, 1] since it's only depth. - - To use the default values, simply initialize the shader with the desired - device e.g. - - .. code-block:: - - shader = SoftDepthShader(device=torch.device("cuda:0")) - """ - - def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor: - if fragments.dists is None: - raise ValueError("SoftDepthShader requires Fragments.dists to be present.") - - cameras = super()._get_cameras(**kwargs) - - N, H, W, K = fragments.pix_to_face.shape - device = fragments.zbuf.device - mask = fragments.pix_to_face >= 0 - - zfar = kwargs.get("zfar", getattr(cameras, "zfar", 100.0)) - - # Sigmoid probability map based on the distance of the pixel to the face. - prob_map = torch.sigmoid(-fragments.dists / self.blend_params.sigma) * mask - - # append extra face for zfar - dists = torch.cat( - (fragments.zbuf, torch.ones((N, H, W, 1), device=device) * zfar), dim=3 - ) - probs = torch.cat((prob_map, torch.ones((N, H, W, 1), device=device)), dim=3) - - # compute weighting based off of probabilities using cumsum - probs = probs.cumsum(dim=3) - probs = probs.clamp(max=1) - probs = probs.diff(dim=3, prepend=torch.zeros((N, H, W, 1), device=device)) - - return (probs * dists).sum(dim=3).unsqueeze(3) diff --git a/pytorch3d/pytorch3d/renderer/mesh/shading.py b/pytorch3d/pytorch3d/renderer/mesh/shading.py deleted file mode 100644 index 05cb66ade8c465f42437244e3c76bb36bc5bb07a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/shading.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
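(The two depth shaders above slot into the same composition. A hedged sketch, reusing `mesh`, `cameras`, `raster_settings` and `device` from the rendering sketch earlier in this diff; the import path is the module whose deletion is shown here, which remains available from the pip-installed pytorch3d package.)

.. code-block:: python

    from pytorch3d.renderer.mesh.shader import HardDepthShader

    depth_renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=HardDepthShader(device=device, cameras=cameras),
    )
    depth = depth_renderer(mesh)  # (1, 256, 256, 1); pixels hitting no face are set to zfar

Alternatively, MeshRendererWithFragments exposes the raw z-buffer via `fragments.zbuf`, as its docstring above shows.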
- - -from typing import Tuple - -import torch -from pytorch3d.ops import interpolate_face_attributes - -from .textures import TexturesVertex - - -def _apply_lighting( - points, normals, lights, cameras, materials -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Args: - points: torch tensor of shape (N, ..., 3) or (P, 3). - normals: torch tensor of shape (N, ..., 3) or (P, 3) - lights: instance of the Lights class. - cameras: instance of the Cameras class. - materials: instance of the Materials class. - - Returns: - ambient_color: same shape as materials.ambient_color - diffuse_color: same shape as the input points - specular_color: same shape as the input points - """ - light_diffuse = lights.diffuse(normals=normals, points=points) - light_specular = lights.specular( - normals=normals, - points=points, - camera_position=cameras.get_camera_center(), - shininess=materials.shininess, - ) - ambient_color = materials.ambient_color * lights.ambient_color - diffuse_color = materials.diffuse_color * light_diffuse - specular_color = materials.specular_color * light_specular - - if normals.dim() == 2 and points.dim() == 2: - # If given packed inputs remove batch dim in output. - return ( - ambient_color.squeeze(), - diffuse_color.squeeze(), - specular_color.squeeze(), - ) - - if ambient_color.ndim != diffuse_color.ndim: - # Reshape from (N, 3) to have dimensions compatible with - # diffuse_color which is of shape (N, H, W, K, 3) - ambient_color = ambient_color[:, None, None, None, :] - return ambient_color, diffuse_color, specular_color - - -def _phong_shading_with_pixels( - meshes, fragments, lights, cameras, materials, texels -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Apply per pixel shading. First interpolate the vertex normals and - vertex coordinates using the barycentric coordinates to get the position - and normal at each pixel. Then compute the illumination for each pixel. - The pixel color is obtained by multiplying the pixel textures by the ambient - and diffuse illumination and adding the specular component. - - Args: - meshes: Batch of meshes - fragments: Fragments named tuple with the outputs of rasterization - lights: Lights class containing a batch of lights - cameras: Cameras class containing a batch of cameras - materials: Materials class containing a batch of material properties - texels: texture per pixel of shape (N, H, W, K, 3) - - Returns: - colors: (N, H, W, K, 3) - pixel_coords: (N, H, W, K, 3), camera coordinates of each intersection. - """ - verts = meshes.verts_packed() # (V, 3) - faces = meshes.faces_packed() # (F, 3) - vertex_normals = meshes.verts_normals_packed() # (V, 3) - faces_verts = verts[faces] - faces_normals = vertex_normals[faces] - pixel_coords_in_camera = interpolate_face_attributes( - fragments.pix_to_face, fragments.bary_coords, faces_verts - ) - pixel_normals = interpolate_face_attributes( - fragments.pix_to_face, fragments.bary_coords, faces_normals - ) - ambient, diffuse, specular = _apply_lighting( - pixel_coords_in_camera, pixel_normals, lights, cameras, materials - ) - colors = (ambient + diffuse) * texels + specular - return colors, pixel_coords_in_camera - - -def phong_shading( - meshes, fragments, lights, cameras, materials, texels -) -> torch.Tensor: - """ - Apply per pixel shading. First interpolate the vertex normals and - vertex coordinates using the barycentric coordinates to get the position - and normal at each pixel. Then compute the illumination for each pixel. 
- The pixel color is obtained by multiplying the pixel textures by the ambient - and diffuse illumination and adding the specular component. - - Args: - meshes: Batch of meshes - fragments: Fragments named tuple with the outputs of rasterization - lights: Lights class containing a batch of lights - cameras: Cameras class containing a batch of cameras - materials: Materials class containing a batch of material properties - texels: texture per pixel of shape (N, H, W, K, 3) - - Returns: - colors: (N, H, W, K, 3) - """ - colors, _ = _phong_shading_with_pixels( - meshes, fragments, lights, cameras, materials, texels - ) - return colors - - -def gouraud_shading(meshes, fragments, lights, cameras, materials) -> torch.Tensor: - """ - Apply per vertex shading. First compute the vertex illumination by applying - ambient, diffuse and specular lighting. If vertex color is available, - combine the ambient and diffuse vertex illumination with the vertex color - and add the specular component to determine the vertex shaded color. - Then interpolate the vertex shaded colors using the barycentric coordinates - to get a color per pixel. - - Gouraud shading is only supported for meshes with texture type `TexturesVertex`. - This is because the illumination is applied to the vertex colors. - - Args: - meshes: Batch of meshes - fragments: Fragments named tuple with the outputs of rasterization - lights: Lights class containing a batch of lights parameters - cameras: Cameras class containing a batch of cameras parameters - materials: Materials class containing a batch of material properties - - Returns: - colors: (N, H, W, K, 3) - """ - if not isinstance(meshes.textures, TexturesVertex): - raise ValueError("Mesh textures must be an instance of TexturesVertex") - - faces = meshes.faces_packed() # (F, 3) - verts = meshes.verts_packed() # (V, 3) - verts_normals = meshes.verts_normals_packed() # (V, 3) - verts_colors = meshes.textures.verts_features_packed() # (V, D) - vert_to_mesh_idx = meshes.verts_packed_to_mesh_idx() - - # Format properties of lights and materials so they are compatible - # with the packed representation of the vertices. This transforms - # all tensor properties in the class from shape (N, ...) -> (V, ...) where - # V is the number of packed vertices. If the number of meshes in the - # batch is one then this is not necessary. - if len(meshes) > 1: - lights = lights.clone().gather_props(vert_to_mesh_idx) - cameras = cameras.clone().gather_props(vert_to_mesh_idx) - materials = materials.clone().gather_props(vert_to_mesh_idx) - - # Calculate the illumination at each vertex - ambient, diffuse, specular = _apply_lighting( - verts, verts_normals, lights, cameras, materials - ) - - verts_colors_shaded = verts_colors * (ambient + diffuse) + specular - face_colors = verts_colors_shaded[faces] - colors = interpolate_face_attributes( - fragments.pix_to_face, fragments.bary_coords, face_colors - ) - return colors - - -def flat_shading(meshes, fragments, lights, cameras, materials, texels) -> torch.Tensor: - """ - Apply per face shading. Use the average face position and the face normals - to compute the ambient, diffuse and specular lighting. Apply the ambient - and diffuse color to the pixel color and add the specular component to - determine the final pixel color. 
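(All of the shading functions above reduce to the same composition of the terms returned by `_apply_lighting`. A hedged, shape-level sketch with dummy tensors; the sizes and constants are illustrative only.)

.. code-block:: python

    import torch

    N, H, W, K = 1, 4, 4, 2
    texels = torch.rand(N, H, W, K, 3)                # sampled texture color per pixel, per face
    ambient = torch.full((N, 1, 1, 1, 3), 0.2)        # broadcast like the reshaped ambient_color
    diffuse = torch.rand(N, H, W, K, 3) * 0.7
    specular = torch.rand(N, H, W, K, 3) * 0.1

    colors = (ambient + diffuse) * texels + specular  # the Phong / flat composition used above
    assert colors.shape == (N, H, W, K, 3)

Gouraud shading applies the same composition per vertex (to TexturesVertex features) and only then interpolates, which is why it is restricted to vertex textures.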
- - Args: - meshes: Batch of meshes - fragments: Fragments named tuple with the outputs of rasterization - lights: Lights class containing a batch of lights parameters - cameras: Cameras class containing a batch of cameras parameters - materials: Materials class containing a batch of material properties - texels: texture per pixel of shape (N, H, W, K, 3) - - Returns: - colors: (N, H, W, K, 3) - """ - verts = meshes.verts_packed() # (V, 3) - faces = meshes.faces_packed() # (F, 3) - face_normals = meshes.faces_normals_packed() # (V, 3) - faces_verts = verts[faces] - face_coords = faces_verts.mean(dim=-2) # (F, 3, XYZ) mean xyz across verts - - # Replace empty pixels in pix_to_face with 0 in order to interpolate. - mask = fragments.pix_to_face == -1 - pix_to_face = fragments.pix_to_face.clone() - pix_to_face[mask] = 0 - - N, H, W, K = pix_to_face.shape - idx = pix_to_face.view(N * H * W * K, 1).expand(N * H * W * K, 3) - - # gather pixel coords - pixel_coords = face_coords.gather(0, idx).view(N, H, W, K, 3) - pixel_coords[mask] = 0.0 - # gather pixel normals - pixel_normals = face_normals.gather(0, idx).view(N, H, W, K, 3) - pixel_normals[mask] = 0.0 - - # Calculate the illumination at each face - ambient, diffuse, specular = _apply_lighting( - pixel_coords, pixel_normals, lights, cameras, materials - ) - colors = (ambient + diffuse) * texels + specular - return colors diff --git a/pytorch3d/pytorch3d/renderer/mesh/textures.py b/pytorch3d/pytorch3d/renderer/mesh/textures.py deleted file mode 100644 index 599271554ef86678e178e27733642973df390d84..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/textures.py +++ /dev/null @@ -1,1669 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import itertools -import warnings -from typing import Dict, List, Optional, Tuple, TYPE_CHECKING, Union - -import torch -import torch.nn.functional as F -from pytorch3d.ops import interpolate_face_attributes -from pytorch3d.structures.utils import list_to_packed, list_to_padded, padded_to_list -from torch.nn.functional import interpolate - -from .utils import pack_unique_rectangles, PackedRectangle, Rectangle - - -# This file contains classes and helper functions for texturing. -# There are three types of textures: TexturesVertex, TexturesAtlas -# and TexturesUV which inherit from a base textures class TexturesBase. -# -# Each texture class has a method 'sample_textures' to sample a -# value given barycentric coordinates. -# -# All the textures accept either list or padded inputs. The values -# are stored as either per face values (TexturesAtlas, TexturesUV), -# or per face vertex features (TexturesVertex). - - -def _list_to_padded_wrapper( - x: List[torch.Tensor], - pad_size: Union[list, tuple, None] = None, - pad_value: float = 0.0, -) -> torch.Tensor: - r""" - This is a wrapper function for - pytorch3d.structures.utils.list_to_padded function which only accepts - 3-dimensional inputs. - - For this use case, the input x is of shape (F, 3, ...) where only F - is different for each element in the list - - Transforms a list of N tensors each of shape (Mi, ...) into a single tensor - of shape (N, pad_size, ...), or (N, max(Mi), ...) - if pad_size is None. 
- - Args: - x: list of Tensors - pad_size: int specifying the size of the first dimension - of the padded tensor - pad_value: float value to be used to fill the padded tensor - - Returns: - x_padded: tensor consisting of padded input tensors - """ - N = len(x) - dims = x[0].ndim - reshape_dims = x[0].shape[1:] - D = torch.prod(torch.tensor(reshape_dims)).item() - x_reshaped = [] - for y in x: - if y.ndim != dims and y.shape[1:] != reshape_dims: - msg = ( - "list_to_padded requires tensors to have the same number of dimensions" - ) - raise ValueError(msg) - # pyre-fixme[6]: For 2nd param expected `int` but got `Union[bool, float, int]`. - x_reshaped.append(y.reshape(-1, D)) - x_padded = list_to_padded(x_reshaped, pad_size=pad_size, pad_value=pad_value) - # pyre-fixme[58]: `+` is not supported for operand types `Tuple[int, int]` and - # `Size`. - return x_padded.reshape((N, -1) + reshape_dims) - - -def _padded_to_list_wrapper( - x: torch.Tensor, split_size: Union[list, tuple, None] = None -) -> List[torch.Tensor]: - r""" - This is a wrapper function for pytorch3d.structures.utils.padded_to_list - which only accepts 3-dimensional inputs. - - For this use case, the input x is of shape (N, F, ...) where F - is the number of faces which is different for each tensor in the batch. - - This function transforms a padded tensor of shape (N, M, ...) into a - list of N tensors of shape (Mi, ...) where (Mi) is specified in - split_size(i), or of shape (M,) if split_size is None. - - Args: - x: padded Tensor - split_size: list of ints defining the number of items for each tensor - in the output list. - - Returns: - x_list: a list of tensors - """ - N, M = x.shape[:2] - reshape_dims = x.shape[2:] - D = torch.prod(torch.tensor(reshape_dims)).item() - # pyre-fixme[6]: For 3rd param expected `int` but got `Union[bool, float, int]`. - x_reshaped = x.reshape(N, M, D) - x_list = padded_to_list(x_reshaped, split_size=split_size) - # pyre-fixme[58]: `+` is not supported for operand types `Tuple[typing.Any]` and - # `Size`. - x_list = [xl.reshape((xl.shape[0],) + reshape_dims) for xl in x_list] - return x_list - - -def _pad_texture_maps( - images: Union[Tuple[torch.Tensor], List[torch.Tensor]], align_corners: bool -) -> torch.Tensor: - """ - Pad all texture images so they have the same height and width. - - Args: - images: list of N tensors of shape (H_i, W_i, C) - align_corners: used for interpolation - - Returns: - tex_maps: Tensor of shape (N, max_H, max_W, C) - """ - tex_maps = [] - max_H = 0 - max_W = 0 - for im in images: - h, w, _C = im.shape - if h > max_H: - max_H = h - if w > max_W: - max_W = w - tex_maps.append(im) - max_shape = (max_H, max_W) - - for i, image in enumerate(tex_maps): - if image.shape[:2] != max_shape: - image_BCHW = image.permute(2, 0, 1)[None] - new_image_BCHW = interpolate( - image_BCHW, - size=max_shape, - mode="bilinear", - align_corners=align_corners, - ) - tex_maps[i] = new_image_BCHW[0].permute(1, 2, 0) - tex_maps = torch.stack(tex_maps, dim=0) # (num_tex_maps, max_H, max_W, C) - return tex_maps - - -# A base class for defining a batch of textures -# with helper methods. -# This is also useful to have so that inside `Meshes` -# we can allow the input textures to be any texture -# type which is an instance of the base class. 
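(A hedged sketch of constructing the three texture types described above; the tensor sizes and random contents are illustrative only.)

.. code-block:: python

    import torch
    from pytorch3d.renderer import TexturesAtlas, TexturesUV, TexturesVertex

    V, F = 100, 200

    # Per-vertex colors: one (V, C) feature per vertex, interpolated over each face.
    tex_vertex = TexturesVertex(verts_features=torch.rand(1, V, 3))

    # UV-mapped texture image: faces index into per-vertex UV coordinates.
    tex_uv = TexturesUV(
        maps=torch.rand(1, 256, 256, 3),
        faces_uvs=torch.randint(0, V, (1, F, 3)),
        verts_uvs=torch.rand(1, V, 2),
    )

    # Per-face texture atlas: an R x R color grid for every face.
    tex_atlas = TexturesAtlas(atlas=torch.rand(1, F, 4, 4, 3))

Any of these can be passed as `textures=` when building a `Meshes` batch, and `sample_textures` then produces the per-pixel texels consumed by the shaders.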
-class TexturesBase: - def isempty(self): - if self._N is not None and self.valid is not None: - return self._N == 0 or self.valid.eq(False).all() - return False - - def to(self, device): - for k in dir(self): - v = getattr(self, k) - if isinstance(v, (list, tuple)) and all( - torch.is_tensor(elem) for elem in v - ): - v = [elem.to(device) for elem in v] - setattr(self, k, v) - if torch.is_tensor(v) and v.device != device: - setattr(self, k, v.to(device)) - self.device = device - return self - - def _extend(self, N: int, props: List[str]) -> Dict[str, Union[torch.Tensor, List]]: - """ - Create a dict with the specified properties - repeated N times per batch element. - - Args: - N: number of new copies of each texture - in the batch. - props: a List of strings which refer to either - class attributes or class methods which - return tensors or lists. - - Returns: - Dict with the same keys as props. The values are the - extended properties. - """ - if not isinstance(N, int): - raise ValueError("N must be an integer.") - if N <= 0: - raise ValueError("N must be > 0.") - - new_props = {} - for p in props: - t = getattr(self, p) - if callable(t): - t = t() # class method - if isinstance(t, list): - if not all(isinstance(elem, (int, float)) for elem in t): - raise ValueError("Extend only supports lists of scalars") - t = [[ti] * N for ti in t] - new_props[p] = list(itertools.chain(*t)) - elif torch.is_tensor(t): - new_props[p] = t.repeat_interleave(N, dim=0) - return new_props - - def _getitem(self, index: Union[int, slice], props: List[str]): - """ - Helper function for __getitem__ - """ - new_props = {} - if isinstance(index, (int, slice)): - for p in props: - t = getattr(self, p) - if callable(t): - t = t() # class method - new_props[p] = t[index] - elif isinstance(index, list): - index = torch.tensor(index) - if isinstance(index, torch.Tensor): - if index.dtype == torch.bool: - index = index.nonzero() - index = index.squeeze(1) if index.numel() > 0 else index - index = index.tolist() - for p in props: - t = getattr(self, p) - if callable(t): - t = t() # class method - new_props[p] = [t[i] for i in index] - - return new_props - - def sample_textures(self) -> torch.Tensor: - """ - Different texture classes sample textures in different ways - e.g. for vertex textures, the values at each vertex - are interpolated across the face using the barycentric - coordinates. - Each texture class should implement a sample_textures - method to take the `fragments` from rasterization. - Using `fragments.pix_to_face` and `fragments.bary_coords` - this function should return the sampled texture values for - each pixel in the output image. - """ - raise NotImplementedError() - - def submeshes( - self, - vertex_ids_list: List[List[torch.LongTensor]], - faces_ids_list: List[List[torch.LongTensor]], - ) -> "TexturesBase": - """ - Extract sub-textures used for submeshing. - """ - raise NotImplementedError(f"{self.__class__} does not support submeshes") - - def faces_verts_textures_packed(self) -> torch.Tensor: - """ - Returns the texture for each vertex for each face in the mesh. - For N meshes, this function returns sum(Fi)x3xC where Fi is the - number of faces in the i-th mesh and C is the dimensional of - the feature (C = 3 for RGB textures). - You can use the utils function in structures.utils to convert the - packed representation to a list or padded. 
- """ - raise NotImplementedError() - - def clone(self) -> "TexturesBase": - """ - Each texture class should implement a method - to clone all necessary internal tensors. - """ - raise NotImplementedError() - - def detach(self) -> "TexturesBase": - """ - Each texture class should implement a method - to detach all necessary internal tensors. - """ - raise NotImplementedError() - - def __getitem__(self, index) -> "TexturesBase": - """ - Each texture class should implement a method - to get the texture properties for the - specified elements in the batch. - The TexturesBase._getitem(i) method - can be used as a helper function to retrieve the - class attributes for item i. Then, a new - instance of the child class can be created with - the attributes. - """ - raise NotImplementedError() - - -def Textures( - maps: Optional[Union[List[torch.Tensor], torch.Tensor]] = None, - faces_uvs: Optional[torch.Tensor] = None, - verts_uvs: Optional[torch.Tensor] = None, - verts_rgb: Optional[torch.Tensor] = None, -) -> TexturesBase: - """ - Textures class has been DEPRECATED. - Preserving Textures as a function for backwards compatibility. - - Args: - maps: texture map per mesh. This can either be a list of maps - [(H, W, C)] or a padded tensor of shape (N, H, W, C). - faces_uvs: (N, F, 3) tensor giving the index into verts_uvs for each - vertex in the face. Padding value is assumed to be -1. - verts_uvs: (N, V, 2) tensor giving the uv coordinate per vertex. - verts_rgb: (N, V, C) tensor giving the color per vertex. Padding - value is assumed to be -1. (C=3 for RGB.) - - - Returns: - a Textures class which is an instance of TexturesBase e.g. TexturesUV, - TexturesAtlas, TexturesVertex - - """ - - warnings.warn( - """Textures class is deprecated, - use TexturesUV, TexturesAtlas, TexturesVertex instead. - Textures class will be removed in future releases.""", - PendingDeprecationWarning, - ) - - if faces_uvs is not None and verts_uvs is not None and maps is not None: - return TexturesUV(maps=maps, faces_uvs=faces_uvs, verts_uvs=verts_uvs) - - if verts_rgb is not None: - return TexturesVertex(verts_features=verts_rgb) - - raise ValueError( - "Textures either requires all three of (faces uvs, verts uvs, maps) or verts rgb" - ) - - -class TexturesAtlas(TexturesBase): - def __init__(self, atlas: Union[torch.Tensor, List[torch.Tensor]]) -> None: - """ - A texture representation where each face has a square texture map. - This is based on the implementation from SoftRasterizer [1]. - - Args: - atlas: (N, F, R, R, C) tensor giving the per face texture map. - The atlas can be created during obj loading with the - pytorch3d.io.load_obj function - in the input arguments - set `create_texture_atlas=True`. The atlas will be - returned in aux.texture_atlas. - - - The padded and list representations of the textures are stored - and the packed representations is computed on the fly and - not cached. - - [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based - 3D Reasoning', ICCV 2019 - See also https://github.com/ShichenLiu/SoftRas/issues/21 - """ - if isinstance(atlas, (list, tuple)): - correct_format = all( - ( - torch.is_tensor(elem) - and elem.ndim == 4 - and elem.shape[1] == elem.shape[2] - and elem.shape[1] == atlas[0].shape[1] - ) - for elem in atlas - ) - if not correct_format: - msg = ( - "Expected atlas to be a list of tensors of shape (F, R, R, C) " - "with the same value of R." 
- ) - raise ValueError(msg) - self._atlas_list = atlas - self._atlas_padded = None - self.device = torch.device("cpu") - - # These values may be overridden when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. - self._N = len(atlas) - self._num_faces_per_mesh = [len(a) for a in atlas] - - if self._N > 0: - self.device = atlas[0].device - - elif torch.is_tensor(atlas): - if atlas.ndim != 5: - msg = "Expected atlas to be of shape (N, F, R, R, C); got %r" - raise ValueError(msg % repr(atlas.ndim)) - self._atlas_padded = atlas - self._atlas_list = None - self.device = atlas.device - - # These values may be overridden when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. - self._N = len(atlas) - max_F = atlas.shape[1] - self._num_faces_per_mesh = [max_F] * self._N - else: - raise ValueError("Expected atlas to be a tensor or list") - - # The num_faces_per_mesh, N and valid - # are reset inside the Meshes object when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. - self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device) - - def clone(self) -> "TexturesAtlas": - tex = self.__class__(atlas=self.atlas_padded().clone()) - if self._atlas_list is not None: - tex._atlas_list = [atlas.clone() for atlas in self._atlas_list] - num_faces = ( - self._num_faces_per_mesh.clone() - if torch.is_tensor(self._num_faces_per_mesh) - else self._num_faces_per_mesh - ) - tex.valid = self.valid.clone() - tex._num_faces_per_mesh = num_faces - return tex - - def detach(self) -> "TexturesAtlas": - tex = self.__class__(atlas=self.atlas_padded().detach()) - if self._atlas_list is not None: - tex._atlas_list = [atlas.detach() for atlas in self._atlas_list] - num_faces = ( - self._num_faces_per_mesh.detach() - if torch.is_tensor(self._num_faces_per_mesh) - else self._num_faces_per_mesh - ) - tex.valid = self.valid.detach() - tex._num_faces_per_mesh = num_faces - return tex - - def __getitem__(self, index) -> "TexturesAtlas": - props = ["atlas_list", "_num_faces_per_mesh"] - new_props = self._getitem(index, props=props) - atlas = new_props["atlas_list"] - if isinstance(atlas, list): - # multiple batch elements - new_tex = self.__class__(atlas=atlas) - elif torch.is_tensor(atlas): - # single element - new_tex = self.__class__(atlas=[atlas]) - else: - raise ValueError("Not all values are provided in the correct format") - new_tex._num_faces_per_mesh = new_props["_num_faces_per_mesh"] - return new_tex - - def atlas_padded(self) -> torch.Tensor: - if self._atlas_padded is None: - if self.isempty(): - self._atlas_padded = torch.zeros( - (self._N, 0, 0, 0, 3), dtype=torch.float32, device=self.device - ) - else: - self._atlas_padded = _list_to_padded_wrapper( - self._atlas_list, pad_value=0.0 - ) - return self._atlas_padded - - def atlas_list(self) -> List[torch.Tensor]: - if self._atlas_list is None: - if self.isempty(): - self._atlas_padded = [ - torch.empty((0, 0, 0, 3), dtype=torch.float32, device=self.device) - ] * self._N - self._atlas_list = _padded_to_list_wrapper( - self._atlas_padded, split_size=self._num_faces_per_mesh - ) - return self._atlas_list - - def atlas_packed(self) -> torch.Tensor: - if self.isempty(): - return torch.zeros( - (self._N, 0, 0, 3), dtype=torch.float32, device=self.device - ) - atlas_list = self.atlas_list() - return list_to_packed(atlas_list)[0] - - def extend(self, N: int) -> "TexturesAtlas": - new_props = 
self._extend(N, ["atlas_padded", "_num_faces_per_mesh"]) - new_tex = self.__class__(atlas=new_props["atlas_padded"]) - new_tex._num_faces_per_mesh = new_props["_num_faces_per_mesh"] - return new_tex - - # pyre-fixme[14]: `sample_textures` overrides method defined in `TexturesBase` - # inconsistently. - def sample_textures(self, fragments, **kwargs) -> torch.Tensor: - """ - This is similar to a nearest neighbor sampling and involves a - discretization step. The barycentric coordinates from - rasterization are used to find the nearest grid cell in the texture - atlas and the RGB is returned as the color. - This means that this step is differentiable with respect to the RGB - values of the texture atlas but not differentiable with respect to the - barycentric coordinates. - - TODO: Add a different sampling mode which interpolates the barycentric - coordinates to sample the texture and will be differentiable w.r.t - the barycentric coordinates. - - Args: - fragments: - The outputs of rasterization. From this we use - - - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. - - barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying - the barycentric coordinates of each pixel - relative to the faces (in the packed - representation) which overlap the pixel. - - Returns: - texels: (N, H, W, K, C) - """ - N, H, W, K = fragments.pix_to_face.shape - atlas_packed = self.atlas_packed() - R = atlas_packed.shape[1] - bary = fragments.bary_coords - pix_to_face = fragments.pix_to_face - - bary_w01 = bary[..., :2] - # pyre-fixme[16]: `bool` has no attribute `__getitem__`. - mask = (pix_to_face < 0)[..., None] - bary_w01 = torch.where(mask, torch.zeros_like(bary_w01), bary_w01) - # If barycentric coordinates are > 1.0 (in the case of - # blur_radius > 0.0), wxy might be > R. We need to clamp this - # index to R-1 to index into the texture atlas. - w_xy = (bary_w01 * R).to(torch.int64).clamp(max=R - 1) # (N, H, W, K, 2) - - below_diag = ( - bary_w01.sum(dim=-1) * R - w_xy.float().sum(dim=-1) - ) <= 1.0 # (N, H, W, K) - w_x, w_y = w_xy.unbind(-1) - w_x = torch.where(below_diag, w_x, (R - 1 - w_x)) - w_y = torch.where(below_diag, w_y, (R - 1 - w_y)) - - texels = atlas_packed[pix_to_face, w_y, w_x] - texels = texels * (pix_to_face >= 0)[..., None].float() - - return texels - - def submeshes( - self, - vertex_ids_list: List[List[torch.LongTensor]], - faces_ids_list: List[List[torch.LongTensor]], - ) -> "TexturesAtlas": - """ - Extract a sub-texture for use in a submesh. - - If the meshes batch corresponding to this TextureAtlas contains - `n = len(faces_ids_list)` meshes, then self.atlas_list() - will be of length n. After submeshing, we obtain a batch of - `k = sum(len(v) for v in atlas_list` submeshes (see Meshes.submeshes). This - function creates a corresponding TexturesAtlas object with `atlas_list` - of length `k`. - """ - if len(faces_ids_list) != len(self.atlas_list()): - raise IndexError( - "faces_ids_list must be of " "the same length as atlas_list." - ) - - sub_features = [] - for atlas, faces_ids in zip(self.atlas_list(), faces_ids_list): - for faces_ids_submesh in faces_ids: - sub_features.append(atlas[faces_ids_submesh]) - - return self.__class__(sub_features) - - def faces_verts_textures_packed(self) -> torch.Tensor: - """ - Samples texture from each vertex for each face in the mesh. - For N meshes with {Fi} number of faces, it returns a - tensor of shape sum(Fi)x3xC (C = 3 for RGB). 
- You can use the utils function in structures.utils to convert the - packed representation to a list or padded. - """ - atlas_packed = self.atlas_packed() - # assume each face consists of (v0, v1, v2). - # to sample from the atlas we only need the first two barycentric coordinates. - # for details on how this texture sample works refer to the sample_textures function. - t0 = atlas_packed[:, 0, -1] # corresponding to v0 with bary = (1, 0) - t1 = atlas_packed[:, -1, 0] # corresponding to v1 with bary = (0, 1) - t2 = atlas_packed[:, 0, 0] # corresponding to v2 with bary = (0, 0) - return torch.stack((t0, t1, t2), dim=1) - - def join_batch(self, textures: List["TexturesAtlas"]) -> "TexturesAtlas": - """ - Join the list of textures given by `textures` to - self to create a batch of textures. Return a new - TexturesAtlas object with the combined textures. - - Args: - textures: List of TexturesAtlas objects - - Returns: - new_tex: TexturesAtlas object with the combined - textures from self and the list `textures`. - """ - tex_types_same = all(isinstance(tex, TexturesAtlas) for tex in textures) - if not tex_types_same: - raise ValueError("All textures must be of type TexturesAtlas.") - - atlas_list = [] - atlas_list += self.atlas_list() - num_faces_per_mesh = self._num_faces_per_mesh.copy() - for tex in textures: - atlas_list += tex.atlas_list() - num_faces_per_mesh += tex._num_faces_per_mesh - new_tex = self.__class__(atlas=atlas_list) - new_tex._num_faces_per_mesh = num_faces_per_mesh - return new_tex - - def join_scene(self) -> "TexturesAtlas": - """ - Return a new TexturesAtlas amalgamating the batch. - """ - return self.__class__(atlas=[torch.cat(self.atlas_list())]) - - def check_shapes( - self, batch_size: int, max_num_verts: int, max_num_faces: int - ) -> bool: - """ - Check if the dimensions of the atlas match that of the mesh faces - """ - # (N, F) should be the same - return self.atlas_padded().shape[0:2] == (batch_size, max_num_faces) - - -class TexturesUV(TexturesBase): - def __init__( - self, - maps: Union[torch.Tensor, List[torch.Tensor]], - faces_uvs: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]], - verts_uvs: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]], - padding_mode: str = "border", - align_corners: bool = True, - sampling_mode: str = "bilinear", - ) -> None: - """ - Textures are represented as a per mesh texture map and uv coordinates for each - vertex in each face. NOTE: this class only supports one texture map per mesh. - - Args: - maps: texture map per mesh. This can either be a list of maps - [(H, W, C)] or a padded tensor of shape (N, H, W, C). - For RGB, C = 3. - faces_uvs: (N, F, 3) LongTensor giving the index into verts_uvs - for each face - verts_uvs: (N, V, 2) tensor giving the uv coordinates per vertex - (a FloatTensor with values between 0 and 1). - align_corners: If true, the extreme values 0 and 1 for verts_uvs - indicate the centers of the edge pixels in the maps. - padding_mode: padding mode for outside grid values - ("zeros", "border" or "reflection"). - sampling_mode: type of interpolation used to sample the texture. - Corresponds to the mode parameter in PyTorch's - grid_sample ("nearest" or "bilinear"). - - The align_corners and padding_mode arguments correspond to the arguments - of the `grid_sample` torch function. There is an informative illustration of - the two align_corners options at - https://discuss.pytorch.org/t/22663/9 . 
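
A minimal construction sketch, assuming TexturesUV is importable from pytorch3d.renderer as in released versions of the library; the shapes follow the Args above and the values are random placeholders:

import torch
from pytorch3d.renderer import TexturesUV

N, V, F = 2, 4, 3
maps = torch.rand(N, 256, 256, 3)              # one RGB texture map per mesh
verts_uvs = torch.rand(N, V, 2)                # uv coordinates in [0, 1] per vertex
faces_uvs = torch.randint(0, V, (N, F, 3))     # LongTensor indexing into verts_uvs
textures = TexturesUV(maps=maps, faces_uvs=faces_uvs, verts_uvs=verts_uvs)
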
- - An example of how the indexing into the maps, with align_corners=True, - works is as follows. - If maps[i] has shape [1001, 101] and the value of verts_uvs[i][j] - is [0.4, 0.3], then a value of j in faces_uvs[i] means a vertex - whose color is given by maps[i][700, 40]. padding_mode affects what - happens if a value in verts_uvs is less than 0 or greater than 1. - Note that increasing a value in verts_uvs[..., 0] increases an index - in maps, whereas increasing a value in verts_uvs[..., 1] _decreases_ - an _earlier_ index in maps. - - If align_corners=False, an example would be as follows. - If maps[i] has shape [1000, 100] and the value of verts_uvs[i][j] - is [0.405, 0.2995], then a value of j in faces_uvs[i] means a vertex - whose color is given by maps[i][700, 40]. - When align_corners=False, padding_mode even matters for values in - verts_uvs slightly above 0 or slightly below 1. In this case, the - padding_mode matters if the first value is outside the interval - [0.0005, 0.9995] or if the second is outside the interval - [0.005, 0.995]. - """ - self.padding_mode = padding_mode - self.align_corners = align_corners - self.sampling_mode = sampling_mode - if isinstance(faces_uvs, (list, tuple)): - for fv in faces_uvs: - if fv.ndim != 2 or fv.shape[-1] != 3: - msg = "Expected faces_uvs to be of shape (F, 3); got %r" - raise ValueError(msg % repr(fv.shape)) - self._faces_uvs_list = faces_uvs - self._faces_uvs_padded = None - self.device = torch.device("cpu") - - # These values may be overridden when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. - self._N = len(faces_uvs) - self._num_faces_per_mesh = [len(fv) for fv in faces_uvs] - - if self._N > 0: - self.device = faces_uvs[0].device - - elif torch.is_tensor(faces_uvs): - if faces_uvs.ndim != 3 or faces_uvs.shape[-1] != 3: - msg = "Expected faces_uvs to be of shape (N, F, 3); got %r" - raise ValueError(msg % repr(faces_uvs.shape)) - self._faces_uvs_padded = faces_uvs - self._faces_uvs_list = None - self.device = faces_uvs.device - - # These values may be overridden when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. 
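
The align_corners=True example above can be checked directly with a small standalone snippet (plain PyTorch, values as in the docstring):

import torch
import torch.nn.functional as F

H, W = 1001, 101
tex = torch.arange(H * W, dtype=torch.float32).reshape(1, 1, H, W)
u, v = 0.4, 0.3
grid = torch.tensor([[[[2 * u - 1, 1 - 2 * v]]]])    # uv -> grid_sample coords, y flipped
sampled = F.grid_sample(tex, grid, mode="bilinear", align_corners=True)
assert torch.isclose(sampled[0, 0, 0, 0], tex[0, 0, 700, 40])
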
- self._N = len(faces_uvs) - max_F = faces_uvs.shape[1] - self._num_faces_per_mesh = [max_F] * self._N - else: - raise ValueError("Expected faces_uvs to be a tensor or list") - - if isinstance(verts_uvs, (list, tuple)): - for fv in verts_uvs: - if fv.ndim != 2 or fv.shape[-1] != 2: - msg = "Expected verts_uvs to be of shape (V, 2); got %r" - raise ValueError(msg % repr(fv.shape)) - self._verts_uvs_list = verts_uvs - self._verts_uvs_padded = None - - if len(verts_uvs) != self._N: - raise ValueError( - "verts_uvs and faces_uvs must have the same batch dimension" - ) - if not all(v.device == self.device for v in verts_uvs): - raise ValueError("verts_uvs and faces_uvs must be on the same device") - - elif torch.is_tensor(verts_uvs): - if ( - verts_uvs.ndim != 3 - or verts_uvs.shape[-1] != 2 - or verts_uvs.shape[0] != self._N - ): - msg = "Expected verts_uvs to be of shape (N, V, 2); got %r" - raise ValueError(msg % repr(verts_uvs.shape)) - self._verts_uvs_padded = verts_uvs - self._verts_uvs_list = None - - if verts_uvs.device != self.device: - raise ValueError("verts_uvs and faces_uvs must be on the same device") - else: - raise ValueError("Expected verts_uvs to be a tensor or list") - - if isinstance(maps, (list, tuple)): - self._maps_list = maps - else: - self._maps_list = None - self._maps_padded = self._format_maps_padded(maps) - - if self._maps_padded.device != self.device: - raise ValueError("maps must be on the same device as verts/faces uvs.") - - self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device) - - def _format_maps_padded( - self, maps: Union[torch.Tensor, List[torch.Tensor]] - ) -> torch.Tensor: - if isinstance(maps, torch.Tensor): - if maps.ndim != 4 or maps.shape[0] != self._N: - msg = "Expected maps to be of shape (N, H, W, C); got %r" - raise ValueError(msg % repr(maps.shape)) - return maps - - if isinstance(maps, (list, tuple)): - if len(maps) != self._N: - raise ValueError("Expected one texture map per mesh in the batch.") - if self._N > 0: - if not all(map.ndim == 3 for map in maps): - raise ValueError("Invalid number of dimensions in texture maps") - if not all(map.shape[2] == maps[0].shape[2] for map in maps): - raise ValueError("Inconsistent number of channels in maps") - maps_padded = _pad_texture_maps(maps, align_corners=self.align_corners) - else: - maps_padded = torch.empty( - (self._N, 0, 0, 3), dtype=torch.float32, device=self.device - ) - return maps_padded - - raise ValueError("Expected maps to be a tensor or list of tensors.") - - def clone(self) -> "TexturesUV": - tex = self.__class__( - self.maps_padded().clone(), - self.faces_uvs_padded().clone(), - self.verts_uvs_padded().clone(), - align_corners=self.align_corners, - padding_mode=self.padding_mode, - sampling_mode=self.sampling_mode, - ) - if self._maps_list is not None: - tex._maps_list = [m.clone() for m in self._maps_list] - if self._verts_uvs_list is not None: - tex._verts_uvs_list = [v.clone() for v in self._verts_uvs_list] - if self._faces_uvs_list is not None: - tex._faces_uvs_list = [f.clone() for f in self._faces_uvs_list] - num_faces = ( - self._num_faces_per_mesh.clone() - if torch.is_tensor(self._num_faces_per_mesh) - else self._num_faces_per_mesh - ) - tex._num_faces_per_mesh = num_faces - tex.valid = self.valid.clone() - return tex - - def detach(self) -> "TexturesUV": - tex = self.__class__( - self.maps_padded().detach(), - self.faces_uvs_padded().detach(), - self.verts_uvs_padded().detach(), - align_corners=self.align_corners, - padding_mode=self.padding_mode, - 
sampling_mode=self.sampling_mode, - ) - if self._maps_list is not None: - tex._maps_list = [m.detach() for m in self._maps_list] - if self._verts_uvs_list is not None: - tex._verts_uvs_list = [v.detach() for v in self._verts_uvs_list] - if self._faces_uvs_list is not None: - tex._faces_uvs_list = [f.detach() for f in self._faces_uvs_list] - num_faces = ( - self._num_faces_per_mesh.detach() - if torch.is_tensor(self._num_faces_per_mesh) - else self._num_faces_per_mesh - ) - tex._num_faces_per_mesh = num_faces - tex.valid = self.valid.detach() - return tex - - def __getitem__(self, index) -> "TexturesUV": - props = ["verts_uvs_list", "faces_uvs_list", "maps_list", "_num_faces_per_mesh"] - new_props = self._getitem(index, props) - faces_uvs = new_props["faces_uvs_list"] - verts_uvs = new_props["verts_uvs_list"] - maps = new_props["maps_list"] - - # if index has multiple values then faces/verts/maps may be a list of tensors - if all(isinstance(f, (list, tuple)) for f in [faces_uvs, verts_uvs, maps]): - new_tex = self.__class__( - faces_uvs=faces_uvs, - verts_uvs=verts_uvs, - maps=maps, - padding_mode=self.padding_mode, - align_corners=self.align_corners, - sampling_mode=self.sampling_mode, - ) - elif all(torch.is_tensor(f) for f in [faces_uvs, verts_uvs, maps]): - new_tex = self.__class__( - faces_uvs=[faces_uvs], - verts_uvs=[verts_uvs], - maps=[maps], - padding_mode=self.padding_mode, - align_corners=self.align_corners, - sampling_mode=self.sampling_mode, - ) - else: - raise ValueError("Not all values are provided in the correct format") - new_tex._num_faces_per_mesh = new_props["_num_faces_per_mesh"] - return new_tex - - def faces_uvs_padded(self) -> torch.Tensor: - if self._faces_uvs_padded is None: - if self.isempty(): - self._faces_uvs_padded = torch.zeros( - (self._N, 0, 3), dtype=torch.float32, device=self.device - ) - else: - self._faces_uvs_padded = list_to_padded( - self._faces_uvs_list, pad_value=0.0 - ) - return self._faces_uvs_padded - - def faces_uvs_list(self) -> List[torch.Tensor]: - if self._faces_uvs_list is None: - if self.isempty(): - self._faces_uvs_list = [ - torch.empty((0, 3), dtype=torch.float32, device=self.device) - ] * self._N - else: - self._faces_uvs_list = padded_to_list( - self._faces_uvs_padded, split_size=self._num_faces_per_mesh - ) - return self._faces_uvs_list - - def verts_uvs_padded(self) -> torch.Tensor: - if self._verts_uvs_padded is None: - if self.isempty(): - self._verts_uvs_padded = torch.zeros( - (self._N, 0, 2), dtype=torch.float32, device=self.device - ) - else: - self._verts_uvs_padded = list_to_padded( - self._verts_uvs_list, pad_value=0.0 - ) - return self._verts_uvs_padded - - def verts_uvs_list(self) -> List[torch.Tensor]: - if self._verts_uvs_list is None: - if self.isempty(): - self._verts_uvs_list = [ - torch.empty((0, 2), dtype=torch.float32, device=self.device) - ] * self._N - else: - # The number of vertices in the mesh and in verts_uvs can differ - # e.g. if a vertex is shared between 3 faces, it can - # have up to 3 different uv coordinates. - self._verts_uvs_list = list(self._verts_uvs_padded.unbind(0)) - return self._verts_uvs_list - - # Currently only the padded maps are used. 
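
The list and padded accessors above rely on the usual list-to-padded conversion; a hypothetical standalone helper (not the library's list_to_padded) would look roughly like this:

import torch

def pad_tensor_list(tensors, pad_value=0):
    # Stack variable-length (Li, ...) tensors into a single (N, max(Li), ...) tensor.
    n, max_len = len(tensors), max(t.shape[0] for t in tensors)
    out = tensors[0].new_full((n, max_len, *tensors[0].shape[1:]), pad_value)
    for i, t in enumerate(tensors):
        out[i, : t.shape[0]] = t
    return out

faces_uvs_list = [torch.zeros(10, 3, dtype=torch.int64), torch.zeros(7, 3, dtype=torch.int64)]
faces_uvs_padded = pad_tensor_list(faces_uvs_list)                    # shape (2, 10, 3)
faces_uvs_back = [t[:n] for t, n in zip(faces_uvs_padded, [10, 7])]   # padded -> list
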
- def maps_padded(self) -> torch.Tensor: - return self._maps_padded - - def maps_list(self) -> List[torch.Tensor]: - if self._maps_list is not None: - return self._maps_list - return self._maps_padded.unbind(0) - - def extend(self, N: int) -> "TexturesUV": - new_props = self._extend( - N, - [ - "maps_padded", - "verts_uvs_padded", - "faces_uvs_padded", - "_num_faces_per_mesh", - ], - ) - new_tex = self.__class__( - maps=new_props["maps_padded"], - faces_uvs=new_props["faces_uvs_padded"], - verts_uvs=new_props["verts_uvs_padded"], - padding_mode=self.padding_mode, - align_corners=self.align_corners, - sampling_mode=self.sampling_mode, - ) - - new_tex._num_faces_per_mesh = new_props["_num_faces_per_mesh"] - return new_tex - - # pyre-fixme[14]: `sample_textures` overrides method defined in `TexturesBase` - # inconsistently. - def sample_textures(self, fragments, **kwargs) -> torch.Tensor: - """ - Interpolate a 2D texture map using uv vertex texture coordinates for each - face in the mesh. First interpolate the vertex uvs using barycentric coordinates - for each pixel in the rasterized output. Then interpolate the texture map - using the uv coordinate for each pixel. - - Args: - fragments: - The outputs of rasterization. From this we use - - - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. - - barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying - the barycentric coordinates of each pixel - relative to the faces (in the packed - representation) which overlap the pixel. - - Returns: - texels: tensor of shape (N, H, W, K, C) giving the interpolated - texture for each pixel in the rasterized image. - """ - if self.isempty(): - faces_verts_uvs = torch.zeros( - (self._N, 3, 2), dtype=torch.float32, device=self.device - ) - else: - packing_list = [ - i[j] for i, j in zip(self.verts_uvs_list(), self.faces_uvs_list()) - ] - faces_verts_uvs = torch.cat(packing_list) - texture_maps = self.maps_padded() - - # pixel_uvs: (N, H, W, K, 2) - pixel_uvs = interpolate_face_attributes( - fragments.pix_to_face, fragments.bary_coords, faces_verts_uvs - ) - - N, H_out, W_out, K = fragments.pix_to_face.shape - N, H_in, W_in, C = texture_maps.shape # 3 for RGB - - # pixel_uvs: (N, H, W, K, 2) -> (N, K, H, W, 2) -> (NK, H, W, 2) - pixel_uvs = pixel_uvs.permute(0, 3, 1, 2, 4).reshape(N * K, H_out, W_out, 2) - - # textures.map: - # (N, H, W, C) -> (N, C, H, W) -> (1, N, C, H, W) - # -> expand (K, N, C, H, W) -> reshape (N*K, C, H, W) - texture_maps = ( - texture_maps.permute(0, 3, 1, 2)[None, ...] - .expand(K, -1, -1, -1, -1) - .transpose(0, 1) - .reshape(N * K, C, H_in, W_in) - ) - - # Textures: (N*K, C, H, W), pixel_uvs: (N*K, H, W, 2) - # Now need to format the pixel uvs and the texture map correctly! - # From pytorch docs, grid_sample takes `grid` and `input`: - # grid specifies the sampling pixel locations normalized by - # the input spatial dimensions It should have most - # values in the range of [-1, 1]. Values x = -1, y = -1 - # is the left-top pixel of input, and values x = 1, y = 1 is the - # right-bottom pixel of input. 
- - # map to a range of [-1, 1] and flip the y axis - pixel_uvs = torch.lerp( - pixel_uvs.new_tensor([-1.0, 1.0]), - pixel_uvs.new_tensor([1.0, -1.0]), - pixel_uvs, - ) - - if texture_maps.device != pixel_uvs.device: - texture_maps = texture_maps.to(pixel_uvs.device) - texels = F.grid_sample( - texture_maps, - pixel_uvs, - mode=self.sampling_mode, - align_corners=self.align_corners, - padding_mode=self.padding_mode, - ) - # texels now has shape (NK, C, H_out, W_out) - texels = texels.reshape(N, K, C, H_out, W_out).permute(0, 3, 4, 1, 2) - return texels - - def faces_verts_textures_packed(self) -> torch.Tensor: - """ - Samples texture from each vertex and for each face in the mesh. - For N meshes with {Fi} number of faces, it returns a - tensor of shape sum(Fi)x3xC (C = 3 for RGB). - You can use the utils function in structures.utils to convert the - packed representation to a list or padded. - """ - if self.isempty(): - return torch.zeros( - (0, 3, self.maps_padded().shape[-1]), - dtype=torch.float32, - device=self.device, - ) - else: - packing_list = [ - i[j] for i, j in zip(self.verts_uvs_list(), self.faces_uvs_list()) - ] - faces_verts_uvs = _list_to_padded_wrapper( - packing_list, pad_value=0.0 - ) # Nxmax(Fi)x3x2 - texture_maps = self.maps_padded() # NxHxWxC - texture_maps = texture_maps.permute(0, 3, 1, 2) # NxCxHxW - - # map to a range of [-1, 1] and flip the y axis - faces_verts_uvs = torch.lerp( - faces_verts_uvs.new_tensor([-1.0, 1.0]), - faces_verts_uvs.new_tensor([1.0, -1.0]), - faces_verts_uvs, - ) - - textures = F.grid_sample( - texture_maps, - faces_verts_uvs, - mode=self.sampling_mode, - align_corners=self.align_corners, - padding_mode=self.padding_mode, - ) # NxCxmax(Fi)x3 - - textures = textures.permute(0, 2, 3, 1) # Nxmax(Fi)x3xC - textures = _padded_to_list_wrapper( - textures, split_size=self._num_faces_per_mesh - ) # list of N {Fix3xC} tensors - return list_to_packed(textures)[0] - - def join_batch(self, textures: List["TexturesUV"]) -> "TexturesUV": - """ - Join the list of textures given by `textures` to - self to create a batch of textures. Return a new - TexturesUV object with the combined textures. - - Args: - textures: List of TexturesUV objects - - Returns: - new_tex: TexturesUV object with the combined - textures from self and the list `textures`. 
- """ - tex_types_same = all(isinstance(tex, TexturesUV) for tex in textures) - if not tex_types_same: - raise ValueError("All textures must be of type TexturesUV.") - - padding_modes_same = all( - tex.padding_mode == self.padding_mode for tex in textures - ) - if not padding_modes_same: - raise ValueError("All textures must have the same padding_mode.") - align_corners_same = all( - tex.align_corners == self.align_corners for tex in textures - ) - if not align_corners_same: - raise ValueError("All textures must have the same align_corners value.") - sampling_mode_same = all( - tex.sampling_mode == self.sampling_mode for tex in textures - ) - if not sampling_mode_same: - raise ValueError("All textures must have the same sampling_mode.") - - verts_uvs_list = [] - faces_uvs_list = [] - maps_list = [] - faces_uvs_list += self.faces_uvs_list() - verts_uvs_list += self.verts_uvs_list() - maps_list += self.maps_list() - num_faces_per_mesh = self._num_faces_per_mesh.copy() - for tex in textures: - verts_uvs_list += tex.verts_uvs_list() - faces_uvs_list += tex.faces_uvs_list() - num_faces_per_mesh += tex._num_faces_per_mesh - maps_list += tex.maps_list() - - new_tex = self.__class__( - maps=maps_list, - verts_uvs=verts_uvs_list, - faces_uvs=faces_uvs_list, - padding_mode=self.padding_mode, - align_corners=self.align_corners, - sampling_mode=self.sampling_mode, - ) - new_tex._num_faces_per_mesh = num_faces_per_mesh - return new_tex - - def _place_map_into_single_map( - self, single_map: torch.Tensor, map_: torch.Tensor, location: PackedRectangle - ) -> None: - """ - Copy map into a larger tensor single_map at the destination specified by location. - If align_corners is False, we add the needed border around the destination. - - Used by join_scene. - - Args: - single_map: (total_H, total_W, C) - map_: (H, W, C) source data - location: where to place map - """ - do_flip = location.flipped - source = map_.transpose(0, 1) if do_flip else map_ - border_width = 0 if self.align_corners else 1 - lower_u = location.x + border_width - lower_v = location.y + border_width - upper_u = lower_u + source.shape[0] - upper_v = lower_v + source.shape[1] - single_map[lower_u:upper_u, lower_v:upper_v] = source - - if self.padding_mode != "zeros" and not self.align_corners: - single_map[lower_u - 1, lower_v:upper_v] = single_map[ - lower_u, lower_v:upper_v - ] - single_map[upper_u, lower_v:upper_v] = single_map[ - upper_u - 1, lower_v:upper_v - ] - single_map[lower_u:upper_u, lower_v - 1] = single_map[ - lower_u:upper_u, lower_v - ] - single_map[lower_u:upper_u, upper_v] = single_map[ - lower_u:upper_u, upper_v - 1 - ] - single_map[lower_u - 1, lower_v - 1] = single_map[lower_u, lower_v] - single_map[lower_u - 1, upper_v] = single_map[lower_u, upper_v - 1] - single_map[upper_u, lower_v - 1] = single_map[upper_u - 1, lower_v] - single_map[upper_u, upper_v] = single_map[upper_u - 1, upper_v - 1] - - def join_scene(self) -> "TexturesUV": - """ - Return a new TexturesUV amalgamating the batch. - - We calculate a large single map which contains the original maps, - and find verts_uvs to point into it. This will not replicate - behavior of padding for verts_uvs values outside [0,1]. - - If align_corners=False, we need to add an artificial border around - every map. - - We use the function `pack_unique_rectangles` to provide a layout for - the single map. 
This means that if self was created with a list of maps, - and to() has not been called, and there were two maps which were exactly - the same tensor object, then they will become the same data in the unified map. - _place_map_into_single_map is used to copy the maps into the single map. - The merging of verts_uvs and faces_uvs is handled locally in this function. - """ - maps = self.maps_list() - heights_and_widths = [] - extra_border = 0 if self.align_corners else 2 - for map_ in maps: - heights_and_widths.append( - Rectangle( - map_.shape[0] + extra_border, map_.shape[1] + extra_border, id(map_) - ) - ) - merging_plan = pack_unique_rectangles(heights_and_widths) - C = maps[0].shape[-1] - single_map = maps[0].new_zeros((*merging_plan.total_size, C)) - verts_uvs = self.verts_uvs_list() - verts_uvs_merged = [] - - for map_, loc, uvs in zip(maps, merging_plan.locations, verts_uvs): - new_uvs = uvs.clone() - if loc.is_first: - self._place_map_into_single_map(single_map, map_, loc) - do_flip = loc.flipped - x_shape = map_.shape[1] if do_flip else map_.shape[0] - y_shape = map_.shape[0] if do_flip else map_.shape[1] - - if do_flip: - # Here we have flipped / transposed the map. - # In uvs, the y values are decreasing from 1 to 0 and the x - # values increase from 0 to 1. We subtract all values from 1 - # as the x's become y's and the y's become x's. - new_uvs = 1.0 - new_uvs[:, [1, 0]] - if TYPE_CHECKING: - new_uvs = torch.Tensor(new_uvs) - - # If align_corners is True, then an index of x (where x is in - # the range 0 .. map_.shape[1]-1) in one of the input maps - # was hit by a u of x/(map_.shape[1]-1). - # That x is located at the index loc[1] + x in the single_map, and - # to hit that we need u to equal (loc[1] + x) / (total_size[1]-1) - # so the old u should be mapped to - # { u*(map_.shape[1]-1) + loc[1] } / (total_size[1]-1) - - # Also, an index of y (where y is in - # the range 0 .. map_.shape[0]-1) in one of the input maps - # was hit by a v of 1 - y/(map_.shape[0]-1). - # That y is located at the index loc[0] + y in the single_map, and - # to hit that we need v to equal 1 - (loc[0] + y) / (total_size[0]-1) - # so the old v should be mapped to - # 1 - { (1-v)*(map_.shape[0]-1) + loc[0] } / (total_size[0]-1) - # = - # { v*(map_.shape[0]-1) + total_size[0] - map.shape[0] - loc[0] } - # / (total_size[0]-1) - - # If align_corners is False, then an index of x (where x is in - # the range 1 .. map_.shape[1]-2) in one of the input maps - # was hit by a u of (x+0.5)/(map_.shape[1]). - # That x is located at the index loc[1] + 1 + x in the single_map, - # (where the 1 is for the border) - # and to hit that we need u to equal (loc[1] + 1 + x + 0.5) / (total_size[1]) - # so the old u should be mapped to - # { loc[1] + 1 + u*map_.shape[1]-0.5 + 0.5 } / (total_size[1]) - # = { loc[1] + 1 + u*map_.shape[1] } / (total_size[1]) - - # Also, an index of y (where y is in - # the range 1 .. map_.shape[0]-2) in one of the input maps - # was hit by a v of 1 - (y+0.5)/(map_.shape[0]). 
- # That y is located at the index loc[0] + 1 + y in the single_map, - # (where the 1 is for the border) - # and to hit that we need v to equal 1 - (loc[0] + 1 + y + 0.5) / (total_size[0]) - # so the old v should be mapped to - # 1 - { loc[0] + 1 + (1-v)*map_.shape[0]-0.5 + 0.5 } / (total_size[0]) - # = { total_size[0] - loc[0] -1 - (1-v)*map_.shape[0] } - # / (total_size[0]) - # = { total_size[0] - loc[0] - map.shape[0] - 1 + v*map_.shape[0] } - # / (total_size[0]) - - # We change the y's in new_uvs for the scaling of height, - # and the x's for the scaling of width. - # That is why the 1's and 0's are mismatched in these lines. - one_if_align = 1 if self.align_corners else 0 - one_if_not_align = 1 - one_if_align - denom_x = merging_plan.total_size[0] - one_if_align - scale_x = x_shape - one_if_align - denom_y = merging_plan.total_size[1] - one_if_align - scale_y = y_shape - one_if_align - new_uvs[:, 1] *= scale_x / denom_x - new_uvs[:, 1] += ( - merging_plan.total_size[0] - x_shape - loc.x - one_if_not_align - ) / denom_x - new_uvs[:, 0] *= scale_y / denom_y - new_uvs[:, 0] += (loc.y + one_if_not_align) / denom_y - - verts_uvs_merged.append(new_uvs) - - faces_uvs_merged = [] - offset = 0 - for faces_uvs_, verts_uvs_ in zip(self.faces_uvs_list(), verts_uvs): - faces_uvs_merged.append(offset + faces_uvs_) - offset += verts_uvs_.shape[0] - - return self.__class__( - maps=[single_map], - verts_uvs=[torch.cat(verts_uvs_merged)], - faces_uvs=[torch.cat(faces_uvs_merged)], - align_corners=self.align_corners, - padding_mode=self.padding_mode, - sampling_mode=self.sampling_mode, - ) - - def centers_for_image(self, index: int) -> torch.Tensor: - """ - Return the locations in the texture map which correspond to the given - verts_uvs, for one of the meshes. This is potentially useful for - visualizing the data. See the texturesuv_image_matplotlib and - texturesuv_image_PIL functions. - - Args: - index: batch index of the mesh whose centers to return. - - Returns: - centers: coordinates of points in the texture image - - a FloatTensor of shape (V,2) - """ - if self._N != 1: - raise ValueError( - "This function only supports plotting textures for one mesh." - ) - texture_image = self.maps_padded() - verts_uvs = self.verts_uvs_list()[index][None] - _, H, W, _3 = texture_image.shape - coord1 = torch.arange(W).expand(H, W) - coord2 = torch.arange(H)[:, None].expand(H, W) - coords = torch.stack([coord1, coord2])[None] - with torch.no_grad(): - # Get xy cartesian coordinates based on the uv coordinates - centers = F.grid_sample( - torch.flip(coords.to(texture_image), [2]), - # Convert from [0, 1] -> [-1, 1] range expected by grid sample - verts_uvs[:, None] * 2.0 - 1, - mode=self.sampling_mode, - align_corners=self.align_corners, - padding_mode=self.padding_mode, - ).cpu() - centers = centers[0, :, 0].T - return centers - - def check_shapes( - self, batch_size: int, max_num_verts: int, max_num_faces: int - ) -> bool: - """ - Check if the dimensions of the verts/faces uvs match that of the mesh - """ - # (N, F) should be the same - # (N, V) is not guaranteed to be the same - return (self.faces_uvs_padded().shape[0:2] == (batch_size, max_num_faces)) and ( - self.verts_uvs_padded().shape[0] == batch_size - ) - - def submeshes( - self, - vertex_ids_list: List[List[torch.LongTensor]], - faces_ids_list: List[List[torch.LongTensor]], - ) -> "TexturesUV": - """ - Extract a sub-texture for use in a submesh. 
- - If the meshes batch corresponding to this TexturesUV contains - `n = len(faces_ids_list)` meshes, then self.faces_uvs_padded() - will be of length n. After submeshing, we obtain a batch of - `k = sum(len(f) for f in faces_ids_list` submeshes (see Meshes.submeshes). This - function creates a corresponding TexturesUV object with `faces_uvs_padded` - of length `k`. - - Args: - vertex_ids_list: Not used when submeshing TexturesUV. - - face_ids_list: A list of length equal to self.faces_uvs_padded. Each - element is a LongTensor listing the face ids that the submesh keeps in - each respective mesh. - - - Returns: - A "TexturesUV in which faces_uvs_padded, verts_uvs_padded, and maps_padded - have length sum(len(faces) for faces in faces_ids_list) - """ - - if len(faces_ids_list) != len(self.faces_uvs_padded()): - raise IndexError( - "faces_uvs_padded must be of " "the same length as face_ids_list." - ) - - sub_faces_uvs, sub_verts_uvs, sub_maps = [], [], [] - for faces_ids, faces_uvs, verts_uvs, map_ in zip( - faces_ids_list, - self.faces_uvs_padded(), - self.verts_uvs_padded(), - self.maps_padded(), - ): - for faces_ids_submesh in faces_ids: - sub_faces_uvs.append(faces_uvs[faces_ids_submesh]) - sub_verts_uvs.append(verts_uvs) - sub_maps.append(map_) - - return self.__class__( - sub_maps, - sub_faces_uvs, - sub_verts_uvs, - self.padding_mode, - self.align_corners, - self.sampling_mode, - ) - - -class TexturesVertex(TexturesBase): - def __init__( - self, - verts_features: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]], - ) -> None: - """ - Batched texture representation where each vertex in a mesh - has a C dimensional feature vector. - - Args: - verts_features: list of (Vi, C) or (N, V, C) tensor giving a feature - vector with arbitrary dimensions for each vertex. - """ - if isinstance(verts_features, (tuple, list)): - correct_shape = all( - (torch.is_tensor(v) and v.ndim == 2) for v in verts_features - ) - if not correct_shape: - raise ValueError( - "Expected verts_features to be a list of tensors of shape (V, C)." - ) - - self._verts_features_list = verts_features - self._verts_features_padded = None - self.device = torch.device("cpu") - - # These values may be overridden when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. - self._N = len(verts_features) - self._num_verts_per_mesh = [len(fv) for fv in verts_features] - - if self._N > 0: - self.device = verts_features[0].device - - elif torch.is_tensor(verts_features): - if verts_features.ndim != 3: - msg = "Expected verts_features to be of shape (N, V, C); got %r" - raise ValueError(msg % repr(verts_features.shape)) - self._verts_features_padded = verts_features - self._verts_features_list = None - self.device = verts_features.device - - # These values may be overridden when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. - self._N = len(verts_features) - max_F = verts_features.shape[1] - self._num_verts_per_mesh = [max_F] * self._N - else: - raise ValueError("verts_features must be a tensor or list of tensors") - - # This is set inside the Meshes object when textures is - # passed into the Meshes constructor. For more details - # refer to the __init__ of Meshes. 
- self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device) - - def clone(self) -> "TexturesVertex": - tex = self.__class__(self.verts_features_padded().clone()) - if self._verts_features_list is not None: - tex._verts_features_list = [f.clone() for f in self._verts_features_list] - tex._num_verts_per_mesh = self._num_verts_per_mesh.copy() - tex.valid = self.valid.clone() - return tex - - def detach(self) -> "TexturesVertex": - tex = self.__class__(self.verts_features_padded().detach()) - if self._verts_features_list is not None: - tex._verts_features_list = [f.detach() for f in self._verts_features_list] - tex._num_verts_per_mesh = self._num_verts_per_mesh.copy() - tex.valid = self.valid.detach() - return tex - - def __getitem__(self, index) -> "TexturesVertex": - props = ["verts_features_list", "_num_verts_per_mesh"] - new_props = self._getitem(index, props) - verts_features = new_props["verts_features_list"] - if isinstance(verts_features, list): - # Handle the case of an empty list - if len(verts_features) == 0: - verts_features = torch.empty( - size=(0, 0, 3), - dtype=torch.float32, - device=self.verts_features_padded().device, - ) - new_tex = self.__class__(verts_features=verts_features) - elif torch.is_tensor(verts_features): - new_tex = self.__class__(verts_features=[verts_features]) - else: - raise ValueError("Not all values are provided in the correct format") - new_tex._num_verts_per_mesh = new_props["_num_verts_per_mesh"] - return new_tex - - def verts_features_padded(self) -> torch.Tensor: - if self._verts_features_padded is None: - if self.isempty(): - self._verts_features_padded = torch.zeros( - (self._N, 0, 3, 0), dtype=torch.float32, device=self.device - ) - else: - self._verts_features_padded = list_to_padded( - self._verts_features_list, pad_value=0.0 - ) - return self._verts_features_padded - - def verts_features_list(self) -> List[torch.Tensor]: - if self._verts_features_list is None: - if self.isempty(): - self._verts_features_list = [ - torch.empty((0, 3), dtype=torch.float32, device=self.device) - ] * self._N - else: - self._verts_features_list = padded_to_list( - self._verts_features_padded, split_size=self._num_verts_per_mesh - ) - return self._verts_features_list - - def verts_features_packed(self) -> torch.Tensor: - if self.isempty(): - return torch.zeros((self._N, 3, 0), dtype=torch.float32, device=self.device) - verts_features_list = self.verts_features_list() - return list_to_packed(verts_features_list)[0] - - def extend(self, N: int) -> "TexturesVertex": - new_props = self._extend(N, ["verts_features_padded", "_num_verts_per_mesh"]) - new_tex = self.__class__(verts_features=new_props["verts_features_padded"]) - new_tex._num_verts_per_mesh = new_props["_num_verts_per_mesh"] - return new_tex - - # pyre-fixme[14]: `sample_textures` overrides method defined in `TexturesBase` - # inconsistently. - def sample_textures(self, fragments, faces_packed=None) -> torch.Tensor: - """ - Determine the color for each rasterized face. Interpolate the colors for - vertices which form the face using the barycentric coordinates. - Args: - fragments: - The outputs of rasterization. From this we use - - - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. - - barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying - the barycentric coordinates of each pixel - relative to the faces (in the packed - representation) which overlap the pixel. 
- - Returns: - texels: An texture per pixel of shape (N, H, W, K, C). - There will be one C dimensional value for each element in - fragments.pix_to_face. - """ - verts_features_packed = self.verts_features_packed() - faces_verts_features = verts_features_packed[faces_packed] - - texels = interpolate_face_attributes( - fragments.pix_to_face, fragments.bary_coords, faces_verts_features - ) - return texels - - def submeshes( - self, - vertex_ids_list: List[List[torch.LongTensor]], - faces_ids_list: List[List[torch.LongTensor]], - ) -> "TexturesVertex": - """ - Extract a sub-texture for use in a submesh. - - If the meshes batch corresponding to this TexturesVertex contains - `n = len(vertex_ids_list)` meshes, then self.verts_features_list() - will be of length n. After submeshing, we obtain a batch of - `k = sum(len(v) for v in vertex_ids_list` submeshes (see Meshes.submeshes). This - function creates a corresponding TexturesVertex object with `verts_features_list` - of length `k`. - - Args: - vertex_ids_list: A list of length equal to self.verts_features_list. Each - element is a LongTensor listing the vertices that the submesh keeps in - each respective mesh. - - face_ids_list: Not used when submeshing TexturesVertex. - - Returns: - A TexturesVertex in which verts_features_list has length - sum(len(vertices) for vertices in vertex_ids_list). Each element contains - vertex features corresponding to the subset of vertices in that submesh. - """ - if len(vertex_ids_list) != len(self.verts_features_list()): - raise IndexError( - "verts_features_list must be of " "the same length as vertex_ids_list." - ) - - sub_features = [] - for vertex_ids, features in zip(vertex_ids_list, self.verts_features_list()): - for vertex_ids_submesh in vertex_ids: - sub_features.append(features[vertex_ids_submesh]) - - return self.__class__(sub_features) - - def faces_verts_textures_packed(self, faces_packed=None) -> torch.Tensor: - """ - Samples texture from each vertex and for each face in the mesh. - For N meshes with {Fi} number of faces, it returns a - tensor of shape sum(Fi)x3xC (C = 3 for RGB). - You can use the utils function in structures.utils to convert the - packed representation to a list or padded. - """ - verts_features_packed = self.verts_features_packed() - faces_verts_features = verts_features_packed[faces_packed] - return faces_verts_features - - def join_batch(self, textures: List["TexturesVertex"]) -> "TexturesVertex": - """ - Join the list of textures given by `textures` to - self to create a batch of textures. Return a new - TexturesVertex object with the combined textures. - - Args: - textures: List of TexturesVertex objects - - Returns: - new_tex: TexturesVertex object with the combined - textures from self and the list `textures`. - """ - tex_types_same = all(isinstance(tex, TexturesVertex) for tex in textures) - if not tex_types_same: - raise ValueError("All textures must be of type TexturesVertex.") - - verts_features_list = [] - verts_features_list += self.verts_features_list() - num_verts_per_mesh = self._num_verts_per_mesh.copy() - for tex in textures: - verts_features_list += tex.verts_features_list() - num_verts_per_mesh += tex._num_verts_per_mesh - - new_tex = self.__class__(verts_features=verts_features_list) - new_tex._num_verts_per_mesh = num_verts_per_mesh - return new_tex - - def join_scene(self) -> "TexturesVertex": - """ - Return a new TexturesVertex amalgamating the batch. 
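
At its core, the per-vertex sampling above is plain barycentric interpolation of vertex features; a tiny standalone illustration for one pixel and one face:

import torch

face_colors = torch.tensor([[1.0, 0.0, 0.0],
                            [0.0, 1.0, 0.0],
                            [0.0, 0.0, 1.0]])       # (3, C) features of the face's vertices
bary = torch.tensor([0.2, 0.3, 0.5])                # barycentric coords of one pixel
pixel_color = (bary[:, None] * face_colors).sum(0)  # -> tensor([0.2, 0.3, 0.5])
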
- """ - return self.__class__(verts_features=[torch.cat(self.verts_features_list())]) - - def check_shapes( - self, batch_size: int, max_num_verts: int, max_num_faces: int - ) -> bool: - """ - Check if the dimensions of the verts features match that of the mesh verts - """ - # (N, V) should be the same - return self.verts_features_padded().shape[:-1] == (batch_size, max_num_verts) diff --git a/pytorch3d/pytorch3d/renderer/mesh/utils.py b/pytorch3d/pytorch3d/renderer/mesh/utils.py deleted file mode 100644 index 6157c8704e2865e64d0b2bfbb733fa710650b2cf..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/mesh/utils.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, NamedTuple, Tuple - -import torch -from pytorch3d.ops import interpolate_face_attributes - - -def _clip_barycentric_coordinates(bary) -> torch.Tensor: - """ - Args: - bary: barycentric coordinates of shape (...., 3) where `...` represents - an arbitrary number of dimensions - - Returns: - bary: Barycentric coordinates clipped (i.e any values < 0 are set to 0) - and renormalized. We only clip the negative values. Values > 1 will fall - into the [0, 1] range after renormalization. - The output is the same shape as the input. - """ - if bary.shape[-1] != 3: - msg = "Expected barycentric coords to have last dim = 3; got %r" - raise ValueError(msg % (bary.shape,)) - ndims = bary.ndim - 1 - mask = bary.eq(-1).all(dim=-1, keepdim=True).expand(*((-1,) * ndims + (3,))) - clipped = bary.clamp(min=0.0) - clipped[mask] = 0.0 - clipped_sum = torch.clamp(clipped.sum(dim=-1, keepdim=True), min=1e-5) - clipped = clipped / clipped_sum - clipped[mask] = -1.0 - return clipped - - -def _interpolate_zbuf( - pix_to_face: torch.Tensor, barycentric_coords: torch.Tensor, meshes -) -> torch.Tensor: - """ - A helper function to calculate the z buffer for each pixel in the - rasterized output. - - Args: - pix_to_face: LongTensor of shape (N, H, W, K) specifying the indices - of the faces (in the packed representation) which - overlap each pixel in the image. - barycentric_coords: FloatTensor of shape (N, H, W, K, 3) specifying - the barycentric coordinates of each pixel - relative to the faces (in the packed - representation) which overlap the pixel. - meshes: Meshes object representing a batch of meshes. - - Returns: - zbuffer: (N, H, W, K) FloatTensor - """ - verts = meshes.verts_packed() - faces = meshes.faces_packed() - faces_verts_z = verts[faces][..., 2][..., None] # (F, 3, 1) - zbuf = interpolate_face_attributes(pix_to_face, barycentric_coords, faces_verts_z)[ - ..., 0 - ] # (1, H, W, K) - zbuf[pix_to_face == -1] = -1 - return zbuf - - -# ----------- Rectangle Packing -------------------- # - - -class Rectangle(NamedTuple): - xsize: int - ysize: int - identifier: int - - -class PackedRectangle(NamedTuple): - x: int - y: int - flipped: bool - is_first: bool - - -class PackedRectangles(NamedTuple): - total_size: Tuple[int, int] - locations: List[PackedRectangle] - - -# Note the order of members matters here because it determines the queue order. -# We want to place longer rectangles first. 
-class _UnplacedRectangle(NamedTuple): - size: Tuple[int, int] - ind: int - flipped: bool - - -def _try_place_rectangle( - rect: _UnplacedRectangle, - placed_so_far: List[PackedRectangle], - occupied: List[Tuple[int, int]], -) -> bool: - """ - Try to place rect within the current bounding box. - Part of the implementation of pack_rectangles. - - Note that the arguments `placed_so_far` and `occupied` are modified. - - Args: - rect: rectangle to place - placed_so_far: the locations decided upon so far - a list of - (x, y, whether flipped). The nth element is the - location of the nth rectangle if it has been decided. - (modified in place) - occupied: the nodes of the graph of extents of rightmost placed - rectangles - (modified in place) - - Returns: - True on success. - - Example: - (We always have placed the first rectangle horizontally and other - rectangles above it.) - Let's say the placed boxes 1-4 are laid out like this. - The coordinates of the points marked X are stored in occupied. - It is to the right of the X's that we seek to place rect. - - +-----------------------X - |2 | - | +---X - | |4 | - | | | - | +---+X - | |3 | - | | | - +-----------------------+----+------X - y |1 | - ^ | --->x | - | +-----------------------------------+ - - We want to place this rectangle. - - +-+ - |5| - | | - | | = rect - | | - | | - | | - +-+ - - The call will succeed, returning True, leaving us with - - +-----------------------X - |2 | +-X - | +---+|5| - | |4 || | - | | || | - | +---++ | - | |3 | | - | | | | - +-----------------------+----+-+----X - |1 | - | | - +-----------------------------------+ . - - """ - total_width = occupied[0][0] - needed_height = rect.size[1] - current_start_idx = None - current_max_width = 0 - previous_height = 0 - currently_packed = 0 - for idx, interval in enumerate(occupied): - if interval[0] <= total_width - rect.size[0]: - currently_packed += interval[1] - previous_height - current_max_width = max(interval[0], current_max_width) - if current_start_idx is None: - current_start_idx = idx - if currently_packed >= needed_height: - current_max_width = max(interval[0], current_max_width) - placed_so_far[rect.ind] = PackedRectangle( - current_max_width, - occupied[current_start_idx - 1][1], - rect.flipped, - True, - ) - new_occupied = ( - current_max_width + rect.size[0], - occupied[current_start_idx - 1][1] + needed_height, - ) - if currently_packed == needed_height: - occupied[idx] = new_occupied - del occupied[current_start_idx:idx] - elif idx > current_start_idx: - occupied[idx - 1] = new_occupied - del occupied[current_start_idx : (idx - 1)] - else: - occupied.insert(idx, new_occupied) - return True - else: - current_start_idx = None - current_max_width = 0 - currently_packed = 0 - previous_height = interval[1] - return False - - -def pack_rectangles(sizes: List[Tuple[int, int]]) -> PackedRectangles: - """ - Naive rectangle packing in to a large rectangle. Flipping (i.e. rotating - a rectangle by 90 degrees) is allowed. - - This is used to join several uv maps into a single scene, see - TexturesUV.join_scene. - - Args: - sizes: List of sizes of rectangles to pack - - Returns: - total_size: size of total large rectangle - rectangles: location for each of the input rectangles. - This includes whether they are flipped. - The is_first field is always True. 
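
A hypothetical usage sketch (module path taken from the deleted file above; the exact layout produced is implementation-defined, so the comments are only indicative):

from pytorch3d.renderer.mesh.utils import pack_rectangles

packed = pack_rectangles([(128, 64), (32, 32), (64, 96)])
print(packed.total_size)                  # overall (x, y) size of the combined rectangle
for loc in packed.locations:              # one PackedRectangle per input, in input order
    print(loc.x, loc.y, loc.flipped, loc.is_first)
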
- """ - - if len(sizes) < 2: - raise ValueError("Cannot pack less than two boxes") - - queue = [] - for i, size in enumerate(sizes): - if size[0] < size[1]: - queue.append(_UnplacedRectangle((size[1], size[0]), i, True)) - else: - queue.append(_UnplacedRectangle((size[0], size[1]), i, False)) - queue.sort() - placed_so_far = [PackedRectangle(-1, -1, False, False)] * len(sizes) - - biggest = queue.pop() - total_width, current_height = biggest.size - placed_so_far[biggest.ind] = PackedRectangle(0, 0, biggest.flipped, True) - - second = queue.pop() - placed_so_far[second.ind] = PackedRectangle(0, current_height, second.flipped, True) - current_height += second.size[1] - occupied = [biggest.size, (second.size[0], current_height)] - - for rect in reversed(queue): - if _try_place_rectangle(rect, placed_so_far, occupied): - continue - - rotated = _UnplacedRectangle( - (rect.size[1], rect.size[0]), rect.ind, not rect.flipped - ) - if _try_place_rectangle(rotated, placed_so_far, occupied): - continue - - # rect wasn't placed in the current bounding box, - # so we add extra space to fit it in. - placed_so_far[rect.ind] = PackedRectangle(0, current_height, rect.flipped, True) - current_height += rect.size[1] - occupied.append((rect.size[0], current_height)) - - return PackedRectangles((total_width, current_height), placed_so_far) - - -def pack_unique_rectangles(rectangles: List[Rectangle]) -> PackedRectangles: - """ - Naive rectangle packing in to a large rectangle. Flipping (i.e. rotating - a rectangle by 90 degrees) is allowed. Inputs are deduplicated by their - identifier. - - This is a wrapper around pack_rectangles, where inputs come with an - identifier. In particular, it calls pack_rectangles for the deduplicated inputs, - then returns the values for all the inputs. The output for all rectangles with - the same identifier will be the same, except that only the first one will have - the is_first field True. - - This is used to join several uv maps into a single scene, see - TexturesUV.join_scene. - - Args: - rectangles: List of sizes of rectangles to pack - - Returns: - total_size: size of total large rectangle - rectangles: location for each of the input rectangles. - This includes whether they are flipped. - The is_first field is true for the first rectangle - with each identifier. 
- """ - - if len(rectangles) < 2: - raise ValueError("Cannot pack less than two boxes") - - input_map = {} - input_indices: List[Tuple[int, bool]] = [] - unique_input_sizes: List[Tuple[int, int]] = [] - for rectangle in rectangles: - if rectangle.identifier not in input_map: - unique_index = len(unique_input_sizes) - unique_input_sizes.append((rectangle.xsize, rectangle.ysize)) - input_map[rectangle.identifier] = unique_index - input_indices.append((unique_index, True)) - else: - unique_index = input_map[rectangle.identifier] - input_indices.append((unique_index, False)) - - if len(unique_input_sizes) == 1: - first = [PackedRectangle(0, 0, False, True)] - rest = (len(rectangles) - 1) * [PackedRectangle(0, 0, False, False)] - return PackedRectangles(unique_input_sizes[0], first + rest) - - total_size, unique_locations = pack_rectangles(unique_input_sizes) - full_locations = [] - for input_index, first in input_indices: - full_locations.append(unique_locations[input_index]._replace(is_first=first)) - - return PackedRectangles(total_size, full_locations) diff --git a/pytorch3d/pytorch3d/renderer/opengl/__init__.py b/pytorch3d/pytorch3d/renderer/opengl/__init__.py deleted file mode 100644 index f0f6b4c170125529009029391431083001f86d68..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/opengl/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# If we can access EGL, import MeshRasterizerOpenGL. -def _can_import_egl_and_pycuda(): - import os - import warnings - - try: - os.environ["PYOPENGL_PLATFORM"] = "egl" - import OpenGL.EGL - except (AttributeError, ImportError, ModuleNotFoundError): - warnings.warn( - "Can't import EGL, not importing MeshRasterizerOpenGL. This might happen if" - " your Python application imported OpenGL with a non-EGL backend before" - " importing PyTorch3D, or if you don't have pyopengl installed as part" - " of your Python distribution." - ) - return False - - try: - import pycuda.gl - except (ImportError, ImportError, ModuleNotFoundError): - warnings.warn("Can't import pycuda.gl, not importing MeshRasterizerOpenGL.") - return False - - return True - - -if _can_import_egl_and_pycuda(): - from .opengl_utils import EGLContext, global_device_context_store - from .rasterizer_opengl import MeshRasterizerOpenGL - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/renderer/opengl/opengl_utils.py b/pytorch3d/pytorch3d/renderer/opengl/opengl_utils.py deleted file mode 100644 index b854f067378b9dfb6ef5cedd2f34972a539e137a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/opengl/opengl_utils.py +++ /dev/null @@ -1,448 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Utilities useful for OpenGL rendering. -# -# NOTE: This module MUST be imported before any other OpenGL modules in this Python -# session, unless you set PYOPENGL_PLATFORM to egl *before* importing other modules. -# Otherwise, the imports below will throw an error. -# -# This module (as well as rasterizer_opengl) will not be imported into pytorch3d if -# you do not have pycuda.gl and pyopengl installed. 
- -import contextlib -import ctypes -import os -import threading -from typing import Any, Dict - - -os.environ["PYOPENGL_PLATFORM"] = "egl" -import OpenGL.EGL as egl # noqa - -import pycuda.driver as cuda # noqa -from OpenGL._opaque import opaque_pointer_cls # noqa -from OpenGL.raw.EGL._errors import EGLError # noqa - -# A few constants necessary to use EGL extensions, see links for details. - -# https://www.khronos.org/registry/EGL/extensions/EXT/EGL_EXT_platform_device.txt -EGL_PLATFORM_DEVICE_EXT = 0x313F -# https://www.khronos.org/registry/EGL/extensions/NV/EGL_NV_device_cuda.txt -EGL_CUDA_DEVICE_NV = 0x323A - - -# To use EGL extensions, we need to tell OpenGL about them. For details, see -# https://developer.nvidia.com/blog/egl-eye-opengl-visualization-without-x-server/. -# To avoid garbage collection of the protos, we'll store them in a module-global list. -def _define_egl_extension(name: str, type): - if hasattr(egl, name): - return - addr = egl.eglGetProcAddress(name) - if addr is None: - raise RuntimeError(f"Cannot find EGL extension {name}.") - else: - proto = ctypes.CFUNCTYPE(type) - func = proto(addr) - setattr(egl, name, func) - return proto - - -_protos = [] -_protos.append(_define_egl_extension("eglGetPlatformDisplayEXT", egl.EGLDisplay)) -_protos.append(_define_egl_extension("eglQueryDevicesEXT", egl.EGLBoolean)) -_protos.append(_define_egl_extension("eglQueryDeviceAttribEXT", egl.EGLBoolean)) -_protos.append(_define_egl_extension("eglQueryDisplayAttribEXT", egl.EGLBoolean)) -_protos.append(_define_egl_extension("eglQueryDeviceStringEXT", ctypes.c_char_p)) - -if not hasattr(egl, "EGLDeviceEXT"): - egl.EGLDeviceEXT = opaque_pointer_cls("EGLDeviceEXT") - - -def _egl_convert_to_int_array(egl_attributes): - """ - Convert a Python dict of EGL attributes into an array of ints (some of which are - special EGL ints. - - Args: - egl_attributes: A dict where keys are EGL attributes, and values are their vals. - - Returns: - A c-list of length 2 * len(egl_attributes) + 1, of the form [key1, val1, ..., - keyN, valN, EGL_NONE] - """ - attributes_list = sum(([k, v] for k, v in egl_attributes.items()), []) + [ - egl.EGL_NONE - ] - return (egl.EGLint * len(attributes_list))(*attributes_list) - - -def _get_cuda_device(requested_device_id: int): - """ - Find an EGL device with a given CUDA device ID. - - Args: - requested_device_id: The desired CUDA device ID, e.g. "1" for "cuda:1". - - Returns: - EGL device with the desired CUDA ID. - """ - num_devices = egl.EGLint() - if ( - # pyre-ignore Undefined attribute [16] - not egl.eglQueryDevicesEXT(0, None, ctypes.pointer(num_devices)) - or num_devices.value < 1 - ): - raise RuntimeError("EGL requires a system that supports at least one device.") - devices = (egl.EGLDeviceEXT * num_devices.value)() # array of size num_devices - if ( - # pyre-ignore Undefined attribute [16] - not egl.eglQueryDevicesEXT( - num_devices.value, devices, ctypes.pointer(num_devices) - ) - or num_devices.value < 1 - ): - raise RuntimeError("EGL sees no available devices.") - if len(devices) < requested_device_id + 1: - raise ValueError( - f"Device {requested_device_id} not available. Found only {len(devices)} devices." - ) - - # Iterate over all the EGL devices, and check if their CUDA ID matches the request. 
- for device in devices: - available_device_id = egl.EGLAttrib(ctypes.c_int(-1)) - # pyre-ignore Undefined attribute [16] - egl.eglQueryDeviceAttribEXT(device, EGL_CUDA_DEVICE_NV, available_device_id) - if available_device_id.contents.value == requested_device_id: - return device - raise ValueError( - f"Found {len(devices)} CUDA devices, but none with CUDA id {requested_device_id}." - ) - - -def _get_egl_config(egl_dpy, surface_type): - """ - Get an EGL config with reasonable settings (for use with MeshRasterizerOpenGL). - - Args: - egl_dpy: An EGL display constant (int). - surface_type: An EGL surface_type int. - - Returns: - An EGL config object. - - Throws: - ValueError if the desired config is not available or invalid. - """ - egl_config_dict = { - egl.EGL_RED_SIZE: 8, - egl.EGL_GREEN_SIZE: 8, - egl.EGL_BLUE_SIZE: 8, - egl.EGL_ALPHA_SIZE: 8, - egl.EGL_DEPTH_SIZE: 24, - egl.EGL_STENCIL_SIZE: egl.EGL_DONT_CARE, - egl.EGL_RENDERABLE_TYPE: egl.EGL_OPENGL_BIT, - egl.EGL_SURFACE_TYPE: surface_type, - } - egl_config_array = _egl_convert_to_int_array(egl_config_dict) - egl_config = egl.EGLConfig() - num_configs = egl.EGLint() - if ( - not egl.eglChooseConfig( - egl_dpy, - egl_config_array, - ctypes.pointer(egl_config), - 1, - ctypes.pointer(num_configs), - ) - or num_configs.value == 0 - ): - raise ValueError("Invalid EGL config.") - return egl_config - - -class EGLContext: - """ - A class representing an EGL context. In short, EGL allows us to render OpenGL con- - tent in a headless mode, that is without an actual display to render to. This capa- - bility enables MeshRasterizerOpenGL to render on the GPU and then transfer the re- - sults to PyTorch3D. - """ - - def __init__(self, width: int, height: int, cuda_device_id: int = 0) -> None: - """ - Args: - width: Width of the "display" to render to. - height: Height of the "display" to render to. - cuda_device_id: Device ID to render to, in the CUDA convention (note that - this might be different than EGL's device numbering). - """ - # Lock used to prevent multiple threads from rendering on the same device - # at the same time, creating/destroying contexts at the same time, etc. - self.lock = threading.Lock() - self.cuda_device_id = cuda_device_id - self.device = _get_cuda_device(self.cuda_device_id) - self.width = width - self.height = height - self.dpy = egl.eglGetPlatformDisplayEXT( - EGL_PLATFORM_DEVICE_EXT, self.device, None - ) - major, minor = egl.EGLint(), egl.EGLint() - - # Initialize EGL components: the display, surface, and context - egl.eglInitialize(self.dpy, ctypes.pointer(major), ctypes.pointer(minor)) - - config = _get_egl_config(self.dpy, egl.EGL_PBUFFER_BIT) - pb_surf_attribs = _egl_convert_to_int_array( - { - egl.EGL_WIDTH: width, - egl.EGL_HEIGHT: height, - } - ) - self.surface = egl.eglCreatePbufferSurface(self.dpy, config, pb_surf_attribs) - if self.surface == egl.EGL_NO_SURFACE: - raise RuntimeError("Failed to create an EGL surface.") - - if not egl.eglBindAPI(egl.EGL_OPENGL_API): - raise RuntimeError("Failed to bind EGL to the OpenGL API.") - self.context = egl.eglCreateContext(self.dpy, config, egl.EGL_NO_CONTEXT, None) - if self.context == egl.EGL_NO_CONTEXT: - raise RuntimeError("Failed to create an EGL context.") - - @contextlib.contextmanager - def active_and_locked(self): - """ - A context manager used to make sure a given EGL context is only current in - a single thread at a single time. It is recommended to ALWAYS use EGL within - a `with context.active_and_locked():` context. 
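        A minimal sketch of that pattern (assuming a context created on cuda:0)::

            context = EGLContext(width=512, height=512, cuda_device_id=0)
            with context.active_and_locked():
                ...  # issue OpenGL calls; the context is current only in this thread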
- - Throws: - EGLError when the context cannot be made current or make non-current. - """ - self.lock.acquire() - egl.eglMakeCurrent(self.dpy, self.surface, self.surface, self.context) - try: - yield - finally: - egl.eglMakeCurrent( - self.dpy, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT - ) - self.lock.release() - - def get_context_info(self) -> Dict[str, Any]: - """ - Return context info. Useful for debugging. - - Returns: - A dict of keys and ints, representing the context's display, surface, - the context itself, and the current thread. - """ - return { - "dpy": self.dpy, - "surface": self.surface, - "context": self.context, - "thread": threading.get_ident(), - } - - def release(self): - """ - Release the context's resources. - """ - self.lock.acquire() - try: - if self.surface: - egl.eglDestroySurface(self.dpy, self.surface) - if self.context and self.dpy: - egl.eglDestroyContext(self.dpy, self.context) - egl.eglMakeCurrent( - self.dpy, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT - ) - if self.dpy: - egl.eglTerminate(self.dpy) - except EGLError as err: - print( - f"EGL could not release context on device cuda:{self.cuda_device_id}." - " This can happen if you created two contexts on the same device." - " Instead, you can use DeviceContextStore to use a single context" - " per device, and EGLContext.make_(in)active_in_current_thread to" - " (in)activate the context as needed." - ) - raise err - - egl.eglReleaseThread() - self.lock.release() - - -class _DeviceContextStore: - """ - DeviceContextStore provides thread-safe storage for EGL and pycuda contexts. It - should not be used directly. opengl_utils instantiates a module-global variable - called opengl_utils.global_device_context_store. MeshRasterizerOpenGL uses this - store to avoid unnecessary context creation and destruction. - - The EGL/CUDA contexts are not meant to be created and destroyed all the time, - and having multiple on a single device can be troublesome. Intended use is entirely - transparent to the user:: - - rasterizer1 = MeshRasterizerOpenGL(...some args...) - mesh1 = load_mesh_on_cuda_0() - - # Now rasterizer1 will request EGL/CUDA contexts from - # global_device_context_store on cuda:0, and since there aren't any, the - # store will create new ones. - rasterizer1.rasterize(mesh1) - - # rasterizer2 also needs EGL & CUDA contexts. But global_context_store - # already has them for cuda:0. Instead of creating new contexts, the store - # will tell rasterizer2 to use them. - rasterizer2 = MeshRasterizerOpenGL(dcs) - rasterize2.rasterize(mesh1) - - # When rasterizer1 needs to render on cuda:1, the store will create new contexts. - mesh2 = load_mesh_on_cuda_1() - rasterizer1.rasterize(mesh2) - - """ - - def __init__(self): - cuda.init() - # pycuda contexts, at most one per device. - self._cuda_contexts = {} - # EGL contexts, at most one per device. - self._egl_contexts = {} - # Any extra per-device data (e.g. precompiled GL objects). - self._context_data = {} - # Lock for DeviceContextStore used in multithreaded multidevice scenarios. - self._lock = threading.Lock() - # All EGL contexts created by this store will have this resolution. - self.max_egl_width = 2048 - self.max_egl_height = 2048 - - def get_cuda_context(self, device): - """ - Return a pycuda's CUDA context on a given CUDA device. If we have not created - such a context yet, create a new one and store it in a dict. The context is - popped (you need to call context.push() to start using it). This function - is thread-safe. 
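        A minimal sketch of the resulting push/pop pattern (hypothetical usage)::

            ctx = global_device_context_store.get_cuda_context(torch.device("cuda:0"))
            ctx.push()    # make the context current in this thread
            ...           # pycuda work against cuda:0
            ctx.pop()     # hand it back in the popped state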
- - Args: - device: A torch.device. - - Returns: A pycuda context corresponding to the given device. - """ - cuda_device_id = device.index - with self._lock: - if cuda_device_id not in self._cuda_contexts: - self._cuda_contexts[cuda_device_id] = _init_cuda_context(cuda_device_id) - self._cuda_contexts[cuda_device_id].pop() - return self._cuda_contexts[cuda_device_id] - - def get_egl_context(self, device): - """ - Return an EGL context on a given CUDA device. If we have not created such a - context yet, create a new one and store it in a dict. The context if not current - (you should use the `with egl_context.active_and_locked:` context manager when - you need it to be current). This function is thread-safe. - - Args: - device: A torch.device. - - Returns: An EGLContext on the requested device. The context will have size - self.max_egl_width and self.max_egl_height. - """ - cuda_device_id = device.index - with self._lock: - egl_context = self._egl_contexts.get(cuda_device_id, None) - if egl_context is None: - self._egl_contexts[cuda_device_id] = EGLContext( - self.max_egl_width, self.max_egl_height, cuda_device_id - ) - return self._egl_contexts[cuda_device_id] - - def set_context_data(self, device, value): - """ - Set arbitrary data in a per-device dict. - - This function is intended for storing precompiled OpenGL objects separately for - EGL contexts on different devices. Each such context needs a separate compiled - OpenGL program, but (in case e.g. of MeshRasterizerOpenGL) there's no need to - re-compile it each time we move the rasterizer to the same device repeatedly, - as it happens when using DataParallel. - - Args: - device: A torch.device - value: An arbitrary Python object. - """ - - cuda_device_id = device.index - self._context_data[cuda_device_id] = value - - def get_context_data(self, device): - """ - Get arbitrary data in a per-device dict. See set_context_data for more detail. - - Args: - device: A torch.device - - Returns: - The most recent object stored using set_context_data. - """ - cuda_device_id = device.index - return self._context_data.get(cuda_device_id, None) - - def release(self): - """ - Release all CUDA and EGL contexts. - """ - for context in self._cuda_contexts.values(): - context.detach() - - for context in self._egl_contexts.values(): - context.release() - - -def _init_cuda_context(device_id: int = 0): - """ - Initialize a pycuda context on a chosen device. - - Args: - device_id: int, specifies which GPU to use. - - Returns: - A pycuda Context. - """ - # pyre-ignore Undefined attribute [16] - device = cuda.Device(device_id) - cuda_context = device.make_context() - return cuda_context - - -def _torch_to_opengl(torch_tensor, cuda_context, cuda_buffer): - # CUDA access to the OpenGL buffer is only allowed within a map-unmap block. - cuda_context.push() - mapping_obj = cuda_buffer.map() - - # data_ptr points to the OpenGL shader storage buffer memory. - data_ptr, sz = mapping_obj.device_ptr_and_size() - - # Copy the torch tensor to the OpenGL buffer directly on device. - cuda_copy = cuda.Memcpy2D() - cuda_copy.set_src_device(torch_tensor.data_ptr()) - cuda_copy.set_dst_device(data_ptr) - cuda_copy.width_in_bytes = cuda_copy.src_pitch = cuda_copy.dst_ptch = ( - torch_tensor.shape[1] * 4 - ) - cuda_copy.height = torch_tensor.shape[0] - cuda_copy(False) - - # Unmap and pop the cuda context to make sure OpenGL won't interfere with - # PyTorch ops down the line. - mapping_obj.unmap() - cuda_context.pop() - - -# Initialize a global _DeviceContextStore. 
Almost always we will only need a single one. -global_device_context_store = _DeviceContextStore() diff --git a/pytorch3d/pytorch3d/renderer/opengl/rasterizer_opengl.py b/pytorch3d/pytorch3d/renderer/opengl/rasterizer_opengl.py deleted file mode 100644 index cf61d0d722bcc9f3a5bad7485277872d68f3a187..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/opengl/rasterizer_opengl.py +++ /dev/null @@ -1,711 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# NOTE: This module (as well as rasterizer_opengl) will not be imported into pytorch3d -# if you do not have pycuda.gl and pyopengl installed. In addition, please make sure -# your Python application *does not* import OpenGL before importing PyTorch3D, unless -# you are using the EGL backend. -import warnings -from typing import Optional, Tuple, Union - -import numpy as np -import OpenGL.GL as gl -import pycuda.gl -import torch - -import torch.nn as nn - -from pytorch3d.structures.meshes import Meshes - -from ..cameras import FoVOrthographicCameras, FoVPerspectiveCameras -from ..mesh.rasterizer import Fragments, RasterizationSettings -from ..utils import parse_image_size - -from .opengl_utils import _torch_to_opengl, global_device_context_store - -# Shader strings, used below to compile an OpenGL program. -vertex_shader = """ -// The vertex shader does nothing. -#version 430 - -void main() { } -""" - -geometry_shader = """ -#version 430 - -layout (points) in; -layout (triangle_strip, max_vertices = 3) out; - -out layout (location = 0) vec2 bary_coords; -out layout (location = 1) float depth; -out layout (location = 2) float p2f; - -layout(binding=0) buffer triangular_mesh { float mesh_buffer[]; }; - -uniform mat4 perspective_projection; - -vec3 get_vertex_position(int vertex_index) { - int offset = gl_PrimitiveIDIn * 9 + vertex_index * 3; - return vec3( - mesh_buffer[offset], - mesh_buffer[offset + 1], - mesh_buffer[offset + 2] - ); -} - -void main() { - vec3 positions[3] = { - get_vertex_position(0), - get_vertex_position(1), - get_vertex_position(2) - }; - vec4 projected_vertices[3] = { - perspective_projection * vec4(positions[0], 1.0), - perspective_projection * vec4(positions[1], 1.0), - perspective_projection * vec4(positions[2], 1.0) - }; - - for (int i = 0; i < 3; ++i) { - gl_Position = projected_vertices[i]; - bary_coords = vec2(i==0 ? 1.0 : 0.0, i==1 ? 1.0 : 0.0); - // At the moment, we output depth as the distance from the image plane in - // view coordinates -- NOT distance along the camera ray. - depth = positions[i][2]; - p2f = gl_PrimitiveIDIn; - EmitVertex(); - } - EndPrimitive(); -} -""" - -fragment_shader = """ -#version 430 - -in layout(location = 0) vec2 bary_coords; -in layout(location = 1) float depth; -in layout(location = 2) float p2f; - - -out vec4 bary_depth_p2f; - -void main() { - bary_depth_p2f = vec4(bary_coords, depth, round(p2f)); -} -""" - - -def _parse_and_verify_image_size( - image_size: Union[Tuple[int, int], int], -) -> Tuple[int, int]: - """ - Parse image_size as a tuple of ints. Throw ValueError if the size is incompatible - with the maximum renderable size as set in global_device_context_store. 
- """ - height, width = parse_image_size(image_size) - max_h = global_device_context_store.max_egl_height - max_w = global_device_context_store.max_egl_width - if height > max_h or width > max_w: - raise ValueError( - f"Max rasterization size is height={max_h}, width={max_w}. " - f"Cannot raster an image of size {height}, {width}. You can change max " - "allowed rasterization size by modifying the MAX_EGL_HEIGHT and " - "MAX_EGL_WIDTH environment variables." - ) - return height, width - - -class MeshRasterizerOpenGL(nn.Module): - """ - EXPERIMENTAL, USE WITH CAUTION - - This class implements methods for rasterizing a batch of heterogeneous - Meshes using OpenGL. This rasterizer, as opposed to MeshRasterizer, is - *not differentiable* and needs to be used with shading methods such as - SplatterPhongShader, which do not require differentiable rasterizerization. - It is, however, faster: on a 2M-faced mesh, about 20x so. - - Fragments output by MeshRasterizerOpenGL and MeshRasterizer should have near - identical pix_to_face, bary_coords and zbuf. However, MeshRasterizerOpenGL does not - return Fragments.dists which is only relevant to SoftPhongShader and - SoftSilhouetteShader. These do not work with MeshRasterizerOpenGL (because it is - not differentiable). - """ - - def __init__( - self, - cameras: Optional[Union[FoVOrthographicCameras, FoVPerspectiveCameras]] = None, - raster_settings=None, - ) -> None: - """ - Args: - cameras: A cameras object which has a `transform_points` method - which returns the transformed points after applying the - world-to-view and view-to-ndc transformations. Currently, only FoV - cameras are supported. - raster_settings: the parameters for rasterization. This should be a - named tuple. - """ - super().__init__() - if raster_settings is None: - raster_settings = RasterizationSettings() - self.raster_settings = raster_settings - _check_raster_settings(self.raster_settings) - self.cameras = cameras - self.image_size = _parse_and_verify_image_size(self.raster_settings.image_size) - - self.opengl_machinery = _OpenGLMachinery( - max_faces=self.raster_settings.max_faces_opengl, - ) - - def forward(self, meshes_world: Meshes, **kwargs) -> Fragments: - """ - Args: - meshes_world: a Meshes object representing a batch of meshes with - coordinates in world space. The batch must live on a GPU. - - Returns: - Fragments: Rasterization outputs as a named tuple. These are different than - Fragments returned by MeshRasterizer in two ways. First, we return no - `dist` which is only relevant to SoftPhongShader which doesn't work - with MeshRasterizerOpenGL (because it is not differentiable). Second, - the zbuf uses the opengl zbuf convention, where the z-vals are between 0 - (at projection plane) and 1 (at clipping distance), and are a non-linear - function of the depth values of the camera ray intersections. In - contrast, MeshRasterizer's zbuf values are simply the distance of each - ray intersection from the camera. - - Throws: - ValueError if meshes_world lives on the CPU. - """ - if meshes_world.device == torch.device("cpu"): - raise ValueError("MeshRasterizerOpenGL works only on CUDA devices.") - - raster_settings = kwargs.get("raster_settings", self.raster_settings) - _check_raster_settings(raster_settings) - - image_size = ( - _parse_and_verify_image_size(raster_settings.image_size) or self.image_size - ) - - # OpenGL needs vertices in NDC coordinates with un-flipped xy directions. 
- cameras_unpacked = kwargs.get("cameras", self.cameras) - _check_cameras(cameras_unpacked) - meshes_gl_ndc = _convert_meshes_to_gl_ndc( - meshes_world, image_size, cameras_unpacked, **kwargs - ) - - # Perspective projection will happen within the OpenGL rasterizer. - projection_matrix = cameras_unpacked.get_projection_transform(**kwargs)._matrix - - # Run OpenGL rasterization machinery. - pix_to_face, bary_coords, zbuf = self.opengl_machinery( - meshes_gl_ndc, projection_matrix, image_size - ) - - # Return the Fragments and detach, because gradients don't go through OpenGL. - return Fragments( - pix_to_face=pix_to_face, - zbuf=zbuf, - bary_coords=bary_coords, - dists=None, - ).detach() - - def to(self, device): - # Manually move to device cameras as it is not a subclass of nn.Module - if self.cameras is not None: - self.cameras = self.cameras.to(device) - - # Create a new OpenGLMachinery, as its member variables can be tied to a GPU. - self.opengl_machinery = _OpenGLMachinery( - max_faces=self.raster_settings.max_faces_opengl, - ) - - -class _OpenGLMachinery: - """ - A class holding OpenGL machinery used by MeshRasterizerOpenGL. - """ - - def __init__( - self, - max_faces: int = 10_000_000, - ) -> None: - self.max_faces = max_faces - self.program = None - - # These will be created on an appropriate GPU each time we render a new mesh on - # that GPU for the first time. - self.egl_context = None - self.cuda_context = None - self.perspective_projection_uniform = None - self.mesh_buffer_object = None - self.vao = None - self.fbo = None - self.cuda_buffer = None - - def __call__( - self, - meshes_gl_ndc: Meshes, - projection_matrix: torch.Tensor, - image_size: Tuple[int, int], - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Rasterize a batch of meshes, using a given batch of projection matrices and - image size. - - Args: - meshes_gl_ndc: A Meshes object, with vertices in the OpenGL NDC convention. - projection_matrix: A 3x3 camera projection matrix, or a tensor of projection - matrices equal in length to the number of meshes in meshes_gl_ndc. - image_size: Image size to rasterize. Must be smaller than the max height and - width stored in global_device_context_store. - - Returns: - pix_to_faces: A BHW1 tensor of ints, filled with -1 where no face projects - to a given pixel. - bary_coords: A BHW3 float tensor, filled with -1 where no face projects to - a given pixel. - zbuf: A BHW1 float tensor, filled with 1 where no face projects to a given - pixel. NOTE: this zbuf uses the opengl zbuf convention, where the z-vals - are between 0 (at projection plane) and 1 (at clipping distance), and - are a non-linear function of the depth values of the camera ray inter- - sections. - """ - - self.initialize_device_data(meshes_gl_ndc.device) - with self.egl_context.active_and_locked(): - # Perspective projection happens in OpenGL. Move the matrix over if there's only - # a single camera shared by all the meshes. 
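            # With a single shared camera the projection uniform is uploaded once here;
            # with per-mesh cameras it is re-uploaded inside the loop below via the
            # projection_matrix[mesh_id] argument to _rasterize_mesh.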
- if projection_matrix.shape[0] == 1: - self._projection_matrix_to_opengl(projection_matrix) - - pix_to_faces = [] - bary_coords = [] - zbufs = [] - - # pyre-ignore Incompatible parameter type [6] - for mesh_id, mesh in enumerate(meshes_gl_ndc): - pix_to_face, bary_coord, zbuf = self._rasterize_mesh( - mesh, - image_size, - projection_matrix=projection_matrix[mesh_id] - if projection_matrix.shape[0] > 1 - else None, - ) - pix_to_faces.append(pix_to_face) - bary_coords.append(bary_coord) - zbufs.append(zbuf) - - return ( - torch.cat(pix_to_faces, dim=0), - torch.cat(bary_coords, dim=0), - torch.cat(zbufs, dim=0), - ) - - def initialize_device_data(self, device) -> None: - """ - Initialize data specific to a GPU device: the EGL and CUDA contexts, the OpenGL - program, as well as various buffer and array objects used to communicate with - OpenGL. - - Args: - device: A torch.device. - """ - self.egl_context = global_device_context_store.get_egl_context(device) - self.cuda_context = global_device_context_store.get_cuda_context(device) - - # self.program represents the OpenGL program we use for rasterization. - if global_device_context_store.get_context_data(device) is None: - with self.egl_context.active_and_locked(): - self.program = self._compile_and_link_gl_program() - self._set_up_gl_program_properties(self.program) - - # Create objects used to transfer data into and out of the program. - ( - self.perspective_projection_uniform, - self.mesh_buffer_object, - self.vao, - self.fbo, - ) = self._prepare_persistent_opengl_objects( - self.program, - self.max_faces, - ) - - # Register the input buffer with pycuda, to transfer data directly into it. - self.cuda_context.push() - self.cuda_buffer = pycuda.gl.RegisteredBuffer( - int(self.mesh_buffer_object), - pycuda.gl.graphics_map_flags.WRITE_DISCARD, - ) - self.cuda_context.pop() - - global_device_context_store.set_context_data( - device, - ( - self.program, - self.perspective_projection_uniform, - self.mesh_buffer_object, - self.vao, - self.fbo, - self.cuda_buffer, - ), - ) - ( - self.program, - self.perspective_projection_uniform, - self.mesh_buffer_object, - self.vao, - self.fbo, - self.cuda_buffer, - ) = global_device_context_store.get_context_data(device) - - def release(self) -> None: - """ - Release CUDA and OpenGL resources. - """ - # Finish all current operations. - torch.cuda.synchronize() - self.cuda_context.synchronize() - - # Free pycuda resources. - self.cuda_context.push() - self.cuda_buffer.unregister() - self.cuda_context.pop() - - # Free GL resources. - gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, self.fbo) - gl.glDeleteFramebuffers(1, [self.fbo]) - gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0) - del self.fbo - - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, self.mesh_buffer_object) - gl.glDeleteBuffers(1, [self.mesh_buffer_object]) - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, 0) - del self.mesh_buffer_object - - gl.glDeleteProgram(self.program) - self.egl_context.release() - - def _projection_matrix_to_opengl(self, projection_matrix: torch.Tensor) -> None: - """ - Transfer a torch projection matrix to OpenGL. - - Args: - projection matrix: A 3x3 float tensor. 
- """ - gl.glUseProgram(self.program) - gl.glUniformMatrix4fv( - self.perspective_projection_uniform, - 1, - gl.GL_FALSE, - projection_matrix.detach().flatten().cpu().numpy().astype(np.float32), - ) - gl.glUseProgram(0) - - def _rasterize_mesh( - self, - mesh: Meshes, - image_size: Tuple[int, int], - projection_matrix: Optional[torch.Tensor] = None, - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Rasterize a single mesh using OpenGL. - - Args: - mesh: A Meshes object, containing a single mesh only. - projection_matrix: A 3x3 camera projection matrix, or a tensor of projection - matrices equal in length to the number of meshes in meshes_gl_ndc. - image_size: Image size to rasterize. Must be smaller than the max height and - width stored in global_device_context_store. - - Returns: - pix_to_faces: A 1HW1 tensor of ints, filled with -1 where no face projects - to a given pixel. - bary_coords: A 1HW3 float tensor, filled with -1 where no face projects to - a given pixel. - zbuf: A 1HW1 float tensor, filled with 1 where no face projects to a given - pixel. NOTE: this zbuf uses the opengl zbuf convention, where the z-vals - are between 0 (at projection plane) and 1 (at clipping distance), and - are a non-linear function of the depth values of the camera ray inter- - sections. - """ - height, width = image_size - # Extract face_verts and move them to OpenGL as well. We use pycuda to - # directly move the vertices on the GPU, to avoid a costly torch/GPU -> CPU - # -> openGL/GPU trip. - verts_packed = mesh.verts_packed().detach() - faces_packed = mesh.faces_packed().detach() - face_verts = verts_packed[faces_packed].reshape(-1, 9) - _torch_to_opengl(face_verts, self.cuda_context, self.cuda_buffer) - - if projection_matrix is not None: - self._projection_matrix_to_opengl(projection_matrix) - - # Start OpenGL operations. - gl.glUseProgram(self.program) - - # Render an image of size (width, height). - gl.glViewport(0, 0, width, height) - - gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, self.fbo) - # Clear the output framebuffer. The "background" value for both pix_to_face - # as well as bary_coords is -1 (background = pixels which the rasterizer - # projected no triangle to). - gl.glClearColor(-1.0, -1.0, -1.0, -1.0) - gl.glClearDepth(1.0) - # pyre-ignore Unsupported operand [58] - gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT) - - # Run the actual rendering. The face_verts were transported to the OpenGL - # program into a shader storage buffer which is used directly in the geometry - # shader. Here, we only pass the number of these vertices to the vertex shader - # (which doesn't do anything and passes directly to the geometry shader). - gl.glBindVertexArray(self.vao) - gl.glDrawArrays(gl.GL_POINTS, 0, len(face_verts)) - gl.glBindVertexArray(0) - - # Read out the result. We ignore the depth buffer. The RGBA color buffer stores - # barycentrics in the RGB component and pix_to_face in the A component. - bary_depth_p2f_gl = gl.glReadPixels( - 0, - 0, - width, - height, - gl.GL_RGBA, - gl.GL_FLOAT, - ) - - gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0) - gl.glUseProgram(0) - - # Create torch tensors containing the results. - bary_depth_p2f = ( - torch.frombuffer(bary_depth_p2f_gl, dtype=torch.float) - .reshape(1, height, width, 1, -1) - .to(verts_packed.device) - ) - - # Read out barycentrics. GL only outputs the first two, so we need to compute - # the third one and make sure we still leave no-intersection pixels with -1. 
- barycentric_coords = torch.cat( - [ - bary_depth_p2f[..., :2], - 1.0 - bary_depth_p2f[..., 0:1] - bary_depth_p2f[..., 1:2], - ], - dim=-1, - ) - barycentric_coords = torch.where( - barycentric_coords == 3, -1, barycentric_coords - ) - depth = bary_depth_p2f[..., 2:3].squeeze(-1) - pix_to_face = bary_depth_p2f[..., -1].long() - - return pix_to_face, barycentric_coords, depth - - @staticmethod - def _compile_and_link_gl_program(): - """ - Compile the vertex, geometry, and fragment shaders and link them into an OpenGL - program. The shader sources are strongly inspired by https://github.com/tensorflow/ - graphics/blob/master/tensorflow_graphics/rendering/opengl/rasterization_backend.py. - - Returns: - An OpenGL program for mesh rasterization. - """ - program = gl.glCreateProgram() - shader_objects = [] - - for shader_string, shader_type in zip( - [vertex_shader, geometry_shader, fragment_shader], - [gl.GL_VERTEX_SHADER, gl.GL_GEOMETRY_SHADER, gl.GL_FRAGMENT_SHADER], - ): - shader_objects.append(gl.glCreateShader(shader_type)) - gl.glShaderSource(shader_objects[-1], shader_string) - - gl.glCompileShader(shader_objects[-1]) - status = gl.glGetShaderiv(shader_objects[-1], gl.GL_COMPILE_STATUS) - if status == gl.GL_FALSE: - gl.glDeleteShader(shader_objects[-1]) - gl.glDeleteProgram(program) - error_msg = gl.glGetShaderInfoLog(shader_objects[-1]).decode("utf-8") - raise RuntimeError(f"Compilation failure:\n {error_msg}") - - gl.glAttachShader(program, shader_objects[-1]) - gl.glDeleteShader(shader_objects[-1]) - - gl.glLinkProgram(program) - status = gl.glGetProgramiv(program, gl.GL_LINK_STATUS) - - if status == gl.GL_FALSE: - gl.glDeleteProgram(program) - error_msg = gl.glGetProgramInfoLog(program) - raise RuntimeError(f"Link failure:\n {error_msg}") - - return program - - @staticmethod - def _set_up_gl_program_properties(program) -> None: - """ - Set basic OpenGL program properties: disable blending, enable depth testing, - and disable face culling. - """ - gl.glUseProgram(program) - gl.glDisable(gl.GL_BLEND) - gl.glEnable(gl.GL_DEPTH_TEST) - gl.glDisable(gl.GL_CULL_FACE) - gl.glUseProgram(0) - - @staticmethod - def _prepare_persistent_opengl_objects(program, max_faces: int): - """ - Prepare OpenGL objects that we want to persist between rasterizations. - - Args: - program: The OpenGL program the resources will be tied to. - max_faces: Max number of faces of any mesh we will rasterize. - - Returns: - perspective_projection_uniform: An OpenGL object pointing to a location of - the perspective projection matrix in OpenGL memory. - mesh_buffer_object: An OpenGL object pointing to the location of the mesh - buffer object in OpenGL memory. - vao: The OpenGL input array object. - fbo: The OpenGL output framebuffer. - - """ - gl.glUseProgram(program) - # Get location of the "uniform" (that is, an internal OpenGL variable available - # to the shaders) that we'll load the projection matrices to. - perspective_projection_uniform = gl.glGetUniformLocation( - program, "perspective_projection" - ) - - # Mesh buffer object -- our main input point. We'll copy the mesh here - # from pytorch/cuda. 
The buffer needs enough space to store the three vertices - # of each face, that is its size in bytes is - # max_faces * 3 (vertices) * 3 (coordinates) * 4 (bytes) - mesh_buffer_object = gl.glGenBuffers(1) - gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, mesh_buffer_object) - - gl.glBufferData( - gl.GL_SHADER_STORAGE_BUFFER, - max_faces * 9 * 4, - np.zeros((max_faces, 9), dtype=np.float32), - gl.GL_DYNAMIC_COPY, - ) - - # Input vertex array object. We will only use it implicitly for indexing the - # vertices, but the actual input data is passed in the shader storage buffer. - vao = gl.glGenVertexArrays(1) - - # Create the framebuffer object (fbo) where we'll store output data. - MAX_EGL_WIDTH = global_device_context_store.max_egl_width - MAX_EGL_HEIGHT = global_device_context_store.max_egl_height - color_buffer = gl.glGenRenderbuffers(1) - gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, color_buffer) - gl.glRenderbufferStorage( - gl.GL_RENDERBUFFER, gl.GL_RGBA32F, MAX_EGL_WIDTH, MAX_EGL_HEIGHT - ) - gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, 0) - - depth_buffer = gl.glGenRenderbuffers(1) - gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, depth_buffer) - gl.glRenderbufferStorage( - gl.GL_RENDERBUFFER, gl.GL_DEPTH_COMPONENT, MAX_EGL_WIDTH, MAX_EGL_HEIGHT - ) - gl.glBindRenderbuffer(gl.GL_RENDERBUFFER, 0) - - fbo = gl.glGenFramebuffers(1) - gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo) - gl.glFramebufferRenderbuffer( - gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, gl.GL_RENDERBUFFER, color_buffer - ) - gl.glFramebufferRenderbuffer( - gl.GL_FRAMEBUFFER, gl.GL_DEPTH_ATTACHMENT, gl.GL_RENDERBUFFER, depth_buffer - ) - gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0) - - gl.glUseProgram(0) - return perspective_projection_uniform, mesh_buffer_object, vao, fbo - - -def _check_cameras(cameras) -> None: - # Check that the cameras are non-None and compatible with MeshRasterizerOpenGL. - if cameras is None: - msg = "Cameras must be specified either at initialization \ - or in the forward pass of MeshRasterizer" - raise ValueError(msg) - if type(cameras).__name__ in {"PerspectiveCameras", "OrthographicCameras"}: - raise ValueError( - "MeshRasterizerOpenGL only works with FoVPerspectiveCameras and " - "FoVOrthographicCameras, which are OpenGL compatible." - ) - - -def _check_raster_settings(raster_settings) -> None: - # Check that the rasterizer's settings are compatible with MeshRasterizerOpenGL. - if raster_settings.faces_per_pixel > 1: - warnings.warn( - "MeshRasterizerOpenGL currently works only with one face per pixel." - ) - if raster_settings.cull_backfaces: - warnings.warn( - "MeshRasterizerOpenGL cannot cull backfaces yet, rasterizing without culling." - ) - if raster_settings.cull_to_frustum: - warnings.warn( - "MeshRasterizerOpenGL cannot cull to frustum yet, rasterizing without culling." - ) - if raster_settings.z_clip_value is not None: - raise NotImplementedError("MeshRasterizerOpenGL cannot do z-clipping yet.") - if raster_settings.perspective_correct is False: - raise ValueError( - "MeshRasterizerOpenGL always uses perspective-correct interpolation." - ) - - -def _convert_meshes_to_gl_ndc( - meshes_world: Meshes, image_size: Tuple[int, int], camera, **kwargs -) -> Meshes: - """ - Convert a batch of world-coordinate meshes to GL NDC coordinates. - - Args: - meshes_world: Meshes in the world coordinate system. - image_size: Image height and width, used to modify mesh coords for rendering in - non-rectangular images. 
OpenGL will expand anything within the [-1, 1] NDC - range to fit the width and height of the screen, so we will squeeze the NDCs - appropriately if rendering a rectangular image. - camera: FoV cameras. - kwargs['R'], kwargs['T']: If present, used to define the world-view transform. - """ - height, width = image_size - verts_ndc = ( - camera.get_world_to_view_transform(**kwargs) - .compose(camera.get_ndc_camera_transform(**kwargs)) - .transform_points(meshes_world.verts_padded(), eps=None) - ) - verts_ndc[..., 0] = -verts_ndc[..., 0] - verts_ndc[..., 1] = -verts_ndc[..., 1] - - # In case of a non-square viewport, transform the vertices. OpenGL will expand - # the anything within the [-1, 1] NDC range to fit the width and height of the - # screen. So to work with PyTorch3D cameras, we need to squeeze the NDCs - # appropriately. - dtype, device = verts_ndc.dtype, verts_ndc.device - if height > width: - verts_ndc = verts_ndc * torch.tensor( - [1, width / height, 1], dtype=dtype, device=device - ) - elif width > height: - verts_ndc = verts_ndc * torch.tensor( - [height / width, 1, 1], dtype=dtype, device=device - ) - - meshes_gl_ndc = meshes_world.update_padded(new_verts_padded=verts_ndc) - - return meshes_gl_ndc diff --git a/pytorch3d/pytorch3d/renderer/points/__init__.py b/pytorch3d/pytorch3d/renderer/points/__init__.py deleted file mode 100644 index 2fde33bafbaedbb4ab63c945001acaf77082165c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .compositor import AlphaCompositor, NormWeightedCompositor -from .pulsar.unified import PulsarPointsRenderer -from .rasterize_points import rasterize_points -from .rasterizer import PointsRasterizationSettings, PointsRasterizer -from .renderer import PointsRenderer - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/renderer/points/compositor.py b/pytorch3d/pytorch3d/renderer/points/compositor.py deleted file mode 100644 index 0846e53e1ef65b9bfc53727859fe294ea43e5c65..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/compositor.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Optional, Tuple, Union - -import torch -import torch.nn as nn - -from ..compositing import alpha_composite, norm_weighted_sum - - -# A compositor should take as input 3D points and some corresponding information. -# Given this information, the compositor can: -# - blend colors across the top K vertices at a pixel - - -class AlphaCompositor(nn.Module): - """ - Accumulate points using alpha compositing. 
- """ - - def __init__( - self, background_color: Optional[Union[Tuple, List, torch.Tensor]] = None - ) -> None: - super().__init__() - self.background_color = background_color - - def forward(self, fragments, alphas, ptclds, **kwargs) -> torch.Tensor: - background_color = kwargs.get("background_color", self.background_color) - images = alpha_composite(fragments, alphas, ptclds) - - # images are of shape (N, C, H, W) - # check for background color & feature size C (C=4 indicates rgba) - if background_color is not None: - return _add_background_color_to_images(fragments, images, background_color) - return images - - -class NormWeightedCompositor(nn.Module): - """ - Accumulate points using a normalized weighted sum. - """ - - def __init__( - self, background_color: Optional[Union[Tuple, List, torch.Tensor]] = None - ) -> None: - super().__init__() - self.background_color = background_color - - def forward(self, fragments, alphas, ptclds, **kwargs) -> torch.Tensor: - background_color = kwargs.get("background_color", self.background_color) - images = norm_weighted_sum(fragments, alphas, ptclds) - - # images are of shape (N, C, H, W) - # check for background color & feature size C (C=4 indicates rgba) - if background_color is not None: - return _add_background_color_to_images(fragments, images, background_color) - return images - - -def _add_background_color_to_images(pix_idxs, images, background_color): - """ - Mask pixels in images without corresponding points with a given background_color. - - Args: - pix_idxs: int32 Tensor of shape (N, points_per_pixel, image_size, image_size) - giving the indices of the nearest points at each pixel, sorted in z-order. - images: Tensor of shape (N, 4, image_size, image_size) giving the - accumulated features at each point, where 4 refers to a rgba feature. - background_color: Tensor, list, or tuple with 3 or 4 values indicating the rgb/rgba - value for the new background. Values should be in the interval [0,1]. - Returns: - images: Tensor of shape (N, 4, image_size, image_size), where pixels with - no nearest points have features set to the background color, and other - pixels with accumulated features have unchanged values. 
- """ - # Initialize background mask - background_mask = pix_idxs[:, 0] < 0 # (N, H, W) - - # Convert background_color to an appropriate tensor and check shape - if not torch.is_tensor(background_color): - background_color = images.new_tensor(background_color) - - if background_color.ndim == 0: - background_color = background_color.expand(images.shape[1]) - - if background_color.ndim > 1: - raise ValueError("Wrong shape of background_color") - - background_color = background_color.to(images) - - # add alpha channel if needed - if background_color.shape[0] + 1 == images.shape[1]: - alpha = images.new_ones(1) - background_color = torch.cat([background_color, alpha]) - - if images.shape[1] != background_color.shape[0]: - raise ValueError( - "Background color has %s channels not %s" - % (background_color.shape[0], images.shape[1]) - ) - - num_background_pixels = background_mask.sum() - - # permute so that features are the last dimension for masked_scatter to work - masked_images = images.permute(0, 2, 3, 1).masked_scatter( - background_mask[..., None], - background_color[None, :].expand(num_background_pixels, -1), - ) - - return masked_images.permute(0, 3, 1, 2) diff --git a/pytorch3d/pytorch3d/renderer/points/pulsar/__init__.py b/pytorch3d/pytorch3d/renderer/points/pulsar/__init__.py deleted file mode 100644 index 22fe5de613d2423645f0b936f53f18f0b86cfcf1..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/pulsar/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .renderer import Renderer # noqa: F401 diff --git a/pytorch3d/pytorch3d/renderer/points/pulsar/renderer.py b/pytorch3d/pytorch3d/renderer/points/pulsar/renderer.py deleted file mode 100644 index 9ba7dfd5da4c0f9297dd9399302d27866829c959..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/pulsar/renderer.py +++ /dev/null @@ -1,664 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -"""pulsar renderer PyTorch integration. - -Proper Python support for pytorch requires creating a torch.autograd.function -(independent of whether this is being done within the C++ module). This is done -here and a torch.nn.Module is exposed for the use in more complex models. -""" -import logging -import warnings -from typing import Optional, Tuple, Union - -import torch -from pytorch3d import _C -from pytorch3d.transforms import axis_angle_to_matrix, rotation_6d_to_matrix - - -LOGGER = logging.getLogger(__name__) -GAMMA_WARNING_EMITTED = False -AXANGLE_WARNING_EMITTED = False - - -class _Render(torch.autograd.Function): - """ - Differentiable rendering function for the Pulsar renderer. - - Usually this will be used through the `Renderer` module, which takes care of - setting up the buffers and putting them on the correct device. If you use - the function directly, you will have to do this manually. - - The steps for this are two-fold: first, you need to create a native Renderer - object to provide the required buffers. This is the `native_renderer` parameter - for this function. 
You can create it by creating a `pytorch3d._C.PulsarRenderer` - object (with parameters for width, height and maximum number of balls it should - be able to render). This object by default resides on the CPU. If you want to - shift the buffers to a different device, just assign an empty tensor on the target - device to its property `device_tracker`. - - To convert camera parameters from a more convenient representation to the - required vectors as in this function, you can use the static - function `pytorch3d.renderer.points.pulsar.Renderer._transform_cam_params`. - - Args: - * ctx: Pytorch context. - * vert_pos: vertex positions. [Bx]Nx3 tensor of positions in 3D space. - * vert_col: vertex colors. [Bx]NxK tensor of channels. - * vert_rad: vertex radii. [Bx]N tensor of radiuses, >0. - * cam_pos: camera position(s). [Bx]3 tensor in 3D coordinates. - * pixel_0_0_center: [Bx]3 tensor center(s) of the upper left pixel(s) in - world coordinates. - * pixel_vec_x: [Bx]3 tensor from one pixel center to the next in image x - direction in world coordinates. - * pixel_vec_y: [Bx]3 tensor from one pixel center to the next in image y - direction in world coordinates. - * focal_length: [Bx]1 tensor of focal lengths in world coordinates. - * principal_point_offsets: [Bx]2 tensor of principal point offsets in pixels. - * gamma: sphere transparency in [1.,1E-5], with 1 being mostly transparent. - [Bx]1. - * max_depth: maximum depth for spheres to render. Set this as tighly - as possible to have good numerical accuracy for gradients. - * native_renderer: a `pytorch3d._C.PulsarRenderer` object. - * min_depth: a float with the minimum depth a sphere must have to be renderer. - Must be 0. or > max(focal_length). - * bg_col: K tensor with a background color to use or None (uses all ones). - * opacity: [Bx]N tensor of opacity values in [0., 1.] or None (uses all ones). - * percent_allowed_difference: a float in [0., 1.[ with the maximum allowed - difference in color space. This is used to speed up the - computation. Default: 0.01. - * max_n_hits: a hard limit on the number of hits per ray. Default: max int. - * mode: render mode in {0, 1}. 0: render an image; 1: render the hit map. - * return_forward_info: whether to return a second map. This second map contains - 13 channels: first channel contains sm_m (the maximum exponent factor - observed), the second sm_d (the normalization denominator, the sum of all - coefficients), the third the maximum closest possible intersection for a - hit. The following channels alternate with the float encoded integer index - of a sphere and its weight. They are the five spheres with the highest - color contribution to this pixel color, ordered descending. - - Returns: - * image: [Bx]HxWxK float tensor with the resulting image. - * forw_info: [Bx]HxWx13 float forward information as described above, - if enabled. - """ - - @staticmethod - # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently. - def forward( - ctx, - vert_pos, - vert_col, - vert_rad, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - gamma, - max_depth, - native_renderer, - min_depth=0.0, - bg_col=None, - opacity=None, - percent_allowed_difference=0.01, - # pyre-fixme[16]: Module `_C` has no attribute `MAX_UINT`. 
- max_n_hits=_C.MAX_UINT, - mode=0, - return_forward_info=False, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - if mode != 0: - assert not return_forward_info, ( - "You are using a non-standard rendering mode. This does " - "not provide gradients, and also no `forward_info`. Please " - "set `return_forward_info` to `False`." - ) - ctx.gamma = gamma - ctx.max_depth = max_depth - ctx.min_depth = min_depth - ctx.percent_allowed_difference = percent_allowed_difference - ctx.max_n_hits = max_n_hits - ctx.mode = mode - ctx.native_renderer = native_renderer - image, info = ctx.native_renderer.forward( - vert_pos, - vert_col, - vert_rad, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - gamma, - max_depth, - min_depth, - bg_col, - opacity, - percent_allowed_difference, - max_n_hits, - mode, - ) - if mode != 0: - # Backprop not possible! - info = None - # Prepare for backprop. - ctx.save_for_backward( - vert_pos, - vert_col, - vert_rad, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - bg_col, - opacity, - image, - info, - ) - if return_forward_info: - return image, info - else: - return image - - @staticmethod - def backward(ctx, grad_im, *args): - global GAMMA_WARNING_EMITTED - ( - vert_pos, - vert_col, - vert_rad, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - bg_col, - opacity, - image, - info, - ) = ctx.saved_tensors - if ( - ( - ctx.needs_input_grad[0] - or ctx.needs_input_grad[2] - or ctx.needs_input_grad[3] - or ctx.needs_input_grad[4] - or ctx.needs_input_grad[5] - or ctx.needs_input_grad[6] - or ctx.needs_input_grad[7] - ) - and ctx.gamma < 1e-3 - and not GAMMA_WARNING_EMITTED - ): - warnings.warn( - "Optimizing for non-color parameters and having a gamma value < 1E-3! " - "This is probably not going to produce usable gradients." - ) - GAMMA_WARNING_EMITTED = True - if ctx.mode == 0: - ( - grad_pos, - grad_col, - grad_rad, - grad_cam_pos, - grad_pixel_0_0_center, - grad_pixel_vec_x, - grad_pixel_vec_y, - grad_opacity, - ) = ctx.native_renderer.backward( - grad_im, - image, - info, - vert_pos, - vert_col, - vert_rad, - cam_pos, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - ctx.gamma, - ctx.max_depth, - ctx.min_depth, - bg_col, - opacity, - ctx.percent_allowed_difference, - ctx.max_n_hits, - ctx.mode, - ctx.needs_input_grad[0], - ctx.needs_input_grad[1], - ctx.needs_input_grad[2], - ctx.needs_input_grad[3] - or ctx.needs_input_grad[4] - or ctx.needs_input_grad[5] - or ctx.needs_input_grad[6] - or ctx.needs_input_grad[7], - ctx.needs_input_grad[14], - None, # No debug information provided. - ) - else: - raise ValueError( - "Performing a backward pass for a " - "rendering with `mode != 0`! This is not possible." - ) - return ( - grad_pos, - grad_col, - grad_rad, - grad_cam_pos, - grad_pixel_0_0_center, - grad_pixel_vec_x, - grad_pixel_vec_y, - None, # focal_length - None, # principal_point_offsets - None, # gamma - None, # max_depth - None, # native_renderer - None, # min_depth - None, # bg_col - grad_opacity, - None, # percent_allowed_difference - None, # max_n_hits - None, # mode - None, # return_forward_info - ) - - -class Renderer(torch.nn.Module): - """ - Differentiable rendering module for the Pulsar renderer. - - Set the maximum number of balls to a reasonable value. It is used to determine - several buffer sizes. 
It is no problem to render less balls than this number, - but never more. - - When optimizing for sphere positions, sphere radiuses or camera parameters you - have to use higher gamma values (closer to one) and larger sphere sizes: spheres - can only 'move' to areas that they cover, and only with higher gamma values exists - a gradient w.r.t. their color depending on their position. - - Args: - * width: result image width in pixels. - * height: result image height in pixels. - * max_num_balls: the maximum number of balls this renderer will handle. - * orthogonal_projection: use an orthogonal instead of perspective projection. - Default: False. - * right_handed_system: use a right-handed instead of a left-handed coordinate - system. This is relevant for compatibility with other drawing or scanning - systems. Pulsar by default assumes a left-handed world and camera coordinate - system as known from mathematics with x-axis to the right, y axis up and z - axis for increasing depth along the optical axis. In the image coordinate - system, only the y axis is pointing down, leading still to a left-handed - system. If you set this to True, it is assuming a right-handed world and - camera coordinate system with x axis to the right, y axis to the top and - z axis decreasing along the optical axis. Again, the image coordinate - system has a flipped y axis, remaining a right-handed system. - Default: False. - * background_normalized_depth: the normalized depth the background is placed - at. - This is on a scale from 0. to 1. between the specified min and max depth - (see the forward function). The value 0. is the most furthest depth whereas - 1. is the closest. Be careful when setting the background too far front - it - may hide elements in your scene. Default: EPS. - * n_channels: the number of image content channels to use. This is usually three - for regular color representations, but can be a higher or lower number. - Default: 3. - * n_track: the number of spheres to track for gradient calculation per pixel. - Only the closest n_track spheres will receive gradients. Default: 5. - """ - - def __init__( - self, - width: int, - height: int, - max_num_balls: int, - orthogonal_projection: bool = False, - right_handed_system: bool = False, - # pyre-fixme[16]: Module `_C` has no attribute `EPS`. - background_normalized_depth: float = _C.EPS, - n_channels: int = 3, - n_track: int = 5, - ) -> None: - super(Renderer, self).__init__() - # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`. - self._renderer = _C.PulsarRenderer( - width, - height, - max_num_balls, - orthogonal_projection, - right_handed_system, - background_normalized_depth, - n_channels, - n_track, - ) - self.register_buffer("device_tracker", torch.zeros(1)) - - @staticmethod - def sphere_ids_from_result_info_nograd(result_info: torch.Tensor) -> torch.Tensor: - """ - Get the sphere IDs from a result info tensor. - """ - if result_info.ndim == 3: - return Renderer.sphere_ids_from_result_info_nograd(result_info[None, ...]) - # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`. - return _C.pulsar_sphere_ids_from_result_info_nograd(result_info) - - @staticmethod - def depth_map_from_result_info_nograd(result_info: torch.Tensor) -> torch.Tensor: - """ - Get the depth map from a result info tensor. - - This returns a map of the same size as the image with just one channel - containing the closest intersection value at that position. 
Gradients - are not available for this tensor, but do note that you can use - `sphere_ids_from_result_info_nograd` to get the IDs of the spheres at - each position and directly create a loss on their depth if required. - - The depth map contains -1. at positions where no intersection has - been detected. - """ - return result_info[..., 4] - - @staticmethod - def _transform_cam_params( - cam_params: torch.Tensor, - width: int, - height: int, - orthogonal: bool, - right_handed: bool, - first_R_then_T: bool = False, - ) -> Tuple[ - torch.Tensor, - torch.Tensor, - torch.Tensor, - torch.Tensor, - torch.Tensor, - torch.Tensor, - ]: - """ - Transform 8 component camera parameter vector(s) to the internal camera - representation. - - The input vectors consists of: - * 3 components for camera position, - * 3 components for camera rotation (three rotation angles) or - 6 components as described in "On the Continuity of Rotation - Representations in Neural Networks" (Zhou et al.), - * focal length, - * the sensor width in world coordinates, - * [optional] the principal point offset in x and y. - - The sensor height is inferred by pixel size and sensor width to obtain - quadratic pixels. - - Args: - * cam_params: [Bx]{8, 10, 11, 13}, input tensors as described above. - * width: number of pixels in x direction. - * height: number of pixels in y direction. - * orthogonal: bool, whether an orthogonal projection is used - (does not use focal length). - * right_handed: bool, whether to use a right handed system - (negative z in camera direction). - * first_R_then_T: bool, whether to first rotate, then translate - the camera (PyTorch3D convention). - - Returns: - * pos_vec: the position vector in 3D, - * pixel_0_0_center: the center of the upper left pixel in world coordinates, - * pixel_vec_x: the step to move one pixel on the image x axis - in world coordinates, - * pixel_vec_y: the step to move one pixel on the image y axis - in world coordinates, - * focal_length: the focal lengths, - * principal_point_offsets: the principal point offsets in x, y. - """ - global AXANGLE_WARNING_EMITTED - # Set up all direction vectors, i.e., the sensor direction of all axes. - assert width > 0 - assert height > 0 - batch_processing = True - if cam_params.ndimension() == 1: - batch_processing = False - cam_params = cam_params[None, :] - batch_size = cam_params.size(0) - continuous_rep = True - if cam_params.shape[1] in [8, 10]: - if cam_params.requires_grad and not AXANGLE_WARNING_EMITTED: - warnings.warn( - "Using an axis angle representation for camera rotations. " - "This has discontinuities and should not be used for optimization. " - "Alternatively, use a six-component representation as described in " - "'On the Continuity of Rotation Representations in Neural Networks'" - " (Zhou et al.). " - "The `pytorch3d.transforms` module provides " - "facilities for using this representation." 
- ) - AXANGLE_WARNING_EMITTED = True - continuous_rep = False - else: - assert cam_params.shape[1] in [11, 13] - pos_vec: torch.Tensor = cam_params[:, :3] - principal_point_offsets: torch.Tensor = torch.zeros( - (cam_params.shape[0], 2), dtype=torch.int32, device=cam_params.device - ) - if continuous_rep: - rot_vec = cam_params[:, 3:9] - focal_length: torch.Tensor = cam_params[:, 9:10] - sensor_size_x = cam_params[:, 10:11] - if cam_params.shape[1] == 13: - principal_point_offsets: torch.Tensor = cam_params[:, 11:13].to( - torch.int32 - ) - else: - rot_vec = cam_params[:, 3:6] - focal_length: torch.Tensor = cam_params[:, 6:7] - sensor_size_x = cam_params[:, 7:8] - if cam_params.shape[1] == 10: - principal_point_offsets: torch.Tensor = cam_params[:, 8:10].to( - torch.int32 - ) - # Always get quadratic pixels. - pixel_size_x = sensor_size_x / float(width) - sensor_size_y = height * pixel_size_x - if continuous_rep: - rot_mat = rotation_6d_to_matrix(rot_vec) - else: - rot_mat = axis_angle_to_matrix(rot_vec) - if first_R_then_T: - pos_vec = torch.matmul(rot_mat, pos_vec[..., None])[:, :, 0] - sensor_dir_x = torch.matmul( - rot_mat, - torch.tensor( - [1.0, 0.0, 0.0], dtype=torch.float32, device=rot_mat.device - ).repeat(batch_size, 1)[:, :, None], - )[:, :, 0] - sensor_dir_y = torch.matmul( - rot_mat, - torch.tensor( - [0.0, -1.0, 0.0], dtype=torch.float32, device=rot_mat.device - ).repeat(batch_size, 1)[:, :, None], - )[:, :, 0] - sensor_dir_z = torch.matmul( - rot_mat, - torch.tensor( - [0.0, 0.0, 1.0], dtype=torch.float32, device=rot_mat.device - ).repeat(batch_size, 1)[:, :, None], - )[:, :, 0] - if right_handed: - sensor_dir_z *= -1 - if orthogonal: - sensor_center = pos_vec - else: - sensor_center = pos_vec + focal_length * sensor_dir_z - sensor_luc = ( # Sensor left upper corner. - sensor_center - - sensor_dir_x * (sensor_size_x / 2.0) - - sensor_dir_y * (sensor_size_y / 2.0) - ) - pixel_size_x = sensor_size_x / float(width) - pixel_size_y = sensor_size_y / float(height) - pixel_vec_x: torch.Tensor = sensor_dir_x * pixel_size_x - pixel_vec_y: torch.Tensor = sensor_dir_y * pixel_size_y - pixel_0_0_center = sensor_luc + 0.5 * pixel_vec_x + 0.5 * pixel_vec_y - # Reduce dimension. - focal_length: torch.Tensor = focal_length[:, 0] - if batch_processing: - return ( - pos_vec, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_length, - principal_point_offsets, - ) - else: - return ( - pos_vec[0], - pixel_0_0_center[0], - pixel_vec_x[0], - pixel_vec_y[0], - focal_length[0], - principal_point_offsets[0], - ) - - def forward( - self, - vert_pos: torch.Tensor, - vert_col: torch.Tensor, - vert_rad: torch.Tensor, - cam_params: torch.Tensor, - gamma: float, - max_depth: float, - min_depth: float = 0.0, - bg_col: Optional[torch.Tensor] = None, - opacity: Optional[torch.Tensor] = None, - percent_allowed_difference: float = 0.01, - # pyre-fixme[16]: Module `_C` has no attribute `MAX_UINT`. - max_n_hits: int = _C.MAX_UINT, - mode: int = 0, - return_forward_info: bool = False, - first_R_then_T: bool = False, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, Optional[torch.Tensor]]]: - """ - Rendering pass to create an image from the provided spheres and camera - parameters. - - Args: - * vert_pos: vertex positions. [Bx]Nx3 tensor of positions in 3D space. - * vert_col: vertex colors. [Bx]NxK tensor of channels. - * vert_rad: vertex radii. [Bx]N tensor of radiuses, >0. - * cam_params: camera parameter(s). 
[Bx]8 tensor, consisting of: - - 3 components for camera position, - - 3 components for camera rotation (axis angle representation) or - 6 components as described in "On the Continuity of Rotation - Representations in Neural Networks" (Zhou et al.), - - focal length, - - the sensor width in world coordinates, - - [optional] an offset for the principal point in x, y (no gradients). - * gamma: sphere transparency in [1.,1E-5], with 1 being mostly transparent. - [Bx]1. - * max_depth: maximum depth for spheres to render. Set this as tightly - as possible to have good numerical accuracy for gradients. - float > min_depth + eps. - * min_depth: a float with the minimum depth a sphere must have to be - rendered. Must be 0. or > max(focal_length) + eps. - * bg_col: K tensor with a background color to use or None (uses all ones). - * opacity: [Bx]N tensor of opacity values in [0., 1.] or None (uses all - ones). - * percent_allowed_difference: a float in [0., 1.[ with the maximum allowed - difference in color space. This is used to speed up the - computation. Default: 0.01. - * max_n_hits: a hard limit on the number of hits per ray. Default: max int. - * mode: render mode in {0, 1}. 0: render an image; 1: render the hit map. - * return_forward_info: whether to return a second map. This second map - contains 13 channels: first channel contains sm_m (the maximum - exponent factor observed), the second sm_d (the normalization - denominator, the sum of all coefficients), the third the maximum closest - possible intersection for a hit. The following channels alternate with - the float encoded integer index of a sphere and its weight. They are the - five spheres with the highest color contribution to this pixel color, - ordered descending. Default: False. - * first_R_then_T: bool, whether to first apply rotation to the camera, - then translation (PyTorch3D convention). Default: False. - - Returns: - * image: [Bx]HxWx3 float tensor with the resulting image. - * forw_info: [Bx]HxWx13 float forward information as described above, if - enabled. - """ - # The device tracker is registered as buffer. - self._renderer.device_tracker = self.device_tracker - ( - pos_vec, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - focal_lengths, - principal_point_offsets, - ) = Renderer._transform_cam_params( - cam_params, - self._renderer.width, - self._renderer.height, - self._renderer.orthogonal, - self._renderer.right_handed, - first_R_then_T=first_R_then_T, - ) - if ( - focal_lengths.min().item() > 0.0 - and max_depth > 10_000.0 * focal_lengths.min().item() - ): - warnings.warn( - ( - "Extreme ratio of `max_depth` vs. focal length detected " - "(%f vs. %f, ratio: %f). This will likely lead to " - "artifacts due to numerical instabilities." - ) - % ( - max_depth, - focal_lengths.min().item(), - max_depth / focal_lengths.min().item(), - ) - ) - ret_res = _Render.apply( - vert_pos, - vert_col, - vert_rad, - pos_vec, - pixel_0_0_center, - pixel_vec_x, - pixel_vec_y, - # Focal length and sensor size don't need gradients other than through - # `pixel_vec_x` and `pixel_vec_y`. The focal length is only used in the - # renderer to determine the projection areas of the balls. - focal_lengths, - # principal_point_offsets does not receive gradients. 
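For illustration, an 11-component `cam_params` vector (position, six-component rotation, focal length, sensor width) following the layout described above could be assembled as below; all numeric values are placeholders:

    import torch

    cam_pos = torch.tensor([0.0, 0.0, 5.0])                  # camera position in world space
    rot_6d = torch.tensor([1.0, 0.0, 0.0, 0.0, 1.0, 0.0])    # identity rotation, 6D form
    focal_length = torch.tensor([5.0])
    sensor_width = torch.tensor([2.0])
    cam_params = torch.cat([cam_pos, rot_6d, focal_length, sensor_width])  # shape (11,)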
- principal_point_offsets, - gamma, - max_depth, - self._renderer, - min_depth, - bg_col, - opacity, - percent_allowed_difference, - max_n_hits, - mode, - (mode == 0) and return_forward_info, - ) - if return_forward_info and mode != 0: - return ret_res, None - return ret_res - - def extra_repr(self) -> str: - """Extra information to print in pytorch graphs.""" - return "width={}, height={}, max_num_balls={}".format( - self._renderer.width, self._renderer.height, self._renderer.max_num_balls - ) diff --git a/pytorch3d/pytorch3d/renderer/points/pulsar/unified.py b/pytorch3d/pytorch3d/renderer/points/pulsar/unified.py deleted file mode 100644 index 146bbb8f5b927f937260593de494c2e0cbc8fd82..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/pulsar/unified.py +++ /dev/null @@ -1,554 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -import warnings -from typing import Any, Dict, Optional, Tuple, Union - -import torch -import torch.nn as nn - -from ...camera_conversions import _pulsar_from_cameras_projection -from ...cameras import ( - FoVOrthographicCameras, - FoVPerspectiveCameras, - OrthographicCameras, - PerspectiveCameras, -) -from ..compositor import AlphaCompositor, NormWeightedCompositor -from ..rasterizer import PointsRasterizer -from .renderer import Renderer as PulsarRenderer - - -def _ensure_float_tensor(val_in, device): - """Make sure that the value provided is wrapped a PyTorch float tensor.""" - if not isinstance(val_in, torch.Tensor): - val_out = torch.tensor(val_in, dtype=torch.float32, device=device).reshape((1,)) - else: - val_out = val_in.to(torch.float32).to(device).reshape((1,)) - return val_out - - -class PulsarPointsRenderer(nn.Module): - """ - This renderer is a PyTorch3D interface wrapper around the pulsar renderer. - - It provides an interface consistent with PyTorch3D Pointcloud rendering. - It will extract all necessary information from the rasterizer and compositor - objects and convert them to the pulsar required format, then invoke rendering - in the pulsar renderer. All gradients are handled appropriately through the - wrapper and the wrapper should provide equivalent results to using the pulsar - renderer directly. - """ - - def __init__( - self, - rasterizer: PointsRasterizer, - compositor: Optional[Union[NormWeightedCompositor, AlphaCompositor]] = None, - n_channels: int = 3, - max_num_spheres: int = int(1e6), # noqa: B008 - **kwargs, - ) -> None: - """ - rasterizer (PointsRasterizer): An object encapsulating rasterization parameters. - compositor (ignored): Only keeping this for interface consistency. Default: None. - n_channels (int): The number of channels of the resulting image. Default: 3. - max_num_spheres (int): The maximum number of spheres intended to render with - this renderer. Default: 1e6. - kwargs (Any): kwargs to pass on to the pulsar renderer. - See `pytorch3d.renderer.points.pulsar.renderer.Renderer` for all options. - """ - super().__init__() - self.rasterizer = rasterizer - if compositor is not None: - warnings.warn( - "Creating a `PulsarPointsRenderer` with a compositor object! " - "This object is ignored and just allowed as an argument for interface " - "compatibility." - ) - # Initialize the pulsar renderers. 
- if not isinstance( - rasterizer.cameras, - ( - FoVOrthographicCameras, - FoVPerspectiveCameras, - PerspectiveCameras, - OrthographicCameras, - ), - ): - raise ValueError( - "Only FoVPerspectiveCameras, PerspectiveCameras, " - "FoVOrthographicCameras and OrthographicCameras are supported " - "by the pulsar backend." - ) - if isinstance(rasterizer.raster_settings.image_size, tuple): - height, width = rasterizer.raster_settings.image_size - else: - width = rasterizer.raster_settings.image_size - height = rasterizer.raster_settings.image_size - # Making sure about integer types. - width = int(width) - height = int(height) - max_num_spheres = int(max_num_spheres) - orthogonal_projection = isinstance( - rasterizer.cameras, (FoVOrthographicCameras, OrthographicCameras) - ) - n_channels = int(n_channels) - self.renderer = PulsarRenderer( - width=width, - height=height, - max_num_balls=max_num_spheres, - orthogonal_projection=orthogonal_projection, - right_handed_system=False, - n_channels=n_channels, - **kwargs, - ) - - def _conf_check(self, point_clouds, kwargs: Dict[str, Any]) -> bool: - """ - Verify internal configuration state with kwargs and pointclouds. - - This method will raise ValueError's for any inconsistencies found. It - returns whether an orthogonal projection will be used. - """ - if "gamma" not in kwargs.keys(): - raise ValueError( - "gamma is a required keyword argument for the PulsarPointsRenderer!" - ) - if ( - len(point_clouds) != len(self.rasterizer.cameras) - and len(self.rasterizer.cameras) != 1 - ): - raise ValueError( - ( - "The len(point_clouds) must either be equal to len(rasterizer.cameras) or " - "only one camera must be used. len(point_clouds): %d, " - "len(rasterizer.cameras): %d." - ) - % ( - len(point_clouds), - len(self.rasterizer.cameras), - ) - ) - # Make sure the rasterizer and cameras objects have no - # changes that can't be matched. - orthogonal_projection = isinstance( - self.rasterizer.cameras, (FoVOrthographicCameras, OrthographicCameras) - ) - if orthogonal_projection != self.renderer._renderer.orthogonal: - raise ValueError( - "The camera type can not be changed after renderer initialization! " - "Current camera orthogonal: %r. Original orthogonal: %r." - ) % (orthogonal_projection, self.renderer._renderer.orthogonal) - image_size = self.rasterizer.raster_settings.image_size - if isinstance(image_size, tuple): - expected_height, expected_width = image_size - else: - expected_height = expected_width = image_size - if expected_width != self.renderer._renderer.width: - raise ValueError( - ( - "The rasterizer width can not be changed after renderer " - "initialization! Current width: %s. Original width: %d." - ) - % ( - expected_width, - self.renderer._renderer.width, - ) - ) - if expected_height != self.renderer._renderer.height: - raise ValueError( - ( - "The rasterizer height can not be changed after renderer " - "initialization! Current height: %s. Original height: %d." - ) - % ( - expected_height, - self.renderer._renderer.height, - ) - ) - return orthogonal_projection - - def _extract_intrinsics( # noqa: C901 - self, orthogonal_projection, kwargs, cloud_idx, device - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, float, float]: - """ - Translate the camera intrinsics from PyTorch3D format to pulsar format. 
- """ - # Shorthand: - cameras = self.rasterizer.cameras - if orthogonal_projection: - focal_length = torch.zeros((1,), dtype=torch.float32) - if isinstance(cameras, FoVOrthographicCameras): - znear = kwargs.get("znear", cameras.znear)[cloud_idx] - zfar = kwargs.get("zfar", cameras.zfar)[cloud_idx] - max_y = kwargs.get("max_y", cameras.max_y)[cloud_idx] - min_y = kwargs.get("min_y", cameras.min_y)[cloud_idx] - max_x = kwargs.get("max_x", cameras.max_x)[cloud_idx] - min_x = kwargs.get("min_x", cameras.min_x)[cloud_idx] - if max_y != -min_y: - raise ValueError( - "The orthographic camera must be centered around 0. " - f"Max is {max_y} and min is {min_y}." - ) - if max_x != -min_x: - raise ValueError( - "The orthographic camera must be centered around 0. " - f"Max is {max_x} and min is {min_x}." - ) - if not torch.all( - kwargs.get("scale_xyz", cameras.scale_xyz)[cloud_idx] == 1.0 - ): - raise ValueError( - "The orthographic camera scale must be ((1.0, 1.0, 1.0),). " - f"{kwargs.get('scale_xyz', cameras.scale_xyz)[cloud_idx]}." - ) - sensor_width = max_x - min_x - if not sensor_width > 0.0: - raise ValueError( - f"The orthographic camera must have positive size! Is: {sensor_width}." # noqa: B950 - ) - principal_point_x, principal_point_y = ( - torch.zeros((1,), dtype=torch.float32), - torch.zeros((1,), dtype=torch.float32), - ) - else: - # Currently, this means it must be an 'OrthographicCameras' object. - focal_length_conf = kwargs.get("focal_length", cameras.focal_length)[ - cloud_idx - ] - if ( - focal_length_conf.numel() == 2 - and focal_length_conf[0] * self.renderer._renderer.width - - focal_length_conf[1] * self.renderer._renderer.height - > 1e-5 - ): - raise ValueError( - "Pulsar only supports a single focal length! " - "Provided: %s." % (str(focal_length_conf)) - ) - if focal_length_conf.numel() == 2: - sensor_width = 2.0 / focal_length_conf[0] - else: - if focal_length_conf.numel() != 1: - raise ValueError( - "Focal length not parsable: %s." % (str(focal_length_conf)) - ) - sensor_width = 2.0 / focal_length_conf - if "znear" not in kwargs.keys() or "zfar" not in kwargs.keys(): - raise ValueError( - "pulsar needs znear and zfar values for " - "the OrthographicCameras. Please provide them as keyword " - "argument to the forward method." - ) - znear = kwargs["znear"][cloud_idx] - zfar = kwargs["zfar"][cloud_idx] - principal_point_x = ( - kwargs.get("principal_point", cameras.principal_point)[cloud_idx][0] - * 0.5 - * self.renderer._renderer.width - ) - principal_point_y = ( - kwargs.get("principal_point", cameras.principal_point)[cloud_idx][1] - * 0.5 - * self.renderer._renderer.height - ) - else: - if not isinstance(cameras, PerspectiveCameras): - # Create a virtual focal length that is closer than znear. - znear = kwargs.get("znear", cameras.znear)[cloud_idx] - zfar = kwargs.get("zfar", cameras.zfar)[cloud_idx] - focal_length = znear - 1e-6 - # Create a sensor size that matches the expected fov assuming this f. - afov = kwargs.get("fov", cameras.fov)[cloud_idx] - if kwargs.get("degrees", cameras.degrees): - afov *= math.pi / 180.0 - sensor_width = math.tan(afov / 2.0) * 2.0 * focal_length - if not ( - kwargs.get("aspect_ratio", cameras.aspect_ratio)[cloud_idx] - - self.renderer._renderer.width / self.renderer._renderer.height - < 1e-6 - ): - raise ValueError( - "The aspect ratio (" - f"{kwargs.get('aspect_ratio', cameras.aspect_ratio)[cloud_idx]}) " - "must agree with the resolution width / height (" - f"{self.renderer._renderer.width / self.renderer._renderer.height})." 
# noqa: B950 - ) - principal_point_x, principal_point_y = ( - torch.zeros((1,), dtype=torch.float32), - torch.zeros((1,), dtype=torch.float32), - ) - else: - focal_length_conf = kwargs.get("focal_length", cameras.focal_length)[ - cloud_idx - ] - if ( - focal_length_conf.numel() == 2 - and focal_length_conf[0] * self.renderer._renderer.width - - focal_length_conf[1] * self.renderer._renderer.height - > 1e-5 - ): - raise ValueError( - "Pulsar only supports a single focal length! " - "Provided: %s." % (str(focal_length_conf)) - ) - if "znear" not in kwargs.keys() or "zfar" not in kwargs.keys(): - raise ValueError( - "pulsar needs znear and zfar values for " - "the PerspectiveCameras. Please provide them as keyword " - "argument to the forward method." - ) - znear = kwargs["znear"][cloud_idx] - zfar = kwargs["zfar"][cloud_idx] - if focal_length_conf.numel() == 2: - focal_length_px = focal_length_conf[0] - else: - if focal_length_conf.numel() != 1: - raise ValueError( - "Focal length not parsable: %s." % (str(focal_length_conf)) - ) - focal_length_px = focal_length_conf - focal_length = torch.tensor( - [ - znear - 1e-6, - ], - dtype=torch.float32, - device=focal_length_px.device, - ) - sensor_width = focal_length / focal_length_px * 2.0 - principal_point_x = ( - kwargs.get("principal_point", cameras.principal_point)[cloud_idx][0] - * 0.5 - * self.renderer._renderer.width - ) - principal_point_y = ( - kwargs.get("principal_point", cameras.principal_point)[cloud_idx][1] - * 0.5 - * self.renderer._renderer.height - ) - focal_length = _ensure_float_tensor(focal_length, device) - sensor_width = _ensure_float_tensor(sensor_width, device) - principal_point_x = _ensure_float_tensor(principal_point_x, device) - principal_point_y = _ensure_float_tensor(principal_point_y, device) - znear = _ensure_float_tensor(znear, device) - zfar = _ensure_float_tensor(zfar, device) - return ( - focal_length, - sensor_width, - principal_point_x, - principal_point_y, - znear, - zfar, - ) - - def _extract_extrinsics( - self, kwargs, cloud_idx - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Extract the extrinsic information from the kwargs for a specific point cloud. - - Instead of implementing a direct translation from the PyTorch3D to the Pulsar - camera model, we chain the two conversions of PyTorch3D->OpenCV and - OpenCV->Pulsar for better maintainability (PyTorch3D->OpenCV is maintained and - tested by the core PyTorch3D team, whereas OpenCV->Pulsar is maintained and - tested by the Pulsar team). - """ - # Shorthand: - cameras = self.rasterizer.cameras - R = kwargs.get("R", cameras.R)[cloud_idx] - T = kwargs.get("T", cameras.T)[cloud_idx] - tmp_cams = PerspectiveCameras( - R=R.unsqueeze(0), T=T.unsqueeze(0), device=R.device - ) - size_tensor = torch.tensor( - [[self.renderer._renderer.height, self.renderer._renderer.width]] - ) - pulsar_cam = _pulsar_from_cameras_projection(tmp_cams, size_tensor) - cam_pos = pulsar_cam[0, :3] - cam_rot = pulsar_cam[0, 3:9] - return cam_pos, cam_rot - - def _get_vert_rad( - self, vert_pos, cam_pos, orthogonal_projection, focal_length, kwargs, cloud_idx - ) -> torch.Tensor: - """ - Get point radiuses. - - These can be depending on the camera position in case of a perspective - transform. - """ - # Normalize point radiuses. - # `self.rasterizer.raster_settings.radius` can either be a float - # or itself a tensor. 
- raster_rad = self.rasterizer.raster_settings.radius - if kwargs.get("radius_world", False): - return raster_rad - if ( - isinstance(raster_rad, torch.Tensor) - and raster_rad.numel() > 1 - and raster_rad.ndim > 1 - ): - # In this case it must be a batched torch tensor. - raster_rad = raster_rad[cloud_idx] - if orthogonal_projection: - vert_rad = ( - torch.ones( - (vert_pos.shape[0],), dtype=torch.float32, device=vert_pos.device - ) - * raster_rad - ) - else: - point_dists = torch.norm((vert_pos - cam_pos), p=2, dim=1, keepdim=False) - vert_rad = raster_rad / focal_length.to(vert_pos.device) * point_dists - if isinstance(self.rasterizer.cameras, PerspectiveCameras): - # NDC normalization happens through adjusted focal length. - pass - else: - vert_rad = vert_rad / 2.0 # NDC normalization. - return vert_rad - - # point_clouds is not typed to avoid a cyclic dependency. - def forward(self, point_clouds, **kwargs) -> torch.Tensor: - """ - Get the rendering of the provided `Pointclouds`. - - The number of point clouds in the `Pointclouds` object determines the - number of resulting images. The provided cameras can be either 1 or equal - to the number of pointclouds (in the first case, the same camera will be - used for all clouds, in the latter case each point cloud will be rendered - with the corresponding camera). - - The following kwargs are support from PyTorch3D (depending on the selected - camera model potentially overriding camera parameters): - radius_world (bool): use the provided radiuses from the raster_settings - plain as radiuses in world space. Default: False. - znear (Iterable[float]): near geometry cutoff. Is required for - OrthographicCameras and PerspectiveCameras. - zfar (Iterable[float]): far geometry cutoff. Is required for - OrthographicCameras and PerspectiveCameras. - R (torch.Tensor): [Bx3x3] camera rotation matrices. - T (torch.Tensor): [Bx3] camera translation vectors. - principal_point (torch.Tensor): [Bx2] camera intrinsic principal - point offset vectors. - focal_length (torch.Tensor): [Bx1] camera intrinsic focal lengths. - aspect_ratio (Iterable[float]): camera aspect ratios. - fov (Iterable[float]): camera FOVs. - degrees (bool): whether FOVs are specified in degrees or - radians. - min_x (Iterable[float]): minimum x for the FoVOrthographicCameras. - max_x (Iterable[float]): maximum x for the FoVOrthographicCameras. - min_y (Iterable[float]): minimum y for the FoVOrthographicCameras. - max_y (Iterable[float]): maximum y for the FoVOrthographicCameras. - - The following kwargs are supported from pulsar: - gamma (float): The gamma value to use. This defines the transparency for - differentiability (see pulsar paper for details). Must be in [1., 1e-5] - with 1.0 being mostly transparent. This keyword argument is *required*! - bg_col (torch.Tensor): The background color. Must be a tensor on the same - device as the point clouds, with as many channels as features (no batch - dimension - it is the same for all images in the batch). - Default: 0.0 for all channels. - percent_allowed_difference (float): a value in [0., 1.[ with the maximum - allowed difference in channel space. This is used to speed up the - computation. Default: 0.01. - max_n_hits (int): a hard limit on the number of sphere hits per ray. - Default: max int. - mode (int): render mode in {0, 1}. 0: render image; 1: render hit map. - """ - orthogonal_projection: bool = self._conf_check(point_clouds, kwargs) - # Get access to inputs. We're using the list accessor and process - # them sequentially. 
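A minimal usage sketch for this wrapper with placeholder data, passing the required `gamma` and (for `PerspectiveCameras`) `znear`/`zfar` keyword arguments once per point cloud; module paths are as in this file:

    import torch
    from pytorch3d.renderer import (
        PerspectiveCameras, PointsRasterizationSettings, PointsRasterizer,
    )
    from pytorch3d.renderer.points.pulsar.unified import PulsarPointsRenderer
    from pytorch3d.structures import Pointclouds

    rasterizer = PointsRasterizer(
        cameras=PerspectiveCameras(),
        raster_settings=PointsRasterizationSettings(image_size=256, radius=0.01),
    )
    renderer = PulsarPointsRenderer(rasterizer=rasterizer)
    clouds = Pointclouds(points=[torch.rand(100, 3)], features=[torch.rand(100, 3)])
    # gamma/znear/zfar are indexed per cloud, hence the one-element tuples.
    images = renderer(clouds, gamma=(1e-4,), znear=(0.1,), zfar=(10.0,))  # (1, 256, 256, 3)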
- position_list = point_clouds.points_list() - features_list = point_clouds.features_list() - # Result list. - images = [] - for cloud_idx, (vert_pos, vert_col) in enumerate( - zip(position_list, features_list) - ): - # Get extrinsics. - cam_pos, cam_rot = self._extract_extrinsics(kwargs, cloud_idx) - # Get intrinsics. - ( - focal_length, - sensor_width, - principal_point_x, - principal_point_y, - znear, - zfar, - ) = self._extract_intrinsics( - orthogonal_projection, kwargs, cloud_idx, cam_pos.device - ) - # Put everything together. - cam_params = torch.cat( - ( - cam_pos, - cam_rot.to(cam_pos.device), - torch.cat( - [ - focal_length, - sensor_width, - principal_point_x, - principal_point_y, - ], - ), - ) - ) - # Get point radiuses (can depend on camera position). - vert_rad = self._get_vert_rad( - vert_pos, - cam_pos, - orthogonal_projection, - focal_length, - kwargs, - cloud_idx, - ) - # Clean kwargs for passing on. - gamma = kwargs["gamma"][cloud_idx] - if "first_R_then_T" in kwargs.keys(): - raise ValueError("`first_R_then_T` is not supported in this interface.") - otherargs = { - argn: argv - for argn, argv in kwargs.items() - if argn - not in [ - "radius_world", - "gamma", - "znear", - "zfar", - "R", - "T", - "principal_point", - "focal_length", - "aspect_ratio", - "fov", - "degrees", - "min_x", - "max_x", - "min_y", - "max_y", - ] - } - # background color - if "bg_col" not in otherargs: - bg_col = torch.zeros( - vert_col.shape[1], device=cam_params.device, dtype=torch.float32 - ) - otherargs["bg_col"] = bg_col - # Go! - images.append( - self.renderer( - vert_pos=vert_pos, - vert_col=vert_col, - vert_rad=vert_rad, - cam_params=cam_params, - gamma=gamma, - max_depth=zfar, - min_depth=znear, - **otherargs, - ).flip(dims=[0]) - ) - return torch.stack(images, dim=0) diff --git a/pytorch3d/pytorch3d/renderer/points/rasterize_points.py b/pytorch3d/pytorch3d/renderer/points/rasterize_points.py deleted file mode 100644 index 08211049fb48488569b50881bbc936ac9d9064c8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/rasterize_points.py +++ /dev/null @@ -1,320 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Optional, Tuple, Union - -import numpy as np -import torch -from pytorch3d import _C -from pytorch3d.renderer.mesh.rasterize_meshes import pix_to_non_square_ndc - -from ..utils import parse_image_size - - -# Maximum number of faces per bins for -# coarse-to-fine rasterization -kMaxPointsPerBin = 22 - - -def rasterize_points( - pointclouds, - image_size: Union[int, List[int], Tuple[int, int]] = 256, - radius: Union[float, List, Tuple, torch.Tensor] = 0.01, - points_per_pixel: int = 8, - bin_size: Optional[int] = None, - max_points_per_bin: Optional[int] = None, -): - """ - Each pointcloud is rasterized onto a separate image of shape - (H, W) if `image_size` is a tuple or (image_size, image_size) if it - is an int. - - If the desired image size is non square (i.e. a tuple of (H, W) where H != W) - the aspect ratio needs special consideration. There are two aspect ratios - to be aware of: - - the aspect ratio of each pixel - - the aspect ratio of the output image - The camera can be used to set the pixel aspect ratio. In the rasterizer, - we assume square pixels, but variable image aspect ratio (i.e rectangle images). 
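For example, a non-square output can be requested by passing an (H, W) tuple while keeping square pixels in the camera; a small sketch with made-up sizes:

    import torch
    from pytorch3d.renderer.points.rasterize_points import rasterize_points
    from pytorch3d.structures import Pointclouds

    clouds = Pointclouds(points=[torch.rand(100, 3)])  # coordinates already in NDC
    idx, zbuf, dists2 = rasterize_points(
        clouds, image_size=(128, 256), radius=0.01, points_per_pixel=8
    )
    # idx, zbuf and dists2 each have shape (1, 128, 256, 8)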
- - In most cases you will want to set the camera aspect ratio to - 1.0 (i.e. square pixels) and only vary the - `image_size` (i.e. the output image dimensions in pix - - Args: - pointclouds: A Pointclouds object representing a batch of point clouds to be - rasterized. This is a batch of N pointclouds, where each point cloud - can have a different number of points; the coordinates of each point - are (x, y, z). The coordinates are expected to - be in normalized device coordinates (NDC): [-1, 1]^3 with the camera at - (0, 0, 0); In the camera coordinate frame the x-axis goes from right-to-left, - the y-axis goes from bottom-to-top, and the z-axis goes from back-to-front. - image_size: Size in pixels of the output image to be rasterized. - Can optionally be a tuple of (H, W) in the case of non square images. - radius (Optional): The radius (in NDC units) of the disk to - be rasterized. This can either be a float in which case the same radius is used - for each point, or a torch.Tensor of shape (N, P) giving a radius per point - in the batch. - points_per_pixel (Optional): We will keep track of this many points per - pixel, returning the nearest points_per_pixel points along the z-axis - bin_size: Size of bins to use for coarse-to-fine rasterization. Setting - bin_size=0 uses naive rasterization; setting bin_size=None attempts to - set it heuristically based on the shape of the input. This should not - affect the output, but can affect the speed of the forward pass. - max_points_per_bin: Only applicable when using coarse-to-fine rasterization - (bin_size > 0); this is the maximum number of points allowed within each - bin. This should not affect the output values, but can affect - the memory usage in the forward pass. - - Returns: - 3-element tuple containing - - - **idx**: int32 Tensor of shape (N, image_size, image_size, points_per_pixel) - giving the indices of the nearest points at each pixel, in ascending - z-order. Concretely `idx[n, y, x, k] = p` means that `points[p]` is the kth - closest point (along the z-direction) to pixel (y, x) - note that points - represents the packed points of shape (P, 3). - Pixels that are hit by fewer than points_per_pixel are padded with -1. - - **zbuf**: Tensor of shape (N, image_size, image_size, points_per_pixel) - giving the z-coordinates of the nearest points at each pixel, sorted in - z-order. Concretely, if `idx[n, y, x, k] = p` then - `zbuf[n, y, x, k] = points[n, p, 2]`. Pixels hit by fewer than - points_per_pixel are padded with -1 - - **dists2**: Tensor of shape (N, image_size, image_size, points_per_pixel) - giving the squared Euclidean distance (in NDC units) in the x/y plane - for each point closest to the pixel. Concretely if `idx[n, y, x, k] = p` - then `dists[n, y, x, k]` is the squared distance between the pixel (y, x) - and the point `(points[n, p, 0], points[n, p, 1])`. Pixels hit with fewer - than points_per_pixel are padded with -1. - - In the case that image_size is a tuple of (H, W) then the outputs - will be of shape `(N, H, W, ...)`. - """ - points_packed = pointclouds.points_packed() - cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx() - num_points_per_cloud = pointclouds.num_points_per_cloud() - - radius = _format_radius(radius, pointclouds) - - # In the case that H != W use the max image size to set the bin_size - # to accommodate the num bins constraint in the coarse rasterizer. - # If the ratio of H:W is large this might cause issues as the smaller - # dimension will have fewer bins. 
- # TODO: consider a better way of setting the bin size. - im_size = parse_image_size(image_size) - max_image_size = max(*im_size) - - if bin_size is None: - if not points_packed.is_cuda: - # Binned CPU rasterization not fully implemented - bin_size = 0 - else: - bin_size = int(2 ** max(np.ceil(np.log2(max_image_size)) - 4, 4)) - - if bin_size != 0: - # There is a limit on the number of points per bin in the cuda kernel. - points_per_bin = 1 + (max_image_size - 1) // bin_size - if points_per_bin >= kMaxPointsPerBin: - raise ValueError( - "bin_size too small, number of points per bin must be less than %d; got %d" - % (kMaxPointsPerBin, points_per_bin) - ) - - if max_points_per_bin is None: - max_points_per_bin = int(max(10000, pointclouds._P / 5)) - - # Function.apply cannot take keyword args, so we handle defaults in this - # wrapper and call apply with positional args only - return _RasterizePoints.apply( - points_packed, - cloud_to_packed_first_idx, - num_points_per_cloud, - im_size, - radius, - points_per_pixel, - bin_size, - max_points_per_bin, - ) - - -def _format_radius( - radius: Union[float, List, Tuple, torch.Tensor], pointclouds -) -> torch.Tensor: - """ - Format the radius as a torch tensor of shape (P_packed,) - where P_packed is the total number of points in the - batch (i.e. pointclouds.points_packed().shape[0]). - - This will enable support for a different size radius - for each point in the batch. - - Args: - radius: can be a float, List, Tuple or tensor of - shape (N, P_padded) where P_padded is the - maximum number of points for each pointcloud - in the batch. - - Returns: - radius: torch.Tensor of shape (P_packed) - """ - N, P_padded = pointclouds._N, pointclouds._P - points_packed = pointclouds.points_packed() - P_packed = points_packed.shape[0] - if isinstance(radius, (list, tuple)): - radius = torch.tensor(radius).type_as(points_packed) - if isinstance(radius, torch.Tensor): - if N == 1 and radius.ndim == 1: - radius = radius[None, ...] - if radius.shape != (N, P_padded): - msg = "radius must be of shape (N, P): got %s" - raise ValueError(msg % (repr(radius.shape))) - else: - padded_to_packed_idx = pointclouds.padded_to_packed_idx() - radius = radius.view(-1)[padded_to_packed_idx] - elif isinstance(radius, float): - radius = torch.full((P_packed,), fill_value=radius).type_as(points_packed) - else: - msg = "radius must be a float, list, tuple or tensor; got %s" - raise ValueError(msg % type(radius)) - return radius - - -class _RasterizePoints(torch.autograd.Function): - @staticmethod - # pyre-fixme[14]: `forward` overrides method defined in `Function` inconsistently. - def forward( - ctx, - points, # (P, 3) - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size: Union[List[int], Tuple[int, int]] = (256, 256), - radius: Union[float, torch.Tensor] = 0.01, - points_per_pixel: int = 8, - bin_size: int = 0, - max_points_per_bin: int = 0, - ): - # TODO: Add better error handling for when there are more than - # max_points_per_bin in any bin. - args = ( - points, - cloud_to_packed_first_idx, - num_points_per_cloud, - image_size, - radius, - points_per_pixel, - bin_size, - max_points_per_bin, - ) - # pyre-fixme[16]: Module `pytorch3d` has no attribute `_C`. 
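As a worked example of the bin-size heuristic above: for a 256-pixel image the formula gives a bin size of 16, which passes the `kMaxPointsPerBin` check:

    import numpy as np

    max_image_size = 256
    bin_size = int(2 ** max(np.ceil(np.log2(max_image_size)) - 4, 4))  # 16
    points_per_bin = 1 + (max_image_size - 1) // bin_size              # 16 < kMaxPointsPerBin (22)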
- idx, zbuf, dists = _C.rasterize_points(*args) - ctx.save_for_backward(points, idx) - ctx.mark_non_differentiable(idx) - return idx, zbuf, dists - - @staticmethod - def backward(ctx, grad_idx, grad_zbuf, grad_dists): - grad_points = None - grad_cloud_to_packed_first_idx = None - grad_num_points_per_cloud = None - grad_image_size = None - grad_radius = None - grad_points_per_pixel = None - grad_bin_size = None - grad_max_points_per_bin = None - points, idx = ctx.saved_tensors - args = (points, idx, grad_zbuf, grad_dists) - grad_points = _C.rasterize_points_backward(*args) - grads = ( - grad_points, - grad_cloud_to_packed_first_idx, - grad_num_points_per_cloud, - grad_image_size, - grad_radius, - grad_points_per_pixel, - grad_bin_size, - grad_max_points_per_bin, - ) - return grads - - -def rasterize_points_python( - pointclouds, - image_size: Union[int, Tuple[int, int]] = 256, - radius: Union[float, torch.Tensor] = 0.01, - points_per_pixel: int = 8, -): - """ - Naive pure PyTorch implementation of pointcloud rasterization. - - Inputs / Outputs: Same as above - """ - N = len(pointclouds) - H, W = ( - image_size - if isinstance(image_size, (tuple, list)) - else (image_size, image_size) - ) - K = points_per_pixel - device = pointclouds.device - - points_packed = pointclouds.points_packed() - cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx() - num_points_per_cloud = pointclouds.num_points_per_cloud() - - # Support variable size radius for each point in the batch - radius = _format_radius(radius, pointclouds) - - # Initialize output tensors. - point_idxs = torch.full( - (N, H, W, K), fill_value=-1, dtype=torch.int32, device=device - ) - zbuf = torch.full((N, H, W, K), fill_value=-1, dtype=torch.float32, device=device) - pix_dists = torch.full( - (N, H, W, K), fill_value=-1, dtype=torch.float32, device=device - ) - - # NDC is from [-1, 1]. Get pixel size using specified image size. - radius2 = radius * radius - - # Iterate through the batch of point clouds. - for n in range(N): - point_start_idx = cloud_to_packed_first_idx[n] - point_stop_idx = point_start_idx + num_points_per_cloud[n] - - # Iterate through the horizontal lines of the image from top to bottom. - for yi in range(H): - # Y coordinate of one end of the image. Reverse the ordering - # of yi so that +Y is pointing up in the image. - yfix = H - 1 - yi - yf = pix_to_non_square_ndc(yfix, H, W) - - # Iterate through pixels on this horizontal line, left to right. - for xi in range(W): - # X coordinate of one end of the image. Reverse the ordering - # of xi so that +X is pointing to the left in the image. - xfix = W - 1 - xi - xf = pix_to_non_square_ndc(xfix, W, H) - - top_k_points = [] - # Check whether each point in the batch affects this pixel. 
- for p in range(point_start_idx, point_stop_idx): - px, py, pz = points_packed[p, :] - r = radius2[p] - if pz < 0: - continue - dx = px - xf - dy = py - yf - dist2 = dx * dx + dy * dy - if dist2 < r: - top_k_points.append((pz, p, dist2)) - top_k_points.sort() - if len(top_k_points) > K: - top_k_points = top_k_points[:K] - for k, (pz, p, dist2) in enumerate(top_k_points): - zbuf[n, yi, xi, k] = pz - point_idxs[n, yi, xi, k] = p - pix_dists[n, yi, xi, k] = dist2 - return point_idxs, zbuf, pix_dists diff --git a/pytorch3d/pytorch3d/renderer/points/rasterizer.py b/pytorch3d/pytorch3d/renderer/points/rasterizer.py deleted file mode 100644 index 5831994c7b88c2119aa159717e1b0af382d81842..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/rasterizer.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from dataclasses import dataclass -from typing import NamedTuple, Optional, Tuple, Union - -import torch -import torch.nn as nn -from pytorch3d.renderer.cameras import try_get_projection_transform -from pytorch3d.structures import Pointclouds - -from .rasterize_points import rasterize_points - - -class PointFragments(NamedTuple): - """ - Class to store the outputs of point rasterization - - Members: - idx: int32 Tensor of shape (N, image_size, image_size, points_per_pixel) - giving the indices of the nearest points at each pixel, in ascending - z-order. Concretely `idx[n, y, x, k] = p` means that `points[p]` is the kth - closest point (along the z-direction) to pixel (y, x) - note that points - represents the packed points of shape (P, 3). - Pixels that are hit by fewer than points_per_pixel are padded with -1. - zbuf: Tensor of shape (N, image_size, image_size, points_per_pixel) - giving the z-coordinates of the nearest points at each pixel, sorted in - z-order. Concretely, if `idx[n, y, x, k] = p` then - `zbuf[n, y, x, k] = points[n, p, 2]`. Pixels hit by fewer than - points_per_pixel are padded with -1. - dists: Tensor of shape (N, image_size, image_size, points_per_pixel) - giving the squared Euclidean distance (in NDC units) in the x/y plane - for each point closest to the pixel. Concretely if `idx[n, y, x, k] = p` - then `dists[n, y, x, k]` is the squared distance between the pixel (y, x) - and the point `(points[n, p, 0], points[n, p, 1])`. Pixels hit with fewer - than points_per_pixel are padded with -1. - """ - - idx: torch.Tensor - zbuf: torch.Tensor - dists: torch.Tensor - - -@dataclass -class PointsRasterizationSettings: - """ - Class to store the point rasterization params with defaults - - Members: - image_size: Either common height and width or (height, width), in pixels. - radius: The radius (in NDC units) of each disk to be rasterized. - This can either be a float in which case the same radius is used - for each point, or a torch.Tensor of shape (N, P) giving a radius - per point in the batch. - points_per_pixel: (int) Number of points to keep track of per pixel. - We return the nearest points_per_pixel points along the z-axis. - bin_size: Size of bins to use for coarse-to-fine rasterization. Setting - bin_size=0 uses naive rasterization; setting bin_size=None attempts - to set it heuristically based on the shape of the input. This should - not affect the output, but can affect the speed of the forward pass. 
- max_points_per_bin: Only applicable when using coarse-to-fine - rasterization (bin_size != 0); this is the maximum number of points - allowed within each bin. This should not affect the output values, - but can affect the memory usage in the forward pass. - Setting max_points_per_bin=None attempts to set with a heuristic. - """ - - image_size: Union[int, Tuple[int, int]] = 256 - radius: Union[float, torch.Tensor] = 0.01 - points_per_pixel: int = 8 - bin_size: Optional[int] = None - max_points_per_bin: Optional[int] = None - - -class PointsRasterizer(nn.Module): - """ - This class implements methods for rasterizing a batch of pointclouds. - """ - - def __init__(self, cameras=None, raster_settings=None) -> None: - """ - cameras: A cameras object which has a `transform_points` method - which returns the transformed points after applying the - world-to-view and view-to-ndc transformations. - raster_settings: the parameters for rasterization. This should be a - named tuple. - - All these initial settings can be overridden by passing keyword - arguments to the forward function. - """ - super().__init__() - if raster_settings is None: - raster_settings = PointsRasterizationSettings() - - self.cameras = cameras - self.raster_settings = raster_settings - - def transform(self, point_clouds, **kwargs) -> Pointclouds: - """ - Args: - point_clouds: a set of point clouds - - Returns: - points_proj: the points with positions projected - in NDC space - - NOTE: keeping this as a separate function for readability but it could - be moved into forward. - """ - cameras = kwargs.get("cameras", self.cameras) - if cameras is None: - msg = "Cameras must be specified either at initialization \ - or in the forward pass of PointsRasterizer" - raise ValueError(msg) - - pts_world = point_clouds.points_padded() - # NOTE: Retaining view space z coordinate for now. - # TODO: Remove this line when the convention for the z coordinate in - # the rasterizer is decided. i.e. retain z in view space or transform - # to a different range. - eps = kwargs.get("eps", None) - pts_view = cameras.get_world_to_view_transform(**kwargs).transform_points( - pts_world, eps=eps - ) - to_ndc_transform = cameras.get_ndc_camera_transform(**kwargs) - projection_transform = try_get_projection_transform(cameras, kwargs) - if projection_transform is not None: - projection_transform = projection_transform.compose(to_ndc_transform) - pts_ndc = projection_transform.transform_points(pts_view, eps=eps) - else: - # Call transform_points instead of explicitly composing transforms to handle - # the case, where camera class does not have a projection matrix form. - pts_proj = cameras.transform_points(pts_world, eps=eps) - pts_ndc = to_ndc_transform.transform_points(pts_proj, eps=eps) - - pts_ndc[..., 2] = pts_view[..., 2] - point_clouds = point_clouds.update_padded(pts_ndc) - return point_clouds - - def to(self, device): - # Manually move to device cameras as it is not a subclass of nn.Module - if self.cameras is not None: - self.cameras = self.cameras.to(device) - return self - - def forward(self, point_clouds, **kwargs) -> PointFragments: - """ - Args: - point_clouds: a set of point clouds with coordinates in world space. - Returns: - PointFragments: Rasterization outputs as a named tuple. 
- """ - points_proj = self.transform(point_clouds, **kwargs) - raster_settings = kwargs.get("raster_settings", self.raster_settings) - idx, zbuf, dists2 = rasterize_points( - points_proj, - image_size=raster_settings.image_size, - radius=raster_settings.radius, - points_per_pixel=raster_settings.points_per_pixel, - bin_size=raster_settings.bin_size, - max_points_per_bin=raster_settings.max_points_per_bin, - ) - return PointFragments(idx=idx, zbuf=zbuf, dists=dists2) diff --git a/pytorch3d/pytorch3d/renderer/points/renderer.py b/pytorch3d/pytorch3d/renderer/points/renderer.py deleted file mode 100644 index 0a83ec40458cd4ed55fd42ad4a0ebefa7b064ab8..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/points/renderer.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch -import torch.nn as nn - - -# A renderer class should be initialized with a -# function for rasterization and a function for compositing. -# The rasterizer should: -# - transform inputs from world -> screen space -# - rasterize inputs -# - return fragments -# The compositor can take fragments as input along with any other properties of -# the scene and generate images. - -# E.g. rasterize inputs and then shade -# -# fragments = self.rasterize(point_clouds) -# images = self.compositor(fragments, point_clouds) -# return images - - -class PointsRenderer(nn.Module): - """ - A class for rendering a batch of points. The class should - be initialized with a rasterizer and compositor class which each have a forward - function. - - The points are rendered with with varying alpha (weights) values depending on - the distance of the pixel center to the true point in the xy plane. The purpose - of this is to soften the hard decision boundary, for differentiability. - See Section 3.2 of "SynSin: End-to-end View Synthesis from a Single Image" - (https://arxiv.org/pdf/1912.08804.pdf) for more details. - """ - - def __init__(self, rasterizer, compositor) -> None: - super().__init__() - self.rasterizer = rasterizer - self.compositor = compositor - - def to(self, device): - # Manually move to device rasterizer as the cameras - # within the class are not of type nn.Module - self.rasterizer = self.rasterizer.to(device) - self.compositor = self.compositor.to(device) - return self - - def forward(self, point_clouds, **kwargs) -> torch.Tensor: - fragments = self.rasterizer(point_clouds, **kwargs) - - # Construct weights based on the distance of a point to the true point. - # However, this could be done differently: e.g. predicted as opposed - # to a function of the weights. - r = self.rasterizer.raster_settings.radius - - dists2 = fragments.dists.permute(0, 3, 1, 2) - weights = 1 - dists2 / (r * r) - images = self.compositor( - fragments.idx.long().permute(0, 3, 1, 2), - weights, - point_clouds.features_packed().permute(1, 0), - **kwargs, - ) - - # permute so image comes at the end - images = images.permute(0, 2, 3, 1) - - return images diff --git a/pytorch3d/pytorch3d/renderer/splatter_blend.py b/pytorch3d/pytorch3d/renderer/splatter_blend.py deleted file mode 100644 index 0149dfb30f44dd716fd21be38051266c8bceb67d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/splatter_blend.py +++ /dev/null @@ -1,566 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
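Putting the rasterizer and a compositor together gives the usual point rendering pipeline; a minimal sketch with placeholder data, assuming the standard pytorch3d exports:

    import torch
    from pytorch3d.renderer import (
        AlphaCompositor, PerspectiveCameras, PointsRasterizationSettings,
        PointsRasterizer, PointsRenderer,
    )
    from pytorch3d.structures import Pointclouds

    renderer = PointsRenderer(
        rasterizer=PointsRasterizer(
            cameras=PerspectiveCameras(),
            raster_settings=PointsRasterizationSettings(image_size=256, radius=0.01),
        ),
        compositor=AlphaCompositor(),
    )
    clouds = Pointclouds(points=[torch.rand(500, 3)], features=[torch.rand(500, 3)])
    images = renderer(clouds)  # (1, 256, 256, 3)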
-# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# This file defines SplatterBlender, which is used for blending in SplatterPhongShader. - -import itertools -from typing import Tuple - -import torch -import torch.nn.functional as F -from pytorch3d.common.datatypes import Device -from pytorch3d.renderer import BlendParams -from pytorch3d.renderer.cameras import FoVPerspectiveCameras - -from .blending import _get_background_color - - -def _precompute( - input_shape: Tuple[int, int, int, int], device: Device -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Precompute padding and offset constants that won't change for a given NHWK shape. - - Args: - input_shape: Tuple indicating N (batch size), H, W (image size) and K (number of - intersections) output by the rasterizer. - device: Device to store the tensors on. - - returns: - crop_ids_h: An (N, H, W+2, K, 9, 5) tensor, used during splatting to offset the - p-pixels (splatting pixels) in one of the 9 splatting directions within a - call to torch.gather. See comments and offset_splats for details. - crop_ids_w: An (N, H, W, K, 9, 5) tensor, used similarly to crop_ids_h. - offsets: A (1, 1, 1, 1, 9, 2) tensor (shaped so for broadcasting) containing va- - lues [-1, -1], [-1, 0], [-1, 1], [0, -1], ..., [1, 1] which correspond to - the nine splatting directions. - """ - N, H, W, K = input_shape - - # (N, H, W+2, K, 9, 5) tensor, used to reduce a tensor from (N, H+2, W+2...) to - # (N, H, W+2, ...) in torch.gather. If only torch.gather broadcasted, we wouldn't - # need the tiling. But it doesn't. - crop_ids_h = ( - torch.arange(0, H, device=device).view(1, H, 1, 1, 1, 1) - + torch.tensor([0, 1, 2, 0, 1, 2, 0, 1, 2], device=device).view( - 1, 1, 1, 1, 9, 1 - ) - ).expand(N, H, W + 2, K, 9, 5) - - # (N, H, W, K, 9, 5) tensor, used to reduce a tensor from (N, H, W+2, ...) to - # (N, H, W, ...) in torch.gather. - crop_ids_w = ( - torch.arange(0, W, device=device).view(1, 1, W, 1, 1, 1) - + torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2], device=device).view( - 1, 1, 1, 1, 9, 1 - ) - ).expand(N, H, W, K, 9, 5) - - offsets = torch.tensor( - list(itertools.product((-1, 0, 1), repeat=2)), - dtype=torch.long, - device=device, - ) - - return crop_ids_h, crop_ids_w, offsets - - -def _prepare_pixels_and_colors( - pixel_coords_cameras: torch.Tensor, - colors: torch.Tensor, - cameras: FoVPerspectiveCameras, - background_mask: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Project pixel coords into the un-inverted screen frame of reference, and set - background pixel z-values to 1.0 and alphas to 0.0. - - Args: - pixel_coords_cameras: (N, H, W, K, 3) float tensor. - colors: (N, H, W, K, 3) float tensor. - cameras: PyTorch3D cameras, for now we assume FoVPerspectiveCameras. - background_mask: (N, H, W, K) boolean tensor. - - Returns: - pixel_coords_screen: (N, H, W, K, 3) float tensor. Background pixels have - x=y=z=1.0. - colors: (N, H, W, K, 4). Alpha is set to 1 for foreground pixels and 0 for back- - ground pixels. - """ - - N, H, W, K, C = colors.shape - # pixel_coords_screen will contain invalid values at background - # intersections, and [H+0.5, W+0.5, z] at valid intersections. It is important - # to not flip the xy axes, otherwise the gradients will be inverted when the - # splatter works with a detached rasterizer. 
- pixel_coords_screen = cameras.transform_points_screen( - pixel_coords_cameras.view([N, -1, 3]), image_size=(H, W), with_xyflip=False - ).reshape(pixel_coords_cameras.shape) - - # Set colors' alpha to 1 and background to 0. - colors = torch.cat( - [colors, torch.ones_like(colors[..., :1])], dim=-1 - ) # (N, H, W, K, 4) - - # The hw values of background don't matter because their alpha is set - # to 0 in the next step (which means that no matter what their splatting kernel - # value is, they will not splat as the kernel is multiplied by alpha). However, - # their z-values need to be at max depth. Otherwise, we could incorrectly compute - # occlusion layer linkage. - pixel_coords_screen[background_mask] = 1.0 - - # Any background color value value with alpha=0 will do, as anything with - # alpha=0 will have a zero-weight splatting power. Note that neighbors can still - # splat on zero-alpha pixels: that's the way we get non-zero gradients at the - # boundary with the background. - colors[background_mask] = 0.0 - - return pixel_coords_screen, colors - - -def _get_splat_kernel_normalization( - offsets: torch.Tensor, - sigma: float = 0.5, -): - if sigma <= 0.0: - raise ValueError("Only positive standard deviations make sense.") - - epsilon = 0.05 - normalization_constant = torch.exp( - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - -(offsets**2).sum(dim=1) - / (2 * sigma**2) - ).sum() - - # We add an epsilon to the normalization constant to ensure the gradient will travel - # through non-boundary pixels' normalization factor, see Sec. 3.3.1 in "Differentia- - # ble Surface Rendering via Non-Differentiable Sampling", Cole et al. - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - return (1 + epsilon) / normalization_constant - - -def _compute_occlusion_layers( - q_depth: torch.Tensor, -) -> torch.Tensor: - """ - For each splatting pixel, decide whether it splats from a background, surface, or - foreground depth relative to the splatted pixel. See unit tests in - test_splatter_blend for some enlightening examples. - - Args: - q_depth: (N, H, W, K) tensor of z-values of the splatted pixels. - - Returns: - occlusion_layers: (N, H, W, 9) long tensor. Each of the 9 values corresponds to - one of the nine splatting directions ([-1, -1], [-1, 0], ..., [1, - 1]). The value at nhwd (where d is the splatting direction) is 0 if - the splat in direction d is on the same surface level as the pixel at - hw. The value is negative if the splat is in the background (occluded - by another splat above it that is at the same surface level as the - pixel splatted on), and the value is positive if the splat is in the - foreground. - """ - N, H, W, K = q_depth.shape - - # q are the "center pixels" and p the pixels splatting onto them. Use `unfold` to - # create `p_depth`, a tensor with 9 layers, each of which corresponds to the - # depth of a neighbor of q in one of the 9 directions. For example, p_depth[nk0hw] - # is the depth of the pixel splatting onto pixel nhwk from the [-1, -1] direction, - # and p_depth[nk4hw] the depth of q (self-splatting onto itself). 
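To make the kernel normalization above concrete: with sigma = 0.5 (the function's default) the nine-direction kernel sums to about 1.61, so each splat weight is scaled by roughly 0.65. A small sketch mirroring that computation:

    import itertools
    import torch

    offsets = torch.tensor(list(itertools.product((-1, 0, 1), repeat=2)), dtype=torch.float32)
    sigma, epsilon = 0.5, 0.05
    kernel_sum = torch.exp(-(offsets ** 2).sum(dim=1) / (2 * sigma ** 2)).sum()  # ~1.61
    scale = (1 + epsilon) / kernel_sum                                           # ~0.65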
- # More concretely, imagine the pixel depths in a 2x2 image's k-th layer are - # .1 .2 - # .3 .4 - # Then (remembering that we pad with zeros when a pixel has fewer than 9 neighbors): - # - # p_depth[n, k, :, 0, 0] = [ 0 0 0 0 .1 .2 0 .3 .4] - neighbors of .1 - # p_depth[n, k, :, 0, 1] = [ 0 0 0 .1 .2 0 .3 .4 0] - neighbors of .2 - # p_depth[n, k, :, 1, 0] = [ 0 .1 .2 0 .3 .4 0 0 0] - neighbors of .3 - # p_depth[n, k, :, 0, 1] = [.1 .2 0 .3 .4 0 0 0 0] - neighbors of .4 - q_depth = q_depth.permute(0, 3, 1, 2) # (N, K, H, W) - p_depth = F.unfold(q_depth, kernel_size=3, padding=1) # (N, 3^2 * K, H * W) - q_depth = q_depth.view(N, K, 1, H, W) - p_depth = p_depth.view(N, K, 9, H, W) - - # Take the center pixel q's top rasterization layer. This is the "surface layer" - # that we're splatting on. For each of the nine splatting directions p, find which - # of the K splatting rasterization layers is closest in depth to the surface - # splatted layer. - qtop_to_p_zdist = torch.abs(p_depth - q_depth[:, 0:1]) # (N, K, 9, H, W) - qtop_to_p_closest_zdist, qtop_to_p_closest_id = qtop_to_p_zdist.min(dim=1) - - # For each of the nine splatting directions p, take the top of the K rasterization - # layers. Check which of the K q-layers (that the given direction is splatting on) - # is closest in depth to the top splatting layer. - ptop_to_q_zdist = torch.abs(p_depth[:, 0:1] - q_depth) # (N, K, 9, H, W) - ptop_to_q_closest_zdist, ptop_to_q_closest_id = ptop_to_q_zdist.min(dim=1) - - # Decide whether each p is on the same level, below, or above the q it is splatting - # on. See Fig. 4 in [0] for an illustration. Briefly: say we're interested in pixel - # p_{h, w} = [10, 32] splatting onto its neighbor q_{h, w} = [11, 33]. The splat is - # coming from direction [-1, -1], which has index 0 in our enumeration of splatting - # directions. Hence, we are interested in - # - # P = p_depth[n, :, d=0, 11, 33] - a vector of K depth values, and - # Q = q_depth.squeeze()[n, :, 11, 33] - a vector of K depth values. - # - # If Q[0] is closest, say, to P[2], then we assume the 0th surface layer of Q is - # the same surface as P[2] that's splatting onto it, and P[:2] are foreground splats - # and P[3:] are background splats. - # - # If instead say Q[2] is closest to P[0], then all the splats are background splats, - # because the top splatting layer is the same surface as a non-top splatted layer. - # - # Finally, if Q[0] is closest to P[0], then the top-level P is splatting onto top- - # level Q, and P[1:] are all background splats. - occlusion_offsets = torch.where( # noqa - ptop_to_q_closest_zdist < qtop_to_p_closest_zdist, - -ptop_to_q_closest_id, - qtop_to_p_closest_id, - ) # (N, 9, H, W) - - occlusion_layers = occlusion_offsets.permute((0, 2, 3, 1)) # (N, H, W, 9) - return occlusion_layers - - -def _compute_splatting_colors_and_weights( - pixel_coords_screen: torch.Tensor, - colors: torch.Tensor, - sigma: float, - offsets: torch.Tensor, -) -> torch.Tensor: - """ - For each center pixel q, compute the splatting weights of its surrounding nine spla- - tting pixels p, as well as their splatting colors (which are just their colors re- - weighted by the splatting weights). - - Args: - pixel_coords_screen: (N, H, W, K, 2) tensor of pixel screen coords. - colors: (N, H, W, K, 4) RGBA tensor of pixel colors. - sigma: splatting kernel variance. - offsets: (9, 2) tensor computed by _precompute, indicating the nine - splatting directions ([-1, -1], ..., [1, 1]). - - Returns: - splat_colors_and_weights: (N, H, W, K, 9, 5) tensor. 
- splat_colors_and_weights[..., :4] corresponds to the splatting colors, and - splat_colors_and_weights[..., 4:5] to the splatting weights. The "9" di- - mension corresponds to the nine splatting directions. - """ - N, H, W, K, C = colors.shape - splat_kernel_normalization = _get_splat_kernel_normalization(offsets, sigma) - - # Distance from each barycentric-interpolated triangle vertices' triplet from its - # "ideal" pixel-center location. pixel_coords_screen are in screen coordinates, and - # should be at the "ideal" locations on the forward pass -- e.g. - # pixel_coords_screen[n, 24, 31, k] = [24.5, 31.5]. For this reason, q_to_px_center - # should equal torch.zeros during the forward pass. On the backwards pass, these - # coordinates will be adjusted and non-zero, allowing the gradients to flow back - # to the mesh vertex coordinates. - q_to_px_center = ( - torch.floor(pixel_coords_screen[..., :2]) - pixel_coords_screen[..., :2] + 0.5 - ).view((N, H, W, K, 1, 2)) - - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - dist2_p_q = torch.sum((q_to_px_center + offsets) ** 2, dim=5) # (N, H, W, K, 9) - splat_weights = torch.exp(-dist2_p_q / (2 * sigma**2)) - alpha = colors[..., 3:4] - splat_weights = (alpha * splat_kernel_normalization * splat_weights).unsqueeze( - 5 - ) # (N, H, W, K, 9, 1) - - # splat_colors[n, h, w, direction, :] contains the splatting color (weighted by the - # splatting weight) that pixel h, w will splat in one of the nine possible - # directions (e.g. nhw0 corresponds to splatting in [-1, 1] direciton, nhw4 is - # self-splatting). - splat_colors = splat_weights * colors.unsqueeze(4) # (N, H, W, K, 9, 4) - - return torch.cat([splat_colors, splat_weights], dim=5) - - -def _offset_splats( - splat_colors_and_weights: torch.Tensor, - crop_ids_h: torch.Tensor, - crop_ids_w: torch.Tensor, -) -> torch.Tensor: - """ - Pad splatting colors and weights so that tensor locations/coordinates are aligned - with the splatting directions. For example, say we have an example input Red channel - splat_colors_and_weights[n, :, :, k, direction=0, channel=0] equal to - .1 .2 .3 - .4 .5 .6 - .7 .8 .9 - the (h, w) entry indicates that pixel n, h, w, k splats the given color in direction - equal to 0, which corresponds to offsets[0] = (-1, -1). Note that this is the x-y - direction, not h-w. This function pads and crops this array to - 0 0 0 - .2 .3 0 - .5 .6 0 - which indicates, for example, that: - * There is no pixel splatting in direction (-1, -1) whose splat lands on pixel - h=w=0. - * There is a pixel splatting in direction (-1, -1) whose splat lands on the pi- - xel h=1, w=0, and that pixel's splatting color is .2. - * There is a pixel splatting in direction (-1, -1) whose splat lands on the pi- - xel h=2, w=1, and that pixel's splatting color is .6. - - Args: - *splat_colors_and_weights*: (N, H, W, K, 9, 5) tensor of colors and weights, - where dim=-2 corresponds to the splatting directions/offsets. - *crop_ids_h*: (N, H, W+2, K, 9, 5) precomputed tensor used for padding within - torch.gather. See _precompute for more info. - *crop_ids_w*: (N, H, W, K, 9, 5) precomputed tensor used for padding within - torch.gather. See _precompute for more info. - - - Returns: - *splat_colors_and_weights*: (N, H, W, K, 9, 5) tensor. - """ - N, H, W, K, _, _ = splat_colors_and_weights.shape - # Transform splat_colors such that each of the 9 layers (corresponding to - # the 9 splat offsets) is padded with 1 and shifted in the appropriate - # direction. E.g. 
splat_colors[n, :, :, 0] corresponds to the (-1, -1) - # offset, so will be padded with one rows of 1 on the right and have a - # single row clipped at the bottom, and splat_colors[n, :, :, 4] corrsponds - # to offset (0, 0) and will remain unchanged. - splat_colors_and_weights = F.pad( - splat_colors_and_weights, [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0] - ) # N, H+2, W+2, 9, 5 - - # (N, H+2, W+2, K, 9, 5) -> (N, H, W+2, K, 9, 5) - splat_colors_and_weights = torch.gather( - splat_colors_and_weights, dim=1, index=crop_ids_h - ) - - # (N, H, W+2, K, 9, 5) -> (N, H, W, K, 9, 5) - splat_colors_and_weights = torch.gather( - splat_colors_and_weights, dim=2, index=crop_ids_w - ) - - return splat_colors_and_weights - - -def _compute_splatted_colors_and_weights( - occlusion_layers: torch.Tensor, # (N, H, W, 9) - splat_colors_and_weights: torch.Tensor, # (N, H, W, K, 9, 5) -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Accumulate splatted colors in background, surface and foreground occlusion buffers. - - Args: - occlusion_layers: (N, H, W, 9) tensor. See _compute_occlusion_layers. - splat_colors_and_weights: (N, H, W, K, 9, 5) tensor. See _offset_splats. - - Returns: - splatted_colors: (N, H, W, 4, 3) tensor. Last dimension corresponds to back- - ground, surface, and foreground splat colors. - splatted_weights: (N, H, W, 1, 3) tensor. Last dimension corresponds to back- - ground, surface, and foreground splat weights and is used for normalization. - - """ - N, H, W, K, _, _ = splat_colors_and_weights.shape - - # Create an occlusion mask, with the last dimension of length 3, corresponding to - # background/surface/foreground splatting. E.g. occlusion_layer_mask[n,h,w,k,d,0] is - # 1 if the pixel at hw is splatted from direction d such that the splatting pixel p - # is below the splatted pixel q (in the background); otherwise, the value is 0. - # occlusion_layer_mask[n,h,w,k,d,1] is 1 if the splatting pixel is at the same - # surface level as the splatted pixel q, and occlusion_layer_mask[n,h,w,k,d,2] is - # 1 only if the splatting pixel is in the foreground. - layer_ids = torch.arange(K, device=splat_colors_and_weights.device).view( - 1, 1, 1, K, 1 - ) - occlusion_layers = occlusion_layers.view(N, H, W, 1, 9) - occlusion_layer_mask = torch.stack( - [ - occlusion_layers > layer_ids, # (N, H, W, K, 9) - occlusion_layers == layer_ids, # (N, H, W, K, 9) - occlusion_layers < layer_ids, # (N, H, W, K, 9) - ], - dim=5, - ).float() # (N, H, W, K, 9, 3) - - # (N * H * W, 5, 9 * K) x (N * H * W, 9 * K, 3) -> (N * H * W, 5, 3) - splatted_colors_and_weights = torch.bmm( - splat_colors_and_weights.permute(0, 1, 2, 5, 3, 4).reshape( - (N * H * W, 5, K * 9) - ), - occlusion_layer_mask.reshape((N * H * W, K * 9, 3)), - ).reshape((N, H, W, 5, 3)) - - return ( - splatted_colors_and_weights[..., :4, :], - splatted_colors_and_weights[..., 4:5, :], - ) - - -def _normalize_and_compose_all_layers( - background_color: torch.Tensor, - splatted_colors_per_occlusion_layer: torch.Tensor, - splatted_weights_per_occlusion_layer: torch.Tensor, -) -> torch.Tensor: - """ - Normalize each bg/surface/fg buffer by its weight, and compose. - - Args: - background_color: (3) RGB tensor. - splatter_colors_per_occlusion_layer: (N, H, W, 4, 3) RGBA tensor, last dimension - corresponds to foreground, surface, and background splatting. - splatted_weights_per_occlusion_layer: (N, H, W, 1, 3) weight tensor. - - Returns: - output_colors: (N, H, W, 4) RGBA tensor. 
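On the forward pass, where q_to_px_center is zero as noted above, the splatting weight for each of the nine directions reduces to a Gaussian of the offset length. A small sketch under that assumption; sigma and the offset ordering are placeholders here, since the real values come from blend_params.sigma and _precompute:

.. code-block:: python

    import torch

    sigma = 0.5  # placeholder; the renderer takes this from blend_params.sigma
    # Nine splatting offsets with the self-splat at the centre (ordering is illustrative).
    offsets = torch.tensor(
        [[dx, dy] for dx in (-1, 0, 1) for dy in (-1, 0, 1)], dtype=torch.float32
    )  # (9, 2)
    dist2_p_q = (offsets ** 2).sum(dim=1)                      # squared p-to-q distance
    splat_weights = torch.exp(-dist2_p_q / (2 * sigma ** 2))   # (9,)
    print(splat_weights)  # 1.0 for the self-splat, exp(-2) for edges, exp(-4) for corners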
- """ - device = splatted_colors_per_occlusion_layer.device - - # Normalize each of bg/surface/fg splat layers separately. - normalization_scales = 1.0 / ( - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - torch.maximum( - splatted_weights_per_occlusion_layer, - torch.tensor([1.0], device=device), - ) - ) # (N, H, W, 1, 3) - - normalized_splatted_colors = ( - splatted_colors_per_occlusion_layer * normalization_scales - ) # (N, H, W, 4, 3) - - # Use alpha-compositing to compose the splat layers. - output_colors = torch.cat( - [background_color, torch.tensor([0.0], device=device)] - ) # (4), will broadcast to (N, H, W, 4) below. - - for occlusion_layer_id in (-1, -2, -3): - # Over-compose the bg, surface, and fg occlusion layers. Note that we already - # multiplied each pixel's RGBA by its own alpha as part of self-splatting in - # _compute_splatting_colors_and_weights, so we don't re-multiply by alpha here. - alpha = normalized_splatted_colors[..., 3:4, occlusion_layer_id] # (N, H, W, 1) - output_colors = ( - normalized_splatted_colors[..., occlusion_layer_id] - + (1.0 - alpha) * output_colors - ) - return output_colors - - -class SplatterBlender(torch.nn.Module): - def __init__( - self, - input_shape: Tuple[int, int, int, int], - device, - ): - """ - A splatting blender. See `forward` docs for details of the splatting mechanism. - - Args: - input_shape: Tuple (N, H, W, K) indicating the batch size, image height, - image width, and number of rasterized layers. Used to precompute - constant tensors that do not change as long as this tuple is unchanged. - """ - super().__init__() - self.crop_ids_h, self.crop_ids_w, self.offsets = _precompute( - input_shape, device - ) - - def to(self, device): - self.offsets = self.offsets.to(device) - self.crop_ids_h = self.crop_ids_h.to(device) - self.crop_ids_w = self.crop_ids_w.to(device) - super().to(device) - - def forward( - self, - colors: torch.Tensor, - pixel_coords_cameras: torch.Tensor, - cameras: FoVPerspectiveCameras, - background_mask: torch.Tensor, - blend_params: BlendParams, - ) -> torch.Tensor: - """ - RGB blending using splatting, as proposed in [0]. - - Args: - colors: (N, H, W, K, 3) tensor of RGB colors at each h, w pixel location for - K intersection layers. - pixel_coords_cameras: (N, H, W, K, 3) tensor of pixel coordinates in the - camera frame of reference. It is *crucial* that these are computed by - interpolating triangle vertex positions using barycentric coordinates -- - this allows gradients to travel through pixel_coords_camera back to the - vertex positions. - cameras: Cameras object used to project pixel_coords_cameras screen coords. - background_mask: (N, H, W, K, 3) boolean tensor, True for bg pixels. A pixel - is considered "background" if no mesh triangle projects to it. This is - typically computed by the rasterizer. - blend_params: BlendParams, from which we use sigma (splatting kernel - variance) and background_color. - - Returns: - output_colors: (N, H, W, 4) tensor of RGBA values. The alpha layer is set to - fully transparent in the background. - - [0] Cole, F. et al., "Differentiable Surface Rendering via Non-differentiable - Sampling". - """ - - # Our implementation has 6 stages. In the description below, we will call each - # pixel q and the 9 surrounding splatting pixels (including itself) p. - # 1. Use barycentrics to compute the position of each pixel in screen - # coordinates. 
These should exactly correspond to pixel centers during the - # forward pass, but can be shifted on backwards. This step allows gradients to - # travel to vertex coordinates, even if the rasterizer is non-differentiable. - # 2a. For each center pixel q, take each splatting p and decide whether it - # is on the same surface level as q, or in the background or foreground. - # 2b. For each center pixel q, compute the splatting weight of surrounding - # pixels p, and their splatting colors (which are just the original colors - # weighted by the splatting weights). - # 3. As a vectorization technicality, offset the tensors corresponding to - # the splatting p values in the nine directions, by padding each of nine - # splatting layers on the bottom/top, left/right. - # 4. Do the actual splatting, by accumulating the splatting colors of the - # surrounding p's for each pixel q. The weights get accumulated separately for - # p's that got assigned to the background/surface/foreground in Step 2a. - # 5. Normalize each the splatted bg/surface/fg colors for each q, and - # compose the resulting color maps. - # - # Note that it is crucial that in Step 1 we compute the pixel coordinates by in- - # terpolating triangle vertices using barycentric coords from the rasterizer. In - # our case, these pixel_coords_camera are computed by the shader and passed to - # this function to avoid re-computation. - - pixel_coords_screen, colors = _prepare_pixels_and_colors( - pixel_coords_cameras, colors, cameras, background_mask - ) # (N, H, W, K, 3) and (N, H, W, K, 4) - - occlusion_layers = _compute_occlusion_layers( - pixel_coords_screen[..., 2:3].squeeze(dim=-1) - ) # (N, H, W, 9) - - splat_colors_and_weights = _compute_splatting_colors_and_weights( - pixel_coords_screen, - colors, - blend_params.sigma, - self.offsets, - ) # (N, H, W, K, 9, 5) - - splat_colors_and_weights = _offset_splats( - splat_colors_and_weights, - self.crop_ids_h, - self.crop_ids_w, - ) # (N, H, W, K, 9, 5) - - ( - splatted_colors_per_occlusion_layer, - splatted_weights_per_occlusion_layer, - ) = _compute_splatted_colors_and_weights( - occlusion_layers, splat_colors_and_weights - ) # (N, H, W, 4, 3) and (N, H, W, 1, 3) - - output_colors = _normalize_and_compose_all_layers( - _get_background_color(blend_params, colors.device), - splatted_colors_per_occlusion_layer, - splatted_weights_per_occlusion_layer, - ) # (N, H, W, 4) - - return output_colors diff --git a/pytorch3d/pytorch3d/renderer/utils.py b/pytorch3d/pytorch3d/renderer/utils.py deleted file mode 100644 index e2c37871c1fad0c530df96bdf4b779f647f9ce8e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/renderer/utils.py +++ /dev/null @@ -1,458 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import copy -import inspect -import warnings -from typing import Any, List, Optional, Tuple, TypeVar, Union - -import numpy as np -import torch -import torch.nn as nn - -from ..common.datatypes import Device, make_device - - -class TensorAccessor(nn.Module): - """ - A helper class to be used with the __getitem__ method. This can be used for - getting/setting the values for an attribute of a class at one particular - index. This is useful when the attributes of a class are batched tensors - and one element in the batch needs to be modified. 
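A short usage sketch of the accessor pattern this docstring describes, together with the TensorProperties mix-in defined just below. It assumes the upstream pytorch3d package (this diff only removes a vendored copy); the attribute names are arbitrary examples:

.. code-block:: python

    import torch
    from pytorch3d.renderer.utils import TensorProperties

    # Tensor-like kwargs are broadcast into a batch of size 2.
    props = TensorProperties(device="cpu", specular_color=torch.rand(2, 3), shininess=64.0)
    print(len(props))                          # 2
    # Indexing returns a TensorAccessor that reads/writes one batch element.
    props[1].specular_color = torch.zeros(3)   # updates props.specular_color[1] in place
    print(props.specular_color[1])             # tensor([0., 0., 0.])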
- """ - - def __init__(self, class_object, index: Union[int, slice]) -> None: - """ - Args: - class_object: this should be an instance of a class which has - attributes which are tensors representing a batch of - values. - index: int/slice, an index indicating the position in the batch. - In __setattr__ and __getattr__ only the value of class - attributes at this index will be accessed. - """ - self.__dict__["class_object"] = class_object - self.__dict__["index"] = index - - def __setattr__(self, name: str, value: Any): - """ - Update the attribute given by `name` to the value given by `value` - at the index specified by `self.index`. - - Args: - name: str, name of the attribute. - value: value to set the attribute to. - """ - v = getattr(self.class_object, name) - if not torch.is_tensor(v): - msg = "Can only set values on attributes which are tensors; got %r" - raise AttributeError(msg % type(v)) - - # Convert the attribute to a tensor if it is not a tensor. - if not torch.is_tensor(value): - value = torch.tensor( - value, device=v.device, dtype=v.dtype, requires_grad=v.requires_grad - ) - - # Check the shapes match the existing shape and the shape of the index. - if v.dim() > 1 and value.dim() > 1 and value.shape[1:] != v.shape[1:]: - msg = "Expected value to have shape %r; got %r" - raise ValueError(msg % (v.shape, value.shape)) - if ( - v.dim() == 0 - and isinstance(self.index, slice) - and len(value) != len(self.index) - ): - msg = "Expected value to have len %r; got %r" - raise ValueError(msg % (len(self.index), len(value))) - self.class_object.__dict__[name][self.index] = value - - def __getattr__(self, name: str): - """ - Return the value of the attribute given by "name" on self.class_object - at the index specified in self.index. - - Args: - name: string of the attribute name - """ - if hasattr(self.class_object, name): - return self.class_object.__dict__[name][self.index] - else: - msg = "Attribute %s not found on %r" - return AttributeError(msg % (name, self.class_object.__name__)) - - -BROADCAST_TYPES = (float, int, list, tuple, torch.Tensor, np.ndarray) - - -class TensorProperties(nn.Module): - """ - A mix-in class for storing tensors as properties with helper methods. - """ - - def __init__( - self, - dtype: torch.dtype = torch.float32, - device: Device = "cpu", - **kwargs, - ) -> None: - """ - Args: - dtype: data type to set for the inputs - device: Device (as str or torch.device) - kwargs: any number of keyword arguments. Any arguments which are - of type (float/int/list/tuple/tensor/array) are broadcasted and - other keyword arguments are set as attributes. - """ - super().__init__() - self.device = make_device(device) - self._N = 0 - if kwargs is not None: - - # broadcast all inputs which are float/int/list/tuple/tensor/array - # set as attributes anything else e.g. strings, bools - args_to_broadcast = {} - for k, v in kwargs.items(): - if v is None or isinstance(v, (str, bool)): - setattr(self, k, v) - elif isinstance(v, BROADCAST_TYPES): - args_to_broadcast[k] = v - else: - msg = "Arg %s with type %r is not broadcastable" - warnings.warn(msg % (k, type(v))) - - names = args_to_broadcast.keys() - # convert from type dict.values to tuple - values = tuple(v for v in args_to_broadcast.values()) - - if len(values) > 0: - broadcasted_values = convert_to_tensors_and_broadcast( - *values, device=device - ) - - # Set broadcasted values as attributes on self. 
- for i, n in enumerate(names): - setattr(self, n, broadcasted_values[i]) - if self._N == 0: - self._N = broadcasted_values[i].shape[0] - - def __len__(self) -> int: - return self._N - - def isempty(self) -> bool: - return self._N == 0 - - def __getitem__(self, index: Union[int, slice]) -> TensorAccessor: - """ - - Args: - index: an int or slice used to index all the fields. - - Returns: - if `index` is an index int/slice return a TensorAccessor class - with getattribute/setattribute methods which return/update the value - at the index in the original class. - """ - if isinstance(index, (int, slice)): - return TensorAccessor(class_object=self, index=index) - - msg = "Expected index of type int or slice; got %r" - raise ValueError(msg % type(index)) - - # pyre-fixme[14]: `to` overrides method defined in `Module` inconsistently. - def to(self, device: Device = "cpu") -> "TensorProperties": - """ - In place operation to move class properties which are tensors to a - specified device. If self has a property "device", update this as well. - """ - device_ = make_device(device) - for k in dir(self): - v = getattr(self, k) - if k == "device": - setattr(self, k, device_) - if torch.is_tensor(v) and v.device != device_: - setattr(self, k, v.to(device_)) - return self - - def cpu(self) -> "TensorProperties": - return self.to("cpu") - - # pyre-fixme[14]: `cuda` overrides method defined in `Module` inconsistently. - def cuda(self, device: Optional[int] = None) -> "TensorProperties": - return self.to(f"cuda:{device}" if device is not None else "cuda") - - def clone(self, other) -> "TensorProperties": - """ - Update the tensor properties of other with the cloned properties of self. - """ - for k in dir(self): - v = getattr(self, k) - if inspect.ismethod(v) or k.startswith("__") or type(v) is TypeVar: - continue - if torch.is_tensor(v): - v_clone = v.clone() - else: - v_clone = copy.deepcopy(v) - setattr(other, k, v_clone) - return other - - def gather_props(self, batch_idx) -> "TensorProperties": - """ - This is an in place operation to reformat all tensor class attributes - based on a set of given indices using torch.gather. This is useful when - attributes which are batched tensors e.g. shape (N, 3) need to be - multiplied with another tensor which has a different first dimension - e.g. packed vertices of shape (V, 3). - - Example - - .. code-block:: python - - self.specular_color = (N, 3) tensor of specular colors for each mesh - - A lighting calculation may use - - .. code-block:: python - - verts_packed = meshes.verts_packed() # (V, 3) - - To multiply these two tensors the batch dimension needs to be the same. - To achieve this we can do - - .. code-block:: python - - batch_idx = meshes.verts_packed_to_mesh_idx() # (V) - - This gives index of the mesh for each vertex in verts_packed. - - .. code-block:: python - - self.gather_props(batch_idx) - self.specular_color = (V, 3) tensor with the specular color for - each packed vertex. - - torch.gather requires the index tensor to have the same shape as the - input tensor so this method takes care of the reshaping of the index - tensor to use with class attributes with arbitrary dimensions. - - Args: - batch_idx: shape (B, ...) where `...` represents an arbitrary - number of dimensions - - Returns: - self with all properties reshaped. e.g. a property with shape (N, 3) - is transformed to shape (B, 3). - """ - # Iterate through the attributes of the class which are tensors. 
- for k in dir(self): - v = getattr(self, k) - if torch.is_tensor(v): - if v.shape[0] > 1: - # There are different values for each batch element - # so gather these using the batch_idx. - # First clone the input batch_idx tensor before - # modifying it. - _batch_idx = batch_idx.clone() - idx_dims = _batch_idx.shape - tensor_dims = v.shape - if len(idx_dims) > len(tensor_dims): - msg = "batch_idx cannot have more dimensions than %s. " - msg += "got shape %r and %s has shape %r" - raise ValueError(msg % (k, idx_dims, k, tensor_dims)) - if idx_dims != tensor_dims: - # To use torch.gather the index tensor (_batch_idx) has - # to have the same shape as the input tensor. - new_dims = len(tensor_dims) - len(idx_dims) - new_shape = idx_dims + (1,) * new_dims - expand_dims = (-1,) + tensor_dims[1:] - _batch_idx = _batch_idx.view(*new_shape) - _batch_idx = _batch_idx.expand(*expand_dims) - - v = v.gather(0, _batch_idx) - setattr(self, k, v) - return self - - -def format_tensor( - input, - dtype: torch.dtype = torch.float32, - device: Device = "cpu", -) -> torch.Tensor: - """ - Helper function for converting a scalar value to a tensor. - - Args: - input: Python scalar, Python list/tuple, torch scalar, 1D torch tensor - dtype: data type for the input - device: Device (as str or torch.device) on which the tensor should be placed. - - Returns: - input_vec: torch tensor with optional added batch dimension. - """ - device_ = make_device(device) - if not torch.is_tensor(input): - input = torch.tensor(input, dtype=dtype, device=device_) - - if input.dim() == 0: - input = input.view(1) - - if input.device == device_: - return input - - input = input.to(device=device) - return input - - -def convert_to_tensors_and_broadcast( - *args, - dtype: torch.dtype = torch.float32, - device: Device = "cpu", -): - """ - Helper function to handle parsing an arbitrary number of inputs (*args) - which all need to have the same batch dimension. - The output is a list of tensors. - - Args: - *args: an arbitrary number of inputs - Each of the values in `args` can be one of the following - - Python scalar - - Torch scalar - - Torch tensor of shape (N, K_i) or (1, K_i) where K_i are - an arbitrary number of dimensions which can vary for each - value in args. In this case each input is broadcast to a - tensor of shape (N, K_i) - dtype: data type to use when creating new tensors. - device: torch device on which the tensors should be placed. - - Output: - args: A list of tensors of shape (N, K_i) - """ - # Convert all inputs to tensors with a batch dimension - args_1d = [format_tensor(c, dtype, device) for c in args] - - # Find broadcast size - sizes = [c.shape[0] for c in args_1d] - N = max(sizes) - - args_Nd = [] - for c in args_1d: - if c.shape[0] != 1 and c.shape[0] != N: - msg = "Got non-broadcastable sizes %r" % sizes - raise ValueError(msg) - - # Expand broadcast dim and keep non broadcast dims the same size - expand_sizes = (N,) + (-1,) * len(c.shape[1:]) - args_Nd.append(c.expand(*expand_sizes)) - - return args_Nd - - -def ndc_grid_sample( - input: torch.Tensor, - grid_ndc: torch.Tensor, - *, - align_corners: bool = False, - **grid_sample_kwargs, -) -> torch.Tensor: - """ - Samples a tensor `input` of shape `(B, dim, H, W)` at 2D locations - specified by a tensor `grid_ndc` of shape `(B, ..., 2)` using - the `torch.nn.functional.grid_sample` function. - `grid_ndc` is specified in PyTorch3D NDC coordinate frame. - - Args: - input: The tensor of shape `(B, dim, H, W)` to be sampled. 
- grid_ndc: A tensor of shape `(B, ..., 2)` denoting the set of - 2D locations at which `input` is sampled. - See [1] for a detailed description of the NDC coordinates. - align_corners: Forwarded to the `torch.nn.functional.grid_sample` - call. See its docstring. - grid_sample_kwargs: Additional arguments forwarded to the - `torch.nn.functional.grid_sample` call. See the corresponding - docstring for a listing of the corresponding arguments. - - Returns: - sampled_input: A tensor of shape `(B, dim, ...)` containing the samples - of `input` at 2D locations `grid_ndc`. - - References: - [1] https://pytorch3d.org/docs/cameras - """ - - batch, *spatial_size, pt_dim = grid_ndc.shape - if batch != input.shape[0]: - raise ValueError("'input' and 'grid_ndc' have to have the same batch size.") - if input.ndim != 4: - raise ValueError("'input' has to be a 4-dimensional Tensor.") - if pt_dim != 2: - raise ValueError("The last dimension of 'grid_ndc' has to be == 2.") - - grid_ndc_flat = grid_ndc.reshape(batch, -1, 1, 2) - - # pyre-fixme[6]: For 2nd param expected `Tuple[int, int]` but got `Size`. - grid_flat = ndc_to_grid_sample_coords(grid_ndc_flat, input.shape[2:]) - - sampled_input_flat = torch.nn.functional.grid_sample( - input, grid_flat, align_corners=align_corners, **grid_sample_kwargs - ) - - sampled_input = sampled_input_flat.reshape([batch, input.shape[1], *spatial_size]) - - return sampled_input - - -def ndc_to_grid_sample_coords( - xy_ndc: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.Tensor: - """ - Convert from the PyTorch3D's NDC coordinates to - `torch.nn.functional.grid_sampler`'s coordinates. - - Args: - xy_ndc: Tensor of shape `(..., 2)` containing 2D points in the - PyTorch3D's NDC coordinates. - image_size_hw: A tuple `(image_height, image_width)` denoting the - height and width of the image tensor to sample. - Returns: - xy_grid_sample: Tensor of shape `(..., 2)` containing 2D points in the - `torch.nn.functional.grid_sample` coordinates. - """ - if len(image_size_hw) != 2 or any(s <= 0 for s in image_size_hw): - raise ValueError("'image_size_hw' has to be a 2-tuple of positive integers") - aspect = min(image_size_hw) / max(image_size_hw) - xy_grid_sample = -xy_ndc # first negate the coords - if image_size_hw[0] >= image_size_hw[1]: - xy_grid_sample[..., 1] *= aspect - else: - xy_grid_sample[..., 0] *= aspect - return xy_grid_sample - - -def parse_image_size( - image_size: Union[List[int], Tuple[int, int], int] -) -> Tuple[int, int]: - """ - Args: - image_size: A single int (for square images) or a tuple/list of two ints. - - Returns: - A tuple of two ints. - - Throws: - ValueError if got more than two ints, any negative numbers or non-ints. - """ - if not isinstance(image_size, (tuple, list)): - return (image_size, image_size) - if len(image_size) != 2: - raise ValueError("Image size can only be a tuple/list of (H, W)") - if not all(i > 0 for i in image_size): - raise ValueError("Image sizes must be greater than 0; got %d, %d" % image_size) - if not all(isinstance(i, int) for i in image_size): - raise ValueError("Image sizes must be integers; got %f, %f" % image_size) - return tuple(image_size) diff --git a/pytorch3d/pytorch3d/structures/__init__.py b/pytorch3d/pytorch3d/structures/__init__.py deleted file mode 100644 index b92e87241a5c3614876be5a04a49350b46413ff3..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/structures/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
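A quick numeric check of the NDC-to-grid_sample conversion documented above, as a minimal sketch assuming the upstream pytorch3d package. For a 128x256 image the x coordinate (the long side) is rescaled by 128 / 256 and both axes are negated:

.. code-block:: python

    import torch
    from pytorch3d.renderer.utils import ndc_to_grid_sample_coords

    xy_ndc = torch.tensor([[1.0, 0.5],
                           [-2.0, -1.0]])   # PyTorch3D NDC points (x spans [-2, 2] when W = 2H)
    xy_grid = ndc_to_grid_sample_coords(xy_ndc, (128, 256))
    print(xy_grid)
    # tensor([[-0.5000, -0.5000],
    #         [ 1.0000,  1.0000]])  -- negated, with x rescaled by 128 / 256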
-# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .meshes import join_meshes_as_batch, join_meshes_as_scene, Meshes -from .pointclouds import ( - join_pointclouds_as_batch, - join_pointclouds_as_scene, - Pointclouds, -) -from .utils import list_to_packed, list_to_padded, packed_to_list, padded_to_list -from .volumes import Volumes - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/structures/meshes.py b/pytorch3d/pytorch3d/structures/meshes.py deleted file mode 100644 index fce929bec31f7152c961c0082604c5e8347af98a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/structures/meshes.py +++ /dev/null @@ -1,1750 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Union - -import torch - -from ..common.datatypes import Device, make_device -from . import utils as struct_utils - - -class Meshes: - """ - This class provides functions for working with batches of triangulated - meshes with varying numbers of faces and vertices, and converting between - representations. - - Within Meshes, there are three different representations of the faces and - verts data: - - List - - only used for input as a starting point to convert to other representations. - Padded - - has specific batch dimension. - Packed - - no batch dimension. - - has auxiliary variables used to index into the padded representation. - - Example: - - Input list of verts V_n = [[V_1], [V_2], ... , [V_N]] - where V_1, ... , V_N are the number of verts in each mesh and N is the - number of meshes. - - Input list of faces F_n = [[F_1], [F_2], ... , [F_N]] - where F_1, ... , F_N are the number of faces in each mesh. - - # SPHINX IGNORE - List | Padded | Packed - ---------------------------|-------------------------|------------------------ - [[V_1], ... 
, [V_N]] | size = (N, max(V_n), 3) | size = (sum(V_n), 3) - | | - Example for verts: | | - | | - V_1 = 3, V_2 = 4, V_3 = 5 | size = (3, 5, 3) | size = (12, 3) - | | - List([ | tensor([ | tensor([ - [ | [ | [0.1, 0.3, 0.5], - [0.1, 0.3, 0.5], | [0.1, 0.3, 0.5], | [0.5, 0.2, 0.1], - [0.5, 0.2, 0.1], | [0.5, 0.2, 0.1], | [0.6, 0.8, 0.7], - [0.6, 0.8, 0.7], | [0.6, 0.8, 0.7], | [0.1, 0.3, 0.3], - ], | [0, 0, 0], | [0.6, 0.7, 0.8], - [ | [0, 0, 0], | [0.2, 0.3, 0.4], - [0.1, 0.3, 0.3], | ], | [0.1, 0.5, 0.3], - [0.6, 0.7, 0.8], | [ | [0.7, 0.3, 0.6], - [0.2, 0.3, 0.4], | [0.1, 0.3, 0.3], | [0.2, 0.4, 0.8], - [0.1, 0.5, 0.3], | [0.6, 0.7, 0.8], | [0.9, 0.5, 0.2], - ], | [0.2, 0.3, 0.4], | [0.2, 0.3, 0.4], - [ | [0.1, 0.5, 0.3], | [0.9, 0.3, 0.8], - [0.7, 0.3, 0.6], | [0, 0, 0], | ]) - [0.2, 0.4, 0.8], | ], | - [0.9, 0.5, 0.2], | [ | - [0.2, 0.3, 0.4], | [0.7, 0.3, 0.6], | - [0.9, 0.3, 0.8], | [0.2, 0.4, 0.8], | - ] | [0.9, 0.5, 0.2], | - ]) | [0.2, 0.3, 0.4], | - | [0.9, 0.3, 0.8], | - | ] | - | ]) | - Example for faces: | | - | | - F_1 = 1, F_2 = 2, F_3 = 7 | size = (3, 7, 3) | size = (10, 3) - | | - List([ | tensor([ | tensor([ - [ | [ | [ 0, 1, 2], - [0, 1, 2], | [0, 1, 2], | [ 3, 4, 5], - ], | [-1, -1, -1], | [ 4, 5, 6], - [ | [-1, -1, -1] | [ 8, 9, 7], - [0, 1, 2], | [-1, -1, -1] | [ 7, 8, 10], - [1, 2, 3], | [-1, -1, -1] | [ 9, 10, 8], - ], | [-1, -1, -1], | [11, 10, 9], - [ | [-1, -1, -1], | [11, 7, 8], - [1, 2, 0], | ], | [11, 10, 8], - [0, 1, 3], | [ | [11, 9, 8], - [2, 3, 1], | [0, 1, 2], | ]) - [4, 3, 2], | [1, 2, 3], | - [4, 0, 1], | [-1, -1, -1], | - [4, 3, 1], | [-1, -1, -1], | - [4, 2, 1], | [-1, -1, -1], | - ], | [-1, -1, -1], | - ]) | [-1, -1, -1], | - | ], | - | [ | - | [1, 2, 0], | - | [0, 1, 3], | - | [2, 3, 1], | - | [4, 3, 2], | - | [4, 0, 1], | - | [4, 3, 1], | - | [4, 2, 1], | - | ] | - | ]) | - ----------------------------------------------------------------------------- - - Auxiliary variables for packed representation - - Name | Size | Example from above - -------------------------------|---------------------|----------------------- - | | - verts_packed_to_mesh_idx | size = (sum(V_n)) | tensor([ - | | 0, 0, 0, 1, 1, 1, - | | 1, 2, 2, 2, 2, 2 - | | )] - | | size = (12) - | | - mesh_to_verts_packed_first_idx | size = (N) | tensor([0, 3, 7]) - | | size = (3) - | | - num_verts_per_mesh | size = (N) | tensor([3, 4, 5]) - | | size = (3) - | | - faces_packed_to_mesh_idx | size = (sum(F_n)) | tensor([ - | | 0, 1, 1, 2, 2, 2, - | | 2, 2, 2, 2 - | | )] - | | size = (10) - | | - mesh_to_faces_packed_first_idx | size = (N) | tensor([0, 1, 3]) - | | size = (3) - | | - num_faces_per_mesh | size = (N) | tensor([1, 2, 7]) - | | size = (3) - | | - verts_padded_to_packed_idx | size = (sum(V_n)) | tensor([ - | | 0, 1, 2, 5, 6, 7, - | | 8, 10, 11, 12, 13, - | | 14 - | | )] - | | size = (12) - ----------------------------------------------------------------------------- - # SPHINX IGNORE - - From the faces, edges are computed and have packed and padded - representations with auxiliary variables. - - E_n = [[E_1], ... , [E_N]] - where E_1, ... , E_N are the number of unique edges in each mesh. - Total number of unique edges = sum(E_n) - - # SPHINX IGNORE - Name | Size | Example from above - -------------------------------|-------------------------|---------------------- - | | - edges_packed | size = (sum(E_n), 2) | tensor([ - | | [0, 1], - | | [0, 2], - | | [1, 2], - | | ... 
- | | [10, 11], - | | )] - | | size = (18, 2) - | | - num_edges_per_mesh | size = (N) | tensor([3, 5, 10]) - | | size = (3) - | | - edges_packed_to_mesh_idx | size = (sum(E_n)) | tensor([ - | | 0, 0, 0, - | | . . . - | | 2, 2, 2 - | | ]) - | | size = (18) - | | - faces_packed_to_edges_packed | size = (sum(F_n), 3) | tensor([ - | | [2, 1, 0], - | | [5, 4, 3], - | | . . . - | | [12, 14, 16], - | | ]) - | | size = (10, 3) - | | - mesh_to_edges_packed_first_idx | size = (N) | tensor([0, 3, 8]) - | | size = (3) - ---------------------------------------------------------------------------- - # SPHINX IGNORE - """ - - _INTERNAL_TENSORS = [ - "_verts_packed", - "_verts_packed_to_mesh_idx", - "_mesh_to_verts_packed_first_idx", - "_verts_padded", - "_num_verts_per_mesh", - "_faces_packed", - "_faces_packed_to_mesh_idx", - "_mesh_to_faces_packed_first_idx", - "_faces_padded", - "_faces_areas_packed", - "_verts_normals_packed", - "_faces_normals_packed", - "_num_faces_per_mesh", - "_edges_packed", - "_edges_packed_to_mesh_idx", - "_mesh_to_edges_packed_first_idx", - "_faces_packed_to_edges_packed", - "_num_edges_per_mesh", - "_verts_padded_to_packed_idx", - "_laplacian_packed", - "valid", - "equisized", - ] - - def __init__( - self, - verts, - faces, - textures=None, - *, - verts_normals=None, - ) -> None: - """ - Args: - verts: - Can be either - - - List where each element is a tensor of shape (num_verts, 3) - containing the (x, y, z) coordinates of each vertex. - - Padded float tensor with shape (num_meshes, max_num_verts, 3). - Meshes should be padded with fill value of 0 so they all have - the same number of vertices. - faces: - Can be either - - - List where each element is a tensor of shape (num_faces, 3) - containing the indices of the 3 vertices in the corresponding - mesh in verts which form the triangular face. - - Padded long tensor of shape (num_meshes, max_num_faces, 3). - Meshes should be padded with fill value of -1 so they have - the same number of faces. - textures: Optional instance of the Textures class with mesh - texture properties. - verts_normals: - Optional. Can be either - - - List where each element is a tensor of shape (num_verts, 3) - containing the normals of each vertex. - - Padded float tensor with shape (num_meshes, max_num_verts, 3). - They should be padded with fill value of 0 so they all have - the same number of vertices. - Note that modifying the mesh later, e.g. with offset_verts_, - can cause these normals to be forgotten and normals to be recalculated - based on the new vertex positions. - - Refer to comments above for descriptions of List and Padded representations. - """ - self.device = torch.device("cpu") - if textures is not None and not hasattr(textures, "sample_textures"): - msg = "Expected textures to be an instance of type TexturesBase; got %r" - raise ValueError(msg % type(textures)) - - self.textures = textures - - # Indicates whether the meshes in the list/batch have the same number - # of faces and vertices. - self.equisized = False - - # Boolean indicator for each mesh in the batch - # True if mesh has non zero number of verts and face, False otherwise. - self.valid = None - - self._N = 0 # batch size (number of meshes) - self._V = 0 # (max) number of vertices per mesh - self._F = 0 # (max) number of faces per mesh - - # List of Tensors of verts and faces. - self._verts_list = None - self._faces_list = None - - # Packed representation for verts. 
- self._verts_packed = None # (sum(V_n), 3) - self._verts_packed_to_mesh_idx = None # sum(V_n) - - # Index to convert verts from flattened padded to packed - self._verts_padded_to_packed_idx = None # N * max_V - - # Index of each mesh's first vert in the packed verts. - # Assumes packing is sequential. - self._mesh_to_verts_packed_first_idx = None # N - - # Packed representation for faces. - self._faces_packed = None # (sum(F_n), 3) - self._faces_packed_to_mesh_idx = None # sum(F_n) - - # Index of each mesh's first face in packed faces. - # Assumes packing is sequential. - self._mesh_to_faces_packed_first_idx = None # N - - # Packed representation of edges sorted by index of the first vertex - # in the edge. Edges can be shared between faces in a mesh. - self._edges_packed = None # (sum(E_n), 2) - - # Map from packed edges to corresponding mesh index. - self._edges_packed_to_mesh_idx = None # sum(E_n) - self._num_edges_per_mesh = None # N - self._mesh_to_edges_packed_first_idx = None # N - - # Map from packed faces to packed edges. This represents the index of - # the edge opposite the vertex for each vertex in the face. E.g. - # - # v0 - # /\ - # / \ - # e1 / \ e2 - # / \ - # /________\ - # v2 e0 v1 - # - # Face (v0, v1, v2) => Edges (e0, e1, e2) - self._faces_packed_to_edges_packed = None # (sum(F_n), 3) - - # Padded representation of verts. - self._verts_padded = None # (N, max(V_n), 3) - self._num_verts_per_mesh = None # N - - # Padded representation of faces. - self._faces_padded = None # (N, max(F_n), 3) - self._num_faces_per_mesh = None # N - - # Face areas - self._faces_areas_packed = None - - # Normals - self._verts_normals_packed = None - self._faces_normals_packed = None - - # Packed representation of Laplacian Matrix - self._laplacian_packed = None - - # Identify type of verts and faces. - if isinstance(verts, list) and isinstance(faces, list): - self._verts_list = verts - self._faces_list = [ - f[f.gt(-1).all(1)].to(torch.int64) if len(f) > 0 else f for f in faces - ] - self._N = len(self._verts_list) - self.valid = torch.zeros((self._N,), dtype=torch.bool, device=self.device) - if self._N > 0: - self.device = self._verts_list[0].device - if not ( - all(v.device == self.device for v in verts) - and all(f.device == self.device for f in faces) - ): - raise ValueError( - "All Verts and Faces tensors should be on same device." - ) - self._num_verts_per_mesh = torch.tensor( - [len(v) for v in self._verts_list], device=self.device - ) - self._V = int(self._num_verts_per_mesh.max()) - self._num_faces_per_mesh = torch.tensor( - [len(f) for f in self._faces_list], device=self.device - ) - self._F = int(self._num_faces_per_mesh.max()) - self.valid = torch.tensor( - [ - len(v) > 0 and len(f) > 0 - for (v, f) in zip(self._verts_list, self._faces_list) - ], - dtype=torch.bool, - device=self.device, - ) - if (len(self._num_verts_per_mesh.unique()) == 1) and ( - len(self._num_faces_per_mesh.unique()) == 1 - ): - self.equisized = True - - elif torch.is_tensor(verts) and torch.is_tensor(faces): - if verts.size(2) != 3 or faces.size(2) != 3: - raise ValueError("Verts or Faces tensors have incorrect dimensions.") - self._verts_padded = verts - self._faces_padded = faces.to(torch.int64) - self._N = self._verts_padded.shape[0] - self._V = self._verts_padded.shape[1] - - if verts.device != faces.device: - msg = "Verts and Faces tensors should be on same device. \n Got {} and {}." 
- raise ValueError(msg.format(verts.device, faces.device)) - - self.device = self._verts_padded.device - self.valid = torch.zeros((self._N,), dtype=torch.bool, device=self.device) - if self._N > 0: - # Check that padded faces - which have value -1 - are at the - # end of the tensors - faces_not_padded = self._faces_padded.gt(-1).all(2) - self._num_faces_per_mesh = faces_not_padded.sum(1) - if (faces_not_padded[:, :-1] < faces_not_padded[:, 1:]).any(): - raise ValueError("Padding of faces must be at the end") - - # NOTE that we don't check for the ordering of padded verts - # as long as the faces index correspond to the right vertices. - - self.valid = self._num_faces_per_mesh > 0 - self._F = int(self._num_faces_per_mesh.max()) - if len(self._num_faces_per_mesh.unique()) == 1: - self.equisized = True - - self._num_verts_per_mesh = torch.full( - size=(self._N,), - fill_value=self._V, - dtype=torch.int64, - device=self.device, - ) - - else: - raise ValueError( - "Verts and Faces must be either a list or a tensor with \ - shape (batch_size, N, 3) where N is either the maximum \ - number of verts or faces respectively." - ) - - if self.isempty(): - self._num_verts_per_mesh = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._num_faces_per_mesh = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - - # Set the num verts/faces on the textures if present. - if textures is not None: - shape_ok = self.textures.check_shapes(self._N, self._V, self._F) - if not shape_ok: - msg = "Textures do not match the dimensions of Meshes." - raise ValueError(msg) - - self.textures._num_faces_per_mesh = self._num_faces_per_mesh.tolist() - self.textures._num_verts_per_mesh = self._num_verts_per_mesh.tolist() - self.textures.valid = self.valid - - if verts_normals is not None: - self._set_verts_normals(verts_normals) - - def _set_verts_normals(self, verts_normals) -> None: - if isinstance(verts_normals, list): - if len(verts_normals) != self._N: - raise ValueError("Invalid verts_normals input") - - for item, n_verts in zip(verts_normals, self._num_verts_per_mesh): - if ( - not isinstance(item, torch.Tensor) - or item.ndim != 2 - or item.shape[1] != 3 - or item.shape[0] != n_verts - ): - raise ValueError("Invalid verts_normals input") - self._verts_normals_packed = torch.cat(verts_normals, 0) - elif torch.is_tensor(verts_normals): - if ( - verts_normals.ndim != 3 - or verts_normals.size(2) != 3 - or verts_normals.size(0) != self._N - ): - raise ValueError("Vertex normals tensor has incorrect dimensions.") - self._verts_normals_packed = struct_utils.padded_to_packed( - verts_normals, split_size=self._num_verts_per_mesh.tolist() - ) - else: - raise ValueError("verts_normals must be a list or tensor") - - def __len__(self) -> int: - return self._N - - def __getitem__( - self, index: Union[int, List[int], slice, torch.BoolTensor, torch.LongTensor] - ) -> "Meshes": - """ - Args: - index: Specifying the index of the mesh to retrieve. - Can be an int, slice, list of ints or a boolean tensor. - - Returns: - Meshes object with selected meshes. The mesh tensors are not cloned. 
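The list/padded/packed bookkeeping documented above is easiest to see on a tiny heterogeneous batch. A hedged usage sketch, assuming the upstream pytorch3d package rather than the vendored copy removed here:

.. code-block:: python

    import torch
    from pytorch3d.structures import Meshes

    # Two meshes: a single triangle and a two-triangle quad.
    verts = [torch.rand(3, 3), torch.rand(4, 3)]
    faces = [torch.tensor([[0, 1, 2]]),
             torch.tensor([[0, 1, 2], [1, 2, 3]])]
    meshes = Meshes(verts=verts, faces=faces)

    print(meshes.verts_padded().shape)              # torch.Size([2, 4, 3]) -- padded to max(V_n)
    print(meshes.verts_packed().shape)              # torch.Size([7, 3])    -- sum(V_n)
    print(meshes.faces_padded()[0])                 # second row is the -1 padding
    print(meshes.verts_packed_to_mesh_idx())        # tensor([0, 0, 0, 1, 1, 1, 1])
    print(meshes.mesh_to_faces_packed_first_idx())  # tensor([0, 1])
    print(len(meshes[1]), meshes[1].num_faces_per_mesh())  # 1 tensor([2])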
- """ - if isinstance(index, (int, slice)): - verts = self.verts_list()[index] - faces = self.faces_list()[index] - elif isinstance(index, list): - verts = [self.verts_list()[i] for i in index] - faces = [self.faces_list()[i] for i in index] - elif isinstance(index, torch.Tensor): - if index.dim() != 1 or index.dtype.is_floating_point: - raise IndexError(index) - # NOTE consider converting index to cpu for efficiency - if index.dtype == torch.bool: - # advanced indexing on a single dimension - index = index.nonzero() - index = index.squeeze(1) if index.numel() > 0 else index - index = index.tolist() - verts = [self.verts_list()[i] for i in index] - faces = [self.faces_list()[i] for i in index] - else: - raise IndexError(index) - - textures = None if self.textures is None else self.textures[index] - - if torch.is_tensor(verts) and torch.is_tensor(faces): - return self.__class__(verts=[verts], faces=[faces], textures=textures) - elif isinstance(verts, list) and isinstance(faces, list): - return self.__class__(verts=verts, faces=faces, textures=textures) - else: - raise ValueError("(verts, faces) not defined correctly") - - def isempty(self) -> bool: - """ - Checks whether any mesh is valid. - - Returns: - bool indicating whether there is any data. - """ - return self._N == 0 or self.valid.eq(False).all() - - def verts_list(self): - """ - Get the list representation of the vertices. - - Returns: - list of tensors of vertices of shape (V_n, 3). - """ - if self._verts_list is None: - assert ( - self._verts_padded is not None - ), "verts_padded is required to compute verts_list." - self._verts_list = struct_utils.padded_to_list( - self._verts_padded, self.num_verts_per_mesh().tolist() - ) - return self._verts_list - - def faces_list(self): - """ - Get the list representation of the faces. - - Returns: - list of tensors of faces of shape (F_n, 3). - """ - if self._faces_list is None: - assert ( - self._faces_padded is not None - ), "faces_padded is required to compute faces_list." - self._faces_list = struct_utils.padded_to_list( - self._faces_padded, self.num_faces_per_mesh().tolist() - ) - return self._faces_list - - def verts_packed(self): - """ - Get the packed representation of the vertices. - - Returns: - tensor of vertices of shape (sum(V_n), 3). - """ - self._compute_packed() - return self._verts_packed - - def verts_packed_to_mesh_idx(self): - """ - Return a 1D tensor with the same first dimension as verts_packed. - verts_packed_to_mesh_idx[i] gives the index of the mesh which contains - verts_packed[i]. - - Returns: - 1D tensor of indices. - """ - self._compute_packed() - return self._verts_packed_to_mesh_idx - - def mesh_to_verts_packed_first_idx(self): - """ - Return a 1D tensor x with length equal to the number of meshes such that - the first vertex of the ith mesh is verts_packed[x[i]]. - - Returns: - 1D tensor of indices of first items. - """ - self._compute_packed() - return self._mesh_to_verts_packed_first_idx - - def num_verts_per_mesh(self): - """ - Return a 1D tensor x with length equal to the number of meshes giving - the number of vertices in each mesh. - - Returns: - 1D tensor of sizes. - """ - return self._num_verts_per_mesh - - def faces_packed(self): - """ - Get the packed representation of the faces. - Faces are given by the indices of the three vertices in verts_packed. - - Returns: - tensor of faces of shape (sum(F_n), 3). 
- """ - self._compute_packed() - return self._faces_packed - - def faces_packed_to_mesh_idx(self): - """ - Return a 1D tensor with the same first dimension as faces_packed. - faces_packed_to_mesh_idx[i] gives the index of the mesh which contains - faces_packed[i]. - - Returns: - 1D tensor of indices. - """ - self._compute_packed() - return self._faces_packed_to_mesh_idx - - def mesh_to_faces_packed_first_idx(self): - """ - Return a 1D tensor x with length equal to the number of meshes such that - the first face of the ith mesh is faces_packed[x[i]]. - - Returns: - 1D tensor of indices of first items. - """ - self._compute_packed() - return self._mesh_to_faces_packed_first_idx - - def verts_padded(self): - """ - Get the padded representation of the vertices. - - Returns: - tensor of vertices of shape (N, max(V_n), 3). - """ - self._compute_padded() - return self._verts_padded - - def faces_padded(self): - """ - Get the padded representation of the faces. - - Returns: - tensor of faces of shape (N, max(F_n), 3). - """ - self._compute_padded() - return self._faces_padded - - def num_faces_per_mesh(self): - """ - Return a 1D tensor x with length equal to the number of meshes giving - the number of faces in each mesh. - - Returns: - 1D tensor of sizes. - """ - return self._num_faces_per_mesh - - def edges_packed(self): - """ - Get the packed representation of the edges. - - Returns: - tensor of edges of shape (sum(E_n), 2). - """ - self._compute_edges_packed() - return self._edges_packed - - def edges_packed_to_mesh_idx(self): - """ - Return a 1D tensor with the same first dimension as edges_packed. - edges_packed_to_mesh_idx[i] gives the index of the mesh which contains - edges_packed[i]. - - Returns: - 1D tensor of indices. - """ - self._compute_edges_packed() - return self._edges_packed_to_mesh_idx - - def mesh_to_edges_packed_first_idx(self): - """ - Return a 1D tensor x with length equal to the number of meshes such that - the first edge of the ith mesh is edges_packed[x[i]]. - - Returns: - 1D tensor of indices of first items. - """ - self._compute_edges_packed() - return self._mesh_to_edges_packed_first_idx - - def faces_packed_to_edges_packed(self): - """ - Get the packed representation of the faces in terms of edges. - Faces are given by the indices of the three edges in - the packed representation of the edges. - - Returns: - tensor of faces of shape (sum(F_n), 3). - """ - self._compute_edges_packed() - return self._faces_packed_to_edges_packed - - def num_edges_per_mesh(self): - """ - Return a 1D tensor x with length equal to the number of meshes giving - the number of edges in each mesh. - - Returns: - 1D tensor of sizes. - """ - self._compute_edges_packed() - return self._num_edges_per_mesh - - def verts_padded_to_packed_idx(self): - """ - Return a 1D tensor x with length equal to the total number of vertices - such that verts_packed()[i] is element x[i] of the flattened padded - representation. - The packed representation can be calculated as follows. - - .. code-block:: python - - p = verts_padded().reshape(-1, 3) - verts_packed = p[x] - - Returns: - 1D tensor of indices. 
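The relation quoted in the docstring above can be checked directly; a minimal sketch assuming the upstream pytorch3d package:

.. code-block:: python

    import torch
    from pytorch3d.structures import Meshes

    meshes = Meshes(
        verts=[torch.rand(3, 3), torch.rand(5, 3)],
        faces=[torch.tensor([[0, 1, 2]]), torch.tensor([[0, 1, 2], [2, 3, 4]])],
    )
    x = meshes.verts_padded_to_packed_idx()      # tensor([0, 1, 2, 5, 6, 7, 8, 9])
    p = meshes.verts_padded().reshape(-1, 3)     # (N * max(V_n), 3) = (10, 3)
    assert torch.equal(p[x], meshes.verts_packed())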
- """ - if self._verts_padded_to_packed_idx is not None: - return self._verts_padded_to_packed_idx - - self._verts_padded_to_packed_idx = torch.cat( - [ - torch.arange(v, dtype=torch.int64, device=self.device) + i * self._V - for (i, v) in enumerate(self.num_verts_per_mesh()) - ], - dim=0, - ) - return self._verts_padded_to_packed_idx - - def has_verts_normals(self) -> bool: - """ - Check whether vertex normals are already present. - """ - return self._verts_normals_packed is not None - - def verts_normals_packed(self): - """ - Get the packed representation of the vertex normals. - - Returns: - tensor of normals of shape (sum(V_n), 3). - """ - self._compute_vertex_normals() - return self._verts_normals_packed - - def verts_normals_list(self): - """ - Get the list representation of the vertex normals. - - Returns: - list of tensors of normals of shape (V_n, 3). - """ - if self.isempty(): - return [ - torch.empty((0, 3), dtype=torch.float32, device=self.device) - ] * self._N - verts_normals_packed = self.verts_normals_packed() - split_size = self.num_verts_per_mesh().tolist() - return struct_utils.packed_to_list(verts_normals_packed, split_size) - - def verts_normals_padded(self): - """ - Get the padded representation of the vertex normals. - - Returns: - tensor of normals of shape (N, max(V_n), 3). - """ - if self.isempty(): - return torch.zeros((self._N, 0, 3), dtype=torch.float32, device=self.device) - verts_normals_list = self.verts_normals_list() - return struct_utils.list_to_padded( - verts_normals_list, (self._V, 3), pad_value=0.0, equisized=self.equisized - ) - - def faces_normals_packed(self): - """ - Get the packed representation of the face normals. - - Returns: - tensor of normals of shape (sum(F_n), 3). - """ - self._compute_face_areas_normals() - return self._faces_normals_packed - - def faces_normals_list(self): - """ - Get the list representation of the face normals. - - Returns: - list of tensors of normals of shape (F_n, 3). - """ - if self.isempty(): - return [ - torch.empty((0, 3), dtype=torch.float32, device=self.device) - ] * self._N - faces_normals_packed = self.faces_normals_packed() - split_size = self.num_faces_per_mesh().tolist() - return struct_utils.packed_to_list(faces_normals_packed, split_size) - - def faces_normals_padded(self): - """ - Get the padded representation of the face normals. - - Returns: - tensor of normals of shape (N, max(F_n), 3). - """ - if self.isempty(): - return torch.zeros((self._N, 0, 3), dtype=torch.float32, device=self.device) - faces_normals_list = self.faces_normals_list() - return struct_utils.list_to_padded( - faces_normals_list, (self._F, 3), pad_value=0.0, equisized=self.equisized - ) - - def faces_areas_packed(self): - """ - Get the packed representation of the face areas. - - Returns: - tensor of areas of shape (sum(F_n),). - """ - self._compute_face_areas_normals() - return self._faces_areas_packed - - def laplacian_packed(self): - self._compute_laplacian_packed() - return self._laplacian_packed - - def _compute_face_areas_normals(self, refresh: bool = False): - """ - Compute the area and normal of each face in faces_packed. - The convention of a normal for a face consisting of verts [v0, v1, v2] - is normal = (v1 - v0) x (v2 - v0) - - Args: - refresh: Set to True to force recomputation of face areas. - Default: False. 
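For a single triangle, the face-normal and area convention stated above works out as follows; a self-contained sketch using plain PyTorch:

.. code-block:: python

    import torch

    v0 = torch.tensor([0.0, 0.0, 0.0])
    v1 = torch.tensor([1.0, 0.0, 0.0])
    v2 = torch.tensor([0.0, 1.0, 0.0])
    normal = torch.cross(v1 - v0, v2 - v0, dim=0)  # tensor([0., 0., 1.]) for this CCW triangle
    area = 0.5 * normal.norm()                     # the cross product's norm is 2 * area
    print(normal / normal.norm(), area)            # unit normal and area 0.5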
- """ - from ..ops.mesh_face_areas_normals import mesh_face_areas_normals - - if not ( - refresh - or any( - v is None - for v in [self._faces_areas_packed, self._faces_normals_packed] - ) - ): - return - faces_packed = self.faces_packed() - verts_packed = self.verts_packed() - face_areas, face_normals = mesh_face_areas_normals(verts_packed, faces_packed) - self._faces_areas_packed = face_areas - self._faces_normals_packed = face_normals - - def _compute_vertex_normals(self, refresh: bool = False): - """Computes the packed version of vertex normals from the packed verts - and faces. This assumes verts are shared between faces. The normal for - a vertex is computed as the sum of the normals of all the faces it is - part of weighed by the face areas. - - Args: - refresh: Set to True to force recomputation of vertex normals. - Default: False. - """ - if not (refresh or any(v is None for v in [self._verts_normals_packed])): - return - - if self.isempty(): - self._verts_normals_packed = torch.zeros( - (self._N, 3), dtype=torch.int64, device=self.device - ) - else: - faces_packed = self.faces_packed() - verts_packed = self.verts_packed() - verts_normals = torch.zeros_like(verts_packed) - vertices_faces = verts_packed[faces_packed] - - faces_normals = torch.cross( - vertices_faces[:, 2] - vertices_faces[:, 1], - vertices_faces[:, 0] - vertices_faces[:, 1], - dim=1, - ) - - # NOTE: this is already applying the area weighting as the magnitude - # of the cross product is 2 x area of the triangle. - verts_normals = verts_normals.index_add( - 0, faces_packed[:, 0], faces_normals - ) - verts_normals = verts_normals.index_add( - 0, faces_packed[:, 1], faces_normals - ) - verts_normals = verts_normals.index_add( - 0, faces_packed[:, 2], faces_normals - ) - - self._verts_normals_packed = torch.nn.functional.normalize( - verts_normals, eps=1e-6, dim=1 - ) - - def _compute_padded(self, refresh: bool = False): - """ - Computes the padded version of meshes from verts_list and faces_list. - """ - if not ( - refresh or any(v is None for v in [self._verts_padded, self._faces_padded]) - ): - return - - verts_list = self.verts_list() - faces_list = self.faces_list() - assert ( - faces_list is not None and verts_list is not None - ), "faces_list and verts_list arguments are required" - - if self.isempty(): - self._faces_padded = torch.zeros( - (self._N, 0, 3), dtype=torch.int64, device=self.device - ) - self._verts_padded = torch.zeros( - (self._N, 0, 3), dtype=torch.float32, device=self.device - ) - else: - self._faces_padded = struct_utils.list_to_padded( - faces_list, (self._F, 3), pad_value=-1.0, equisized=self.equisized - ) - self._verts_padded = struct_utils.list_to_padded( - verts_list, (self._V, 3), pad_value=0.0, equisized=self.equisized - ) - - # TODO(nikhilar) Improve performance of _compute_packed. - def _compute_packed(self, refresh: bool = False): - """ - Computes the packed version of the meshes from verts_list and faces_list - and sets the values of auxiliary tensors. - - Args: - refresh: Set to True to force recomputation of packed representations. - Default: False. - """ - - if not ( - refresh - or any( - v is None - for v in [ - self._verts_packed, - self._verts_packed_to_mesh_idx, - self._mesh_to_verts_packed_first_idx, - self._faces_packed, - self._faces_packed_to_mesh_idx, - self._mesh_to_faces_packed_first_idx, - ] - ) - ): - return - - # Packed can be calculated from padded or list, so can call the - # accessor function for verts_list and faces_list. 
- verts_list = self.verts_list() - faces_list = self.faces_list() - if self.isempty(): - self._verts_packed = torch.zeros( - (0, 3), dtype=torch.float32, device=self.device - ) - self._verts_packed_to_mesh_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._mesh_to_verts_packed_first_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._num_verts_per_mesh = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._faces_packed = -( - torch.ones((0, 3), dtype=torch.int64, device=self.device) - ) - self._faces_packed_to_mesh_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._mesh_to_faces_packed_first_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._num_faces_per_mesh = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - return - - verts_list_to_packed = struct_utils.list_to_packed(verts_list) - self._verts_packed = verts_list_to_packed[0] - if not torch.allclose(self.num_verts_per_mesh(), verts_list_to_packed[1]): - raise ValueError("The number of verts per mesh should be consistent.") - self._mesh_to_verts_packed_first_idx = verts_list_to_packed[2] - self._verts_packed_to_mesh_idx = verts_list_to_packed[3] - - faces_list_to_packed = struct_utils.list_to_packed(faces_list) - faces_packed = faces_list_to_packed[0] - if not torch.allclose(self.num_faces_per_mesh(), faces_list_to_packed[1]): - raise ValueError("The number of faces per mesh should be consistent.") - self._mesh_to_faces_packed_first_idx = faces_list_to_packed[2] - self._faces_packed_to_mesh_idx = faces_list_to_packed[3] - - faces_packed_offset = self._mesh_to_verts_packed_first_idx[ - self._faces_packed_to_mesh_idx - ] - self._faces_packed = faces_packed + faces_packed_offset.view(-1, 1) - - def _compute_edges_packed(self, refresh: bool = False): - """ - Computes edges in packed form from the packed version of faces and verts. - """ - if not ( - refresh - or any( - v is None - for v in [ - self._edges_packed, - self._faces_packed_to_mesh_idx, - self._edges_packed_to_mesh_idx, - self._num_edges_per_mesh, - self._mesh_to_edges_packed_first_idx, - ] - ) - ): - return - - if self.isempty(): - self._edges_packed = torch.full( - (0, 2), fill_value=-1, dtype=torch.int64, device=self.device - ) - self._edges_packed_to_mesh_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - return - - faces = self.faces_packed() - F = faces.shape[0] - v0, v1, v2 = faces.chunk(3, dim=1) - e01 = torch.cat([v0, v1], dim=1) # (sum(F_n), 2) - e12 = torch.cat([v1, v2], dim=1) # (sum(F_n), 2) - e20 = torch.cat([v2, v0], dim=1) # (sum(F_n), 2) - - # All edges including duplicates. - edges = torch.cat([e12, e20, e01], dim=0) # (sum(F_n)*3, 2) - edge_to_mesh = torch.cat( - [ - self._faces_packed_to_mesh_idx, - self._faces_packed_to_mesh_idx, - self._faces_packed_to_mesh_idx, - ], - dim=0, - ) # sum(F_n)*3 - - # Sort the edges in increasing vertex order to remove duplicates as - # the same edge may appear in different orientations in different faces. - # i.e. rows in edges after sorting will be of the form (v0, v1) where v1 > v0. - # This sorting does not change the order in dim=0. - edges, _ = edges.sort(dim=1) - - # Remove duplicate edges: convert each edge (v0, v1) into an - # integer hash = V * v0 + v1; this allows us to use the scalar version of - # unique which is much faster than edges.unique(dim=1) which is very slow. 
- # After finding the unique elements reconstruct the vertex indices as: - # (v0, v1) = (hash / V, hash % V) - # The inverse maps from unique_edges back to edges: - # unique_edges[inverse_idxs] == edges - # i.e. inverse_idxs[i] == j means that edges[i] == unique_edges[j] - - V = self._verts_packed.shape[0] - edges_hash = V * edges[:, 0] + edges[:, 1] - u, inverse_idxs = torch.unique(edges_hash, return_inverse=True) - - # Find indices of unique elements. - # TODO (nikhilar) remove following 4 lines when torch.unique has support - # for returning unique indices - sorted_hash, sort_idx = torch.sort(edges_hash, dim=0) - unique_mask = torch.ones( - edges_hash.shape[0], dtype=torch.bool, device=self.device - ) - unique_mask[1:] = sorted_hash[1:] != sorted_hash[:-1] - unique_idx = sort_idx[unique_mask] - - self._edges_packed = torch.stack([u // V, u % V], dim=1) - self._edges_packed_to_mesh_idx = edge_to_mesh[unique_idx] - - self._faces_packed_to_edges_packed = inverse_idxs.reshape(3, F).t() - - # Compute number of edges per mesh - num_edges_per_mesh = torch.zeros(self._N, dtype=torch.int32, device=self.device) - ones = torch.ones(1, dtype=torch.int32, device=self.device).expand( - self._edges_packed_to_mesh_idx.shape - ) - num_edges_per_mesh = num_edges_per_mesh.scatter_add_( - 0, self._edges_packed_to_mesh_idx, ones - ) - self._num_edges_per_mesh = num_edges_per_mesh - - # Compute first idx for each mesh in edges_packed - mesh_to_edges_packed_first_idx = torch.zeros( - self._N, dtype=torch.int64, device=self.device - ) - num_edges_cumsum = num_edges_per_mesh.cumsum(dim=0) - mesh_to_edges_packed_first_idx[1:] = num_edges_cumsum[:-1].clone() - - self._mesh_to_edges_packed_first_idx = mesh_to_edges_packed_first_idx - - def _compute_laplacian_packed(self, refresh: bool = False): - """ - Computes the laplacian in packed form. - The definition of the laplacian is - L[i, j] = -1 , if i == j - L[i, j] = 1 / deg(i) , if (i, j) is an edge - L[i, j] = 0 , otherwise - where deg(i) is the degree of the i-th vertex in the graph - - Returns: - Sparse FloatTensor of shape (V, V) where V = sum(V_n) - - """ - from ..ops import laplacian - - if not (refresh or self._laplacian_packed is None): - return - - if self.isempty(): - self._laplacian_packed = torch.zeros( - (0, 0), dtype=torch.float32, device=self.device - ).to_sparse() - return - - verts_packed = self.verts_packed() # (sum(V_n), 3) - edges_packed = self.edges_packed() # (sum(E_n), 3) - - self._laplacian_packed = laplacian(verts_packed, edges_packed) - - def clone(self): - """ - Deep copy of Meshes object. All internal tensors are cloned individually. - - Returns: - new Meshes object. - """ - verts_list = self.verts_list() - faces_list = self.faces_list() - new_verts_list = [v.clone() for v in verts_list] - new_faces_list = [f.clone() for f in faces_list] - other = self.__class__(verts=new_verts_list, faces=new_faces_list) - for k in self._INTERNAL_TENSORS: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(other, k, v.clone()) - - # Textures is not a tensor but has a clone method - if self.textures is not None: - other.textures = self.textures.clone() - return other - - def detach(self): - """ - Detach Meshes object. All internal tensors are detached individually. - - Returns: - new Meshes object. 
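The edge de-duplication trick described in the comments above (sort each edge's endpoints, hash (v0, v1) as V * v0 + v1, and use the scalar torch.unique) can be sketched on two triangles that share an edge. Self-contained PyTorch, mirroring but not importing the deleted code:

.. code-block:: python

    import torch

    faces = torch.tensor([[0, 1, 2], [1, 2, 3]])   # two triangles sharing edge (1, 2)
    V = 4                                          # number of vertices
    edges = torch.cat([faces[:, [1, 2]], faces[:, [2, 0]], faces[:, [0, 1]]], dim=0)
    edges, _ = edges.sort(dim=1)                   # each edge as (v0, v1) with v0 < v1
    edges_hash = V * edges[:, 0] + edges[:, 1]     # scalar hash per edge
    u, inverse_idxs = torch.unique(edges_hash, return_inverse=True)
    unique_edges = torch.stack([u // V, u % V], dim=1)
    print(unique_edges)    # 5 unique edges out of 6; (1, 2) is kept only once
    print(inverse_idxs)    # maps every original edge to its row in unique_edges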
- """ - verts_list = self.verts_list() - faces_list = self.faces_list() - new_verts_list = [v.detach() for v in verts_list] - new_faces_list = [f.detach() for f in faces_list] - other = self.__class__(verts=new_verts_list, faces=new_faces_list) - for k in self._INTERNAL_TENSORS: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(other, k, v.detach()) - - # Textures is not a tensor but has a detach method - if self.textures is not None: - other.textures = self.textures.detach() - return other - - def to(self, device: Device, copy: bool = False): - """ - Match functionality of torch.Tensor.to() - If copy = True or the self Tensor is on a different device, the - returned tensor is a copy of self with the desired torch.device. - If copy = False and the self Tensor already has the correct torch.device, - then self is returned. - - Args: - device: Device (as str or torch.device) for the new tensor. - copy: Boolean indicator whether or not to clone self. Default False. - - Returns: - Meshes object. - """ - device_ = make_device(device) - if not copy and self.device == device_: - return self - - other = self.clone() - if self.device == device_: - return other - - other.device = device_ - if other._N > 0: - other._verts_list = [v.to(device_) for v in other._verts_list] - other._faces_list = [f.to(device_) for f in other._faces_list] - for k in self._INTERNAL_TENSORS: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(other, k, v.to(device_)) - if self.textures is not None: - other.textures = other.textures.to(device_) - return other - - def cpu(self): - return self.to("cpu") - - def cuda(self): - return self.to("cuda") - - def get_mesh_verts_faces(self, index: int): - """ - Get tensors for a single mesh from the list representation. - - Args: - index: Integer in the range [0, N). - - Returns: - verts: Tensor of shape (V, 3). - faces: LongTensor of shape (F, 3). - """ - if not isinstance(index, int): - raise ValueError("Mesh index must be an integer.") - if index < 0 or index > self._N: - raise ValueError( - "Mesh index must be in the range [0, N) where \ - N is the number of meshes in the batch." - ) - verts = self.verts_list() - faces = self.faces_list() - return verts[index], faces[index] - - # TODO(nikhilar) Move function to a utils file. - def split(self, split_sizes: list): - """ - Splits Meshes object of size N into a list of Meshes objects of - size len(split_sizes), where the i-th Meshes object is of size split_sizes[i]. - Similar to torch.split(). - - Args: - split_sizes: List of integer sizes of Meshes objects to be returned. - - Returns: - list[Meshes]. - """ - if not all(isinstance(x, int) for x in split_sizes): - raise ValueError("Value of split_sizes must be a list of integers.") - meshlist = [] - curi = 0 - for i in split_sizes: - meshlist.append(self[curi : curi + i]) - curi += i - return meshlist - - def offset_verts_(self, vert_offsets_packed): - """ - Add an offset to the vertices of this Meshes. In place operation. - If normals are present they may be recalculated. - - Args: - vert_offsets_packed: A Tensor of shape (3,) or the same shape as - self.verts_packed, giving offsets to be added - to all vertices. - Returns: - self. 
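As a rough usage sketch for the conversion and indexing helpers above (illustrative toy data; it assumes torch and an installed pytorch3d):

import torch
from pytorch3d.structures import Meshes

verts = [torch.rand(4, 3), torch.rand(5, 3), torch.rand(6, 3)]
faces = [
    torch.tensor([[0, 1, 2], [0, 2, 3]]),
    torch.tensor([[0, 1, 2]]),
    torch.tensor([[0, 1, 2], [3, 4, 5]]),
]
meshes = Meshes(verts=verts, faces=faces)      # heterogeneous batch of 3 meshes
v0, f0 = meshes.get_mesh_verts_faces(0)        # per-mesh tensors from the list representation
first_two, last_one = meshes.split([2, 1])     # list of smaller Meshes objects
if torch.cuda.is_available():
    meshes = meshes.to("cuda")                 # copies tensors to the requested device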
- """ - verts_packed = self.verts_packed() - if vert_offsets_packed.shape == (3,): - update_normals = False - vert_offsets_packed = vert_offsets_packed.expand_as(verts_packed) - else: - update_normals = True - if vert_offsets_packed.shape != verts_packed.shape: - raise ValueError("Verts offsets must have dimension (all_v, 3).") - # update verts packed - self._verts_packed = verts_packed + vert_offsets_packed - new_verts_list = list( - self._verts_packed.split(self.num_verts_per_mesh().tolist(), 0) - ) - # update verts list - # Note that since _compute_packed() has been executed, verts_list - # cannot be None even if not provided during construction. - self._verts_list = new_verts_list - - # update verts padded - if self._verts_padded is not None: - for i, verts in enumerate(new_verts_list): - if len(verts) > 0: - self._verts_padded[i, : verts.shape[0], :] = verts - - # update face areas and normals and vertex normals - # only if the original attributes are present - if update_normals and any( - v is not None - for v in [self._faces_areas_packed, self._faces_normals_packed] - ): - self._compute_face_areas_normals(refresh=True) - if update_normals and self._verts_normals_packed is not None: - self._compute_vertex_normals(refresh=True) - - return self - - # TODO(nikhilar) Move out of place operator to a utils file. - def offset_verts(self, vert_offsets_packed): - """ - Out of place offset_verts. - - Args: - vert_offsets_packed: A Tensor of the same shape as self.verts_packed - giving offsets to be added to all vertices. - Returns: - new Meshes object. - """ - new_mesh = self.clone() - return new_mesh.offset_verts_(vert_offsets_packed) - - def scale_verts_(self, scale): - """ - Multiply the vertices of this Meshes object by a scalar value. - In place operation. - - Args: - scale: A scalar, or a Tensor of shape (N,). - - Returns: - self. - """ - if not torch.is_tensor(scale): - scale = torch.full((len(self),), scale, device=self.device) - new_verts_list = [] - verts_list = self.verts_list() - for i, old_verts in enumerate(verts_list): - new_verts_list.append(scale[i] * old_verts) - # update list - self._verts_list = new_verts_list - # update packed - if self._verts_packed is not None: - self._verts_packed = torch.cat(new_verts_list, dim=0) - # update padded - if self._verts_padded is not None: - for i, verts in enumerate(self._verts_list): - if len(verts) > 0: - self._verts_padded[i, : verts.shape[0], :] = verts - - # update face areas and normals - # only if the original attributes are computed - if any( - v is not None - for v in [self._faces_areas_packed, self._faces_normals_packed] - ): - self._compute_face_areas_normals(refresh=True) - return self - - def scale_verts(self, scale): - """ - Out of place scale_verts. - - Args: - scale: A scalar, or a Tensor of shape (N,). - - Returns: - new Meshes object. - """ - new_mesh = self.clone() - return new_mesh.scale_verts_(scale) - - def update_padded(self, new_verts_padded): - """ - This function allows for an update of verts_padded without having to - explicitly convert it to the list representation for heterogeneous batches. - Returns a Meshes structure with updated padded tensors and copies of the - auxiliary tensors at construction time. - It updates self._verts_padded with new_verts_padded, and does a - shallow copy of (faces_padded, faces_list, num_verts_per_mesh, num_faces_per_mesh). - If packed representations are computed in self, they are updated as well. 
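A short illustrative sketch of the offset and scale methods above (toy data; assumes torch and pytorch3d). Methods ending in an underscore modify the batch in place, the others return a new Meshes:

import torch
from pytorch3d.structures import Meshes

meshes = Meshes(
    verts=[torch.rand(4, 3), torch.rand(4, 3)],
    faces=[torch.tensor([[0, 1, 2]]), torch.tensor([[1, 2, 3]])],
)
shifted = meshes.offset_verts(torch.tensor([0.0, 0.0, 1.0]))  # every vertex moved by +1 in z
doubled = meshes.scale_verts(2.0)                             # uniform scale, new object
meshes.scale_verts_(torch.tensor([1.0, 0.5]))                 # per-mesh scale, in place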
- - Args: - new_points_padded: FloatTensor of shape (N, V, 3) - - Returns: - Meshes with updated padded representations - """ - - def check_shapes(x, size): - if x.shape[0] != size[0]: - raise ValueError("new values must have the same batch dimension.") - if x.shape[1] != size[1]: - raise ValueError("new values must have the same number of points.") - if x.shape[2] != size[2]: - raise ValueError("new values must have the same dimension.") - - check_shapes(new_verts_padded, [self._N, self._V, 3]) - - new = self.__class__(verts=new_verts_padded, faces=self.faces_padded()) - - if new._N != self._N or new._V != self._V or new._F != self._F: - raise ValueError("Inconsistent sizes after construction.") - - # overwrite the equisized flag - new.equisized = self.equisized - - # overwrite textures if any - new.textures = self.textures - - # copy auxiliary tensors - copy_tensors = ["_num_verts_per_mesh", "_num_faces_per_mesh", "valid"] - - for k in copy_tensors: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(new, k, v) # shallow copy - - # shallow copy of faces_list if any, st new.faces_list() - # does not re-compute from _faces_padded - new._faces_list = self._faces_list - - # update verts/faces packed if they are computed in self - if self._verts_packed is not None: - copy_tensors = [ - "_faces_packed", - "_verts_packed_to_mesh_idx", - "_faces_packed_to_mesh_idx", - "_mesh_to_verts_packed_first_idx", - "_mesh_to_faces_packed_first_idx", - ] - for k in copy_tensors: - v = getattr(self, k) - assert torch.is_tensor(v) - setattr(new, k, v) # shallow copy - # update verts_packed - pad_to_packed = self.verts_padded_to_packed_idx() - new_verts_packed = new_verts_padded.reshape(-1, 3)[pad_to_packed, :] - new._verts_packed = new_verts_packed - new._verts_padded_to_packed_idx = pad_to_packed - - # update edges packed if they are computed in self - if self._edges_packed is not None: - copy_tensors = [ - "_edges_packed", - "_edges_packed_to_mesh_idx", - "_mesh_to_edges_packed_first_idx", - "_faces_packed_to_edges_packed", - "_num_edges_per_mesh", - ] - for k in copy_tensors: - v = getattr(self, k) - assert torch.is_tensor(v) - setattr(new, k, v) # shallow copy - - # update laplacian if it is compute in self - if self._laplacian_packed is not None: - new._laplacian_packed = self._laplacian_packed - - assert new._verts_list is None - assert new._verts_normals_packed is None - assert new._faces_normals_packed is None - assert new._faces_areas_packed is None - - return new - - # TODO(nikhilar) Move function to utils file. - def get_bounding_boxes(self): - """ - Compute an axis-aligned bounding box for each mesh in this Meshes object. - - Returns: - bboxes: Tensor of shape (N, 3, 2) where bbox[i, j] gives the - min and max values of mesh i along the jth coordinate axis. - """ - all_mins, all_maxes = [], [] - for verts in self.verts_list(): - cur_mins = verts.min(dim=0)[0] # (3,) - cur_maxes = verts.max(dim=0)[0] # (3,) - all_mins.append(cur_mins) - all_maxes.append(cur_maxes) - all_mins = torch.stack(all_mins, dim=0) # (N, 3) - all_maxes = torch.stack(all_maxes, dim=0) # (N, 3) - bboxes = torch.stack([all_mins, all_maxes], dim=2) - return bboxes - - def extend(self, N: int): - """ - Create new Meshes class which contains each input mesh N times - - Args: - N: number of new copies of each mesh. - - Returns: - new Meshes object. 
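A sketch of the intended use of update_padded: when only vertex positions change, for example inside an optimization loop that deforms a template, the face and index bookkeeping is shallow-copied rather than recomputed. Toy data; assumes torch and pytorch3d:

import torch
from pytorch3d.structures import Meshes

meshes = Meshes(
    verts=[torch.rand(4, 3), torch.rand(5, 3)],
    faces=[torch.tensor([[0, 1, 2]]), torch.tensor([[0, 1, 2], [2, 3, 4]])],
)
verts_padded = meshes.verts_padded()                       # (N, max(V_n), 3)
deformed = meshes.update_padded(verts_padded + 0.01 * torch.randn_like(verts_padded))
bboxes = deformed.get_bounding_boxes()                     # (N, 3, 2) per-mesh min/max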
- """ - if not isinstance(N, int): - raise ValueError("N must be an integer.") - if N <= 0: - raise ValueError("N must be > 0.") - new_verts_list, new_faces_list = [], [] - for verts, faces in zip(self.verts_list(), self.faces_list()): - new_verts_list.extend(verts.clone() for _ in range(N)) - new_faces_list.extend(faces.clone() for _ in range(N)) - - tex = None - if self.textures is not None: - tex = self.textures.extend(N) - - return self.__class__(verts=new_verts_list, faces=new_faces_list, textures=tex) - - def sample_textures(self, fragments): - if self.textures is not None: - - # Check dimensions of textures match that of meshes - shape_ok = self.textures.check_shapes(self._N, self._V, self._F) - if not shape_ok: - msg = "Textures do not match the dimensions of Meshes." - raise ValueError(msg) - - # Pass in faces packed. If the textures are defined per - # vertex, the face indices are needed in order to interpolate - # the vertex attributes across the face. - return self.textures.sample_textures( - fragments, faces_packed=self.faces_packed() - ) - else: - raise ValueError("Meshes does not have textures") - - def submeshes( - self, - face_indices: Union[ - List[List[torch.LongTensor]], List[torch.LongTensor], torch.LongTensor - ], - ) -> "Meshes": - """ - Split a batch of meshes into a batch of submeshes. - - The return value is a Meshes object representing - [mesh restricted to only faces indexed by selected_faces - for mesh, selected_faces_list in zip(self, face_indices) - for faces in selected_faces_list] - - Args: - face_indices: - Let the original mesh have verts_list() of length N. - Can be either - - List of lists of LongTensors. The n-th element is a list of length - num_submeshes_n (empty lists are allowed). The k-th element of the n-th - sublist is a LongTensor of length num_faces_submesh_n_k. - - List of LongTensors. The n-th element is a (possibly empty) LongTensor - of shape (num_submeshes_n, num_faces_n). - - A LongTensor of shape (N, num_submeshes_per_mesh, num_faces_per_submesh) - where all meshes in the batch will have the same number of submeshes. - This will result in an output Meshes object with batch size equal to - N * num_submeshes_per_mesh. - - Returns: - Meshes object of length `sum(len(ids) for ids in face_indices)`. - - Example 1: - - If `meshes` has batch size 1, and `face_indices` is a 1D LongTensor, - then `meshes.submeshes([[face_indices]]) and - `meshes.submeshes(face_indices[None, None])` both produce a Meshes of length 1, - containing a single submesh with a subset of `meshes`' faces, whose indices are - specified by `face_indices`. - - Example 2: - - Take a Meshes object `cubes` with 4 meshes, each a translated cube. Then: - * len(cubes) is 4, len(cubes.verts_list()) is 4, len(cubes.faces_list()) 4, - * [cube_verts.size for cube_verts in cubes.verts_list()] is [8, 8, 8, 8], - * [cube_faces.size for cube_faces in cubes.faces_list()] if [6, 6, 6, 6], - - Now let front_facet, top_and_bottom, all_facets be LongTensors of - sizes (2), (4), and (12), each picking up a number of facets of a cube by - specifying the appropriate triangular faces. - - Then let `subcubes = cubes.submeshes([[front_facet, top_and_bottom], [], - [all_facets], []])`. - * len(subcubes) is 3. - * subcubes[0] is the front facet of the cube contained in cubes[0]. - * subcubes[1] is a mesh containing the (disconnected) top and bottom facets - of cubes[0]. - * subcubes[2] is cubes[2]. - * There are no submeshes of cubes[1] and cubes[3] in subcubes. 
- * subcubes[0] and subcubes[1] are not watertight. subcubes[2] is. - """ - if len(face_indices) != len(self): - raise ValueError( - "You must specify exactly one set of submeshes" - " for each mesh in this Meshes object." - ) - - sub_verts = [] - sub_verts_ids = [] - sub_faces = [] - - for face_ids_per_mesh, faces, verts in zip( - face_indices, self.faces_list(), self.verts_list() - ): - sub_verts_ids.append([]) - for submesh_face_ids in face_ids_per_mesh: - faces_to_keep = faces[submesh_face_ids] - - # Say we are keeping two faces from a mesh with six vertices: - # faces_to_keep = [[0, 6, 4], - # [0, 2, 6]] - # Then we want verts_to_keep to contain only vertices [0, 2, 4, 6]: - vertex_ids_to_keep = torch.unique(faces_to_keep, sorted=True) - sub_verts.append(verts[vertex_ids_to_keep]) - sub_verts_ids[-1].append(vertex_ids_to_keep) - - # Now, convert faces_to_keep to use the new vertex ids. - # In our example, instead of - # [[0, 6, 4], - # [0, 2, 6]] - # we want faces_to_keep to be - # [[0, 3, 2], - # [0, 1, 3]], - # as each point id got reduced to its sort rank. - _, ids_of_unique_ids_in_sorted = torch.unique( - faces_to_keep, return_inverse=True - ) - sub_faces.append(ids_of_unique_ids_in_sorted) - - return self.__class__( - verts=sub_verts, - faces=sub_faces, - textures=( - self.textures.submeshes(sub_verts_ids, face_indices) - if self.textures - else None - ), - ) - - -def join_meshes_as_batch(meshes: List[Meshes], include_textures: bool = True) -> Meshes: - """ - Merge multiple Meshes objects, i.e. concatenate the meshes objects. They - must all be on the same device. If include_textures is true, they must all - be compatible, either all or none having textures, and all the Textures - objects being the same type. If include_textures is False, textures are - ignored. - - If the textures are TexturesAtlas then being the same type includes having - the same resolution. If they are TexturesUV then it includes having the same - align_corners and padding_mode. - - Args: - meshes: list of meshes. - include_textures: (bool) whether to try to join the textures. - - Returns: - new Meshes object containing all the meshes from all the inputs. - """ - if isinstance(meshes, Meshes): - # Meshes objects can be iterated and produce single Meshes. We avoid - # letting join_meshes_as_batch(mesh1, mesh2) silently do the wrong thing. - raise ValueError("Wrong first argument to join_meshes_as_batch.") - verts = [v for mesh in meshes for v in mesh.verts_list()] - faces = [f for mesh in meshes for f in mesh.faces_list()] - if len(meshes) == 0 or not include_textures: - return Meshes(verts=verts, faces=faces) - - if meshes[0].textures is None: - if any(mesh.textures is not None for mesh in meshes): - raise ValueError("Inconsistent textures in join_meshes_as_batch.") - return Meshes(verts=verts, faces=faces) - - if any(mesh.textures is None for mesh in meshes): - raise ValueError("Inconsistent textures in join_meshes_as_batch.") - - # Now we know there are multiple meshes and they have textures to merge. 
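A minimal sketch of submeshes (illustrative; assumes torch and pytorch3d): keep one face of the first mesh, nothing from the second, and all faces of the third.

import torch
from pytorch3d.structures import Meshes

meshes = Meshes(
    verts=[torch.rand(4, 3), torch.rand(3, 3), torch.rand(6, 3)],
    faces=[
        torch.tensor([[0, 1, 2], [0, 2, 3]]),
        torch.tensor([[0, 1, 2]]),
        torch.tensor([[0, 1, 2], [3, 4, 5]]),
    ],
)
face_indices = [[torch.tensor([0])], [], [torch.arange(2)]]
subs = meshes.submeshes(face_indices)
assert len(subs) == 2    # one submesh of mesh 0, a full copy of mesh 2, none of mesh 1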
- all_textures = [mesh.textures for mesh in meshes] - first = all_textures[0] - tex_types_same = all(type(tex) == type(first) for tex in all_textures) # noqa: E721 - - if not tex_types_same: - raise ValueError("All meshes in the batch must have the same type of texture.") - - tex = first.join_batch(all_textures[1:]) - return Meshes(verts=verts, faces=faces, textures=tex) - - -def join_meshes_as_scene( - meshes: Union[Meshes, List[Meshes]], include_textures: bool = True -) -> Meshes: - """ - Joins a batch of meshes in the form of a Meshes object or a list of Meshes - objects as a single mesh. If the input is a list, the Meshes objects in the - list must all be on the same device. Unless include_textures is False, the - meshes must all have the same type of texture or must all not have textures. - - If textures are included, then the textures are joined as a single scene in - addition to the meshes. For this, texture types have an appropriate method - called join_scene which joins mesh textures into a single texture. - If the textures are TexturesAtlas then they must have the same resolution. - If they are TexturesUV then they must have the same align_corners and - padding_mode. Values in verts_uvs outside [0, 1] will not - be respected. - - Args: - meshes: Meshes object that contains a batch of meshes, or a list of - Meshes objects. - include_textures: (bool) whether to try to join the textures. - - Returns: - new Meshes object containing a single mesh - """ - if not isinstance(include_textures, (bool, int)): - # We want to avoid letting join_meshes_as_scene(mesh1, mesh2) silently - # do the wrong thing. - raise ValueError( - f"include_textures argument cannot be {type(include_textures)}" - ) - if isinstance(meshes, List): - meshes = join_meshes_as_batch(meshes, include_textures=include_textures) - - if len(meshes) == 1: - return meshes - verts = meshes.verts_packed() # (sum(V_n), 3) - # Offset automatically done by faces_packed - faces = meshes.faces_packed() # (sum(F_n), 3) - textures = None - - if include_textures and meshes.textures is not None: - textures = meshes.textures.join_scene() - - mesh = Meshes(verts=verts.unsqueeze(0), faces=faces.unsqueeze(0), textures=textures) - return mesh diff --git a/pytorch3d/pytorch3d/structures/pointclouds.py b/pytorch3d/pytorch3d/structures/pointclouds.py deleted file mode 100644 index 654b5a26ca32fada8c434443502059e0fe6a4b94..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/structures/pointclouds.py +++ /dev/null @@ -1,1303 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from itertools import zip_longest -from typing import List, Optional, Sequence, Tuple, Union - -import numpy as np -import torch - -from ..common.datatypes import Device, make_device -from . import utils as struct_utils - - -class Pointclouds: - """ - This class provides functions for working with batches of 3d point clouds, - and converting between representations. - - Within Pointclouds, there are three different representations of the data. - - List - - only used for input as a starting point to convert to other representations. - Padded - - has specific batch dimension. - Packed - - no batch dimension. - - has auxiliary variables used to index into the padded representation. - - Example - - Input list of points = [[P_1], [P_2], ... , [P_N]] - where P_1, ... 
, P_N are the number of points in each cloud and N is the - number of clouds. - - # SPHINX IGNORE - List | Padded | Packed - ---------------------------|-------------------------|------------------------ - [[P_1], ... , [P_N]] | size = (N, max(P_n), 3) | size = (sum(P_n), 3) - | | - Example for locations | | - or colors: | | - | | - P_1 = 3, P_2 = 4, P_3 = 5 | size = (3, 5, 3) | size = (12, 3) - | | - List([ | tensor([ | tensor([ - [ | [ | [0.1, 0.3, 0.5], - [0.1, 0.3, 0.5], | [0.1, 0.3, 0.5], | [0.5, 0.2, 0.1], - [0.5, 0.2, 0.1], | [0.5, 0.2, 0.1], | [0.6, 0.8, 0.7], - [0.6, 0.8, 0.7] | [0.6, 0.8, 0.7], | [0.1, 0.3, 0.3], - ], | [0, 0, 0], | [0.6, 0.7, 0.8], - [ | [0, 0, 0] | [0.2, 0.3, 0.4], - [0.1, 0.3, 0.3], | ], | [0.1, 0.5, 0.3], - [0.6, 0.7, 0.8], | [ | [0.7, 0.3, 0.6], - [0.2, 0.3, 0.4], | [0.1, 0.3, 0.3], | [0.2, 0.4, 0.8], - [0.1, 0.5, 0.3] | [0.6, 0.7, 0.8], | [0.9, 0.5, 0.2], - ], | [0.2, 0.3, 0.4], | [0.2, 0.3, 0.4], - [ | [0.1, 0.5, 0.3], | [0.9, 0.3, 0.8], - [0.7, 0.3, 0.6], | [0, 0, 0] | ]) - [0.2, 0.4, 0.8], | ], | - [0.9, 0.5, 0.2], | [ | - [0.2, 0.3, 0.4], | [0.7, 0.3, 0.6], | - [0.9, 0.3, 0.8], | [0.2, 0.4, 0.8], | - ] | [0.9, 0.5, 0.2], | - ]) | [0.2, 0.3, 0.4], | - | [0.9, 0.3, 0.8] | - | ] | - | ]) | - ----------------------------------------------------------------------------- - - Auxiliary variables for packed representation - - Name | Size | Example from above - -------------------------------|---------------------|----------------------- - | | - packed_to_cloud_idx | size = (sum(P_n)) | tensor([ - | | 0, 0, 0, 1, 1, 1, - | | 1, 2, 2, 2, 2, 2 - | | )] - | | size = (12) - | | - cloud_to_packed_first_idx | size = (N) | tensor([0, 3, 7]) - | | size = (3) - | | - num_points_per_cloud | size = (N) | tensor([3, 4, 5]) - | | size = (3) - | | - padded_to_packed_idx | size = (sum(P_n)) | tensor([ - | | 0, 1, 2, 5, 6, 7, - | | 8, 10, 11, 12, 13, - | | 14 - | | )] - | | size = (12) - ----------------------------------------------------------------------------- - # SPHINX IGNORE - """ - - _INTERNAL_TENSORS = [ - "_points_packed", - "_points_padded", - "_normals_packed", - "_normals_padded", - "_features_packed", - "_features_padded", - "_packed_to_cloud_idx", - "_cloud_to_packed_first_idx", - "_num_points_per_cloud", - "_padded_to_packed_idx", - "valid", - "equisized", - ] - - def __init__(self, points, normals=None, features=None) -> None: - """ - Args: - points: - Can be either - - - List where each element is a tensor of shape (num_points, 3) - containing the (x, y, z) coordinates of each point. - - Padded float tensor with shape (num_clouds, num_points, 3). - normals: - Can be either - - - None - - List where each element is a tensor of shape (num_points, 3) - containing the normal vector for each point. - - Padded float tensor of shape (num_clouds, num_points, 3). - features: - Can be either - - - None - - List where each element is a tensor of shape (num_points, C) - containing the features for the points in the cloud. - - Padded float tensor of shape (num_clouds, num_points, C). - where C is the number of channels in the features. - For example 3 for RGB color. - - Refer to comments above for descriptions of List and Padded - representations. - """ - self.device = torch.device("cpu") - - # Indicates whether the clouds in the list/batch have the same number - # of points. - self.equisized = False - - # Boolean indicator for each cloud in the batch. - # True if cloud has non zero number of points, False otherwise. 
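The three representations in the table above can be sketched as follows (illustrative toy data; assumes torch and pytorch3d):

import torch
from pytorch3d.structures import Pointclouds

points = [torch.rand(3, 3), torch.rand(4, 3), torch.rand(5, 3)]
features = [torch.rand(3, 3), torch.rand(4, 3), torch.rand(5, 3)]   # e.g. per-point RGB
clouds = Pointclouds(points=points, features=features)

clouds.points_padded().shape      # (3, 5, 3): zero-padded up to max(P_n)
clouds.points_packed().shape      # (12, 3): all points concatenated
clouds.packed_to_cloud_idx()      # tensor([0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2])
clouds.num_points_per_cloud()     # tensor([3, 4, 5])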
- self.valid = None - - self._N = 0 # batch size (number of clouds) - self._P = 0 # (max) number of points per cloud - self._C = None # number of channels in the features - - # List of Tensors of points and features. - self._points_list = None - self._normals_list = None - self._features_list = None - - # Number of points per cloud. - self._num_points_per_cloud = None # N - - # Packed representation. - self._points_packed = None # (sum(P_n), 3) - self._normals_packed = None # (sum(P_n), 3) - self._features_packed = None # (sum(P_n), C) - - self._packed_to_cloud_idx = None # sum(P_n) - - # Index of each cloud's first point in the packed points. - # Assumes packing is sequential. - self._cloud_to_packed_first_idx = None # N - - # Padded representation. - self._points_padded = None # (N, max(P_n), 3) - self._normals_padded = None # (N, max(P_n), 3) - self._features_padded = None # (N, max(P_n), C) - - # Index to convert points from flattened padded to packed. - self._padded_to_packed_idx = None # N * max_P - - # Identify type of points. - if isinstance(points, list): - self._points_list = points - self._N = len(self._points_list) - self.valid = torch.zeros((self._N,), dtype=torch.bool, device=self.device) - - if self._N > 0: - self.device = self._points_list[0].device - for p in self._points_list: - if len(p) > 0 and (p.dim() != 2 or p.shape[1] != 3): - raise ValueError("Clouds in list must be of shape Px3 or empty") - if p.device != self.device: - raise ValueError("All points must be on the same device") - - num_points_per_cloud = torch.tensor( - [len(p) for p in self._points_list], device=self.device - ) - self._P = int(num_points_per_cloud.max()) - self.valid = torch.tensor( - [len(p) > 0 for p in self._points_list], - dtype=torch.bool, - device=self.device, - ) - - if len(num_points_per_cloud.unique()) == 1: - self.equisized = True - self._num_points_per_cloud = num_points_per_cloud - else: - self._num_points_per_cloud = torch.tensor([], dtype=torch.int64) - - elif torch.is_tensor(points): - if points.dim() != 3 or points.shape[2] != 3: - raise ValueError("Points tensor has incorrect dimensions.") - self._points_padded = points - self._N = self._points_padded.shape[0] - self._P = self._points_padded.shape[1] - self.device = self._points_padded.device - self.valid = torch.ones((self._N,), dtype=torch.bool, device=self.device) - self._num_points_per_cloud = torch.tensor( - [self._P] * self._N, device=self.device - ) - self.equisized = True - else: - raise ValueError( - "Points must be either a list or a tensor with \ - shape (batch_size, P, 3) where P is the maximum number of \ - points in a cloud." - ) - - # parse normals - normals_parsed = self._parse_auxiliary_input(normals) - self._normals_list, self._normals_padded, normals_C = normals_parsed - if normals_C is not None and normals_C != 3: - raise ValueError("Normals are expected to be 3-dimensional") - - # parse features - features_parsed = self._parse_auxiliary_input(features) - self._features_list, self._features_padded, features_C = features_parsed - if features_C is not None: - self._C = features_C - - def _parse_auxiliary_input( - self, aux_input - ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor], Optional[int]]: - """ - Interpret the auxiliary inputs (normals, features) given to __init__. - - Args: - aux_input: - Can be either - - - List where each element is a tensor of shape (num_points, C) - containing the features for the points in the cloud. - - Padded float tensor of shape (num_clouds, num_points, C). 
- For normals, C = 3 - - Returns: - 3-element tuple of list, padded, num_channels. - If aux_input is list, then padded is None. If aux_input is a tensor, - then list is None. - """ - if aux_input is None or self._N == 0: - return None, None, None - - aux_input_C = None - - if isinstance(aux_input, list): - return self._parse_auxiliary_input_list(aux_input) - if torch.is_tensor(aux_input): - if aux_input.dim() != 3: - raise ValueError("Auxiliary input tensor has incorrect dimensions.") - if self._N != aux_input.shape[0]: - raise ValueError("Points and inputs must be the same length.") - if self._P != aux_input.shape[1]: - raise ValueError( - "Inputs tensor must have the right maximum \ - number of points in each cloud." - ) - if aux_input.device != self.device: - raise ValueError( - "All auxiliary inputs must be on the same device as the points." - ) - aux_input_C = aux_input.shape[2] - return None, aux_input, aux_input_C - else: - raise ValueError( - "Auxiliary input must be either a list or a tensor with \ - shape (batch_size, P, C) where P is the maximum number of \ - points in a cloud." - ) - - def _parse_auxiliary_input_list( - self, aux_input: list - ) -> Tuple[Optional[List[torch.Tensor]], None, Optional[int]]: - """ - Interpret the auxiliary inputs (normals, features) given to __init__, - if a list. - - Args: - aux_input: - - List where each element is a tensor of shape (num_points, C) - containing the features for the points in the cloud. - For normals, C = 3 - - Returns: - 3-element tuple of list, padded=None, num_channels. - If aux_input is list, then padded is None. If aux_input is a tensor, - then list is None. - """ - aux_input_C = None - good_empty = None - needs_fixing = False - - if len(aux_input) != self._N: - raise ValueError("Points and auxiliary input must be the same length.") - for p, d in zip(self._num_points_per_cloud, aux_input): - valid_but_empty = p == 0 and d is not None and d.ndim == 2 - if p > 0 or valid_but_empty: - if p != d.shape[0]: - raise ValueError( - "A cloud has mismatched numbers of points and inputs" - ) - if d.dim() != 2: - raise ValueError( - "A cloud auxiliary input must be of shape PxC or empty" - ) - if aux_input_C is None: - aux_input_C = d.shape[1] - elif aux_input_C != d.shape[1]: - raise ValueError("The clouds must have the same number of channels") - if d.device != self.device: - raise ValueError( - "All auxiliary inputs must be on the same device as the points." - ) - else: - needs_fixing = True - - if aux_input_C is None: - # We found nothing useful - return None, None, None - - # If we have empty but "wrong" inputs we want to store "fixed" versions. - if needs_fixing: - if good_empty is None: - good_empty = torch.zeros((0, aux_input_C), device=self.device) - aux_input_out = [] - for p, d in zip(self._num_points_per_cloud, aux_input): - valid_but_empty = p == 0 and d is not None and d.ndim == 2 - if p > 0 or valid_but_empty: - aux_input_out.append(d) - else: - aux_input_out.append(good_empty) - else: - aux_input_out = aux_input - - return aux_input_out, None, aux_input_C - - def __len__(self) -> int: - return self._N - - def __getitem__( - self, - index: Union[int, List[int], slice, torch.BoolTensor, torch.LongTensor], - ) -> "Pointclouds": - """ - Args: - index: Specifying the index of the cloud to retrieve. - Can be an int, slice, list of ints or a boolean tensor. - - Returns: - Pointclouds object with selected clouds. The tensors are not cloned. 
- """ - normals, features = None, None - normals_list = self.normals_list() - features_list = self.features_list() - if isinstance(index, int): - points = [self.points_list()[index]] - if normals_list is not None: - normals = [normals_list[index]] - if features_list is not None: - features = [features_list[index]] - elif isinstance(index, slice): - points = self.points_list()[index] - if normals_list is not None: - normals = normals_list[index] - if features_list is not None: - features = features_list[index] - elif isinstance(index, list): - points = [self.points_list()[i] for i in index] - if normals_list is not None: - normals = [normals_list[i] for i in index] - if features_list is not None: - features = [features_list[i] for i in index] - elif isinstance(index, torch.Tensor): - if index.dim() != 1 or index.dtype.is_floating_point: - raise IndexError(index) - # NOTE consider converting index to cpu for efficiency - if index.dtype == torch.bool: - # advanced indexing on a single dimension - index = index.nonzero() - index = index.squeeze(1) if index.numel() > 0 else index - index = index.tolist() - points = [self.points_list()[i] for i in index] - if normals_list is not None: - normals = [normals_list[i] for i in index] - if features_list is not None: - features = [features_list[i] for i in index] - else: - raise IndexError(index) - - return self.__class__(points=points, normals=normals, features=features) - - def isempty(self) -> bool: - """ - Checks whether any cloud is valid. - - Returns: - bool indicating whether there is any data. - """ - return self._N == 0 or self.valid.eq(False).all() - - def points_list(self) -> List[torch.Tensor]: - """ - Get the list representation of the points. - - Returns: - list of tensors of points of shape (P_n, 3). - """ - if self._points_list is None: - assert ( - self._points_padded is not None - ), "points_padded is required to compute points_list." - points_list = [] - for i in range(self._N): - points_list.append( - self._points_padded[i, : self.num_points_per_cloud()[i]] - ) - self._points_list = points_list - return self._points_list - - def normals_list(self) -> Optional[List[torch.Tensor]]: - """ - Get the list representation of the normals, - or None if there are no normals. - - Returns: - list of tensors of normals of shape (P_n, 3). - """ - if self._normals_list is None: - if self._normals_padded is None: - # No normals provided so return None - return None - self._normals_list = struct_utils.padded_to_list( - self._normals_padded, self.num_points_per_cloud().tolist() - ) - return self._normals_list - - def features_list(self) -> Optional[List[torch.Tensor]]: - """ - Get the list representation of the features, - or None if there are no features. - - Returns: - list of tensors of features of shape (P_n, C). - """ - if self._features_list is None: - if self._features_padded is None: - # No features provided so return None - return None - self._features_list = struct_utils.padded_to_list( - self._features_padded, self.num_points_per_cloud().tolist() - ) - return self._features_list - - def points_packed(self) -> torch.Tensor: - """ - Get the packed representation of the points. - - Returns: - tensor of points of shape (sum(P_n), 3). - """ - self._compute_packed() - return self._points_packed - - def normals_packed(self) -> Optional[torch.Tensor]: - """ - Get the packed representation of the normals. - - Returns: - tensor of normals of shape (sum(P_n), 3), - or None if there are no normals. 
- """ - self._compute_packed() - return self._normals_packed - - def features_packed(self) -> Optional[torch.Tensor]: - """ - Get the packed representation of the features. - - Returns: - tensor of features of shape (sum(P_n), C), - or None if there are no features - """ - self._compute_packed() - return self._features_packed - - def packed_to_cloud_idx(self): - """ - Return a 1D tensor x with length equal to the total number of points. - packed_to_cloud_idx()[i] gives the index of the cloud which contains - points_packed()[i]. - - Returns: - 1D tensor of indices. - """ - self._compute_packed() - return self._packed_to_cloud_idx - - def cloud_to_packed_first_idx(self): - """ - Return a 1D tensor x with length equal to the number of clouds such that - the first point of the ith cloud is points_packed[x[i]]. - - Returns: - 1D tensor of indices of first items. - """ - self._compute_packed() - return self._cloud_to_packed_first_idx - - def num_points_per_cloud(self) -> torch.Tensor: - """ - Return a 1D tensor x with length equal to the number of clouds giving - the number of points in each cloud. - - Returns: - 1D tensor of sizes. - """ - return self._num_points_per_cloud - - def points_padded(self) -> torch.Tensor: - """ - Get the padded representation of the points. - - Returns: - tensor of points of shape (N, max(P_n), 3). - """ - self._compute_padded() - return self._points_padded - - def normals_padded(self) -> Optional[torch.Tensor]: - """ - Get the padded representation of the normals, - or None if there are no normals. - - Returns: - tensor of normals of shape (N, max(P_n), 3). - """ - self._compute_padded() - return self._normals_padded - - def features_padded(self) -> Optional[torch.Tensor]: - """ - Get the padded representation of the features, - or None if there are no features. - - Returns: - tensor of features of shape (N, max(P_n), 3). - """ - self._compute_padded() - return self._features_padded - - def padded_to_packed_idx(self): - """ - Return a 1D tensor x with length equal to the total number of points - such that points_packed()[i] is element x[i] of the flattened padded - representation. - The packed representation can be calculated as follows. - - .. code-block:: python - - p = points_padded().reshape(-1, 3) - points_packed = p[x] - - Returns: - 1D tensor of indices. - """ - if self._padded_to_packed_idx is not None: - return self._padded_to_packed_idx - if self._N == 0: - self._padded_to_packed_idx = [] - else: - self._padded_to_packed_idx = torch.cat( - [ - torch.arange(v, dtype=torch.int64, device=self.device) + i * self._P - for (i, v) in enumerate(self.num_points_per_cloud()) - ], - dim=0, - ) - return self._padded_to_packed_idx - - def _compute_padded(self, refresh: bool = False): - """ - Computes the padded version from points_list, normals_list and features_list. - - Args: - refresh: whether to force the recalculation. 
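A short sketch of the relationship stated in the padded_to_packed_idx docstring above (illustrative; assumes torch and pytorch3d):

import torch
from pytorch3d.structures import Pointclouds

clouds = Pointclouds(points=[torch.rand(2, 3), torch.rand(4, 3)])
idx = clouds.padded_to_packed_idx()                     # indices into the flattened padded tensor
flat_padded = clouds.points_padded().reshape(-1, 3)     # (N * max(P_n), 3)
assert torch.equal(flat_padded[idx], clouds.points_packed())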
- """ - if not (refresh or self._points_padded is None): - return - - self._normals_padded, self._features_padded = None, None - if self.isempty(): - self._points_padded = torch.zeros((self._N, 0, 3), device=self.device) - else: - self._points_padded = struct_utils.list_to_padded( - self.points_list(), - (self._P, 3), - pad_value=0.0, - equisized=self.equisized, - ) - normals_list = self.normals_list() - if normals_list is not None: - self._normals_padded = struct_utils.list_to_padded( - normals_list, - (self._P, 3), - pad_value=0.0, - equisized=self.equisized, - ) - features_list = self.features_list() - if features_list is not None: - self._features_padded = struct_utils.list_to_padded( - features_list, - (self._P, self._C), - pad_value=0.0, - equisized=self.equisized, - ) - - # TODO(nikhilar) Improve performance of _compute_packed. - def _compute_packed(self, refresh: bool = False): - """ - Computes the packed version from points_list, normals_list and - features_list and sets the values of auxiliary tensors. - - Args: - refresh: Set to True to force recomputation of packed - representations. Default: False. - """ - - if not ( - refresh - or any( - v is None - for v in [ - self._points_packed, - self._packed_to_cloud_idx, - self._cloud_to_packed_first_idx, - ] - ) - ): - return - - # Packed can be calculated from padded or list, so can call the - # accessor function for the lists. - points_list = self.points_list() - normals_list = self.normals_list() - features_list = self.features_list() - if self.isempty(): - self._points_packed = torch.zeros( - (0, 3), dtype=torch.float32, device=self.device - ) - self._packed_to_cloud_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._cloud_to_packed_first_idx = torch.zeros( - (0,), dtype=torch.int64, device=self.device - ) - self._normals_packed = None - self._features_packed = None - return - - points_list_to_packed = struct_utils.list_to_packed(points_list) - self._points_packed = points_list_to_packed[0] - if not torch.allclose(self._num_points_per_cloud, points_list_to_packed[1]): - raise ValueError("Inconsistent list to packed conversion") - self._cloud_to_packed_first_idx = points_list_to_packed[2] - self._packed_to_cloud_idx = points_list_to_packed[3] - - self._normals_packed, self._features_packed = None, None - if normals_list is not None: - normals_list_to_packed = struct_utils.list_to_packed(normals_list) - self._normals_packed = normals_list_to_packed[0] - - if features_list is not None: - features_list_to_packed = struct_utils.list_to_packed(features_list) - self._features_packed = features_list_to_packed[0] - - def clone(self): - """ - Deep copy of Pointclouds object. All internal tensors are cloned - individually. - - Returns: - new Pointclouds object. - """ - # instantiate new pointcloud with the representation which is not None - # (either list or tensor) to save compute. 
- new_points, new_normals, new_features = None, None, None - if self._points_list is not None: - new_points = [v.clone() for v in self.points_list()] - normals_list = self.normals_list() - features_list = self.features_list() - if normals_list is not None: - new_normals = [n.clone() for n in normals_list] - if features_list is not None: - new_features = [f.clone() for f in features_list] - elif self._points_padded is not None: - new_points = self.points_padded().clone() - normals_padded = self.normals_padded() - features_padded = self.features_padded() - if normals_padded is not None: - new_normals = self.normals_padded().clone() - if features_padded is not None: - new_features = self.features_padded().clone() - other = self.__class__( - points=new_points, normals=new_normals, features=new_features - ) - for k in self._INTERNAL_TENSORS: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(other, k, v.clone()) - return other - - def detach(self): - """ - Detach Pointclouds object. All internal tensors are detached - individually. - - Returns: - new Pointclouds object. - """ - # instantiate new pointcloud with the representation which is not None - # (either list or tensor) to save compute. - new_points, new_normals, new_features = None, None, None - if self._points_list is not None: - new_points = [v.detach() for v in self.points_list()] - normals_list = self.normals_list() - features_list = self.features_list() - if normals_list is not None: - new_normals = [n.detach() for n in normals_list] - if features_list is not None: - new_features = [f.detach() for f in features_list] - elif self._points_padded is not None: - new_points = self.points_padded().detach() - normals_padded = self.normals_padded() - features_padded = self.features_padded() - if normals_padded is not None: - new_normals = self.normals_padded().detach() - if features_padded is not None: - new_features = self.features_padded().detach() - other = self.__class__( - points=new_points, normals=new_normals, features=new_features - ) - for k in self._INTERNAL_TENSORS: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(other, k, v.detach()) - return other - - def to(self, device: Device, copy: bool = False): - """ - Match functionality of torch.Tensor.to() - If copy = True or the self Tensor is on a different device, the - returned tensor is a copy of self with the desired torch.device. - If copy = False and the self Tensor already has the correct torch.device, - then self is returned. - - Args: - device: Device (as str or torch.device) for the new tensor. - copy: Boolean indicator whether or not to clone self. Default False. - - Returns: - Pointclouds object. - """ - device_ = make_device(device) - - if not copy and self.device == device_: - return self - - other = self.clone() - if self.device == device_: - return other - - other.device = device_ - if other._N > 0: - other._points_list = [v.to(device_) for v in other.points_list()] - if other._normals_list is not None: - other._normals_list = [n.to(device_) for n in other.normals_list()] - if other._features_list is not None: - other._features_list = [f.to(device_) for f in other.features_list()] - for k in self._INTERNAL_TENSORS: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(other, k, v.to(device_)) - return other - - def cpu(self): - return self.to("cpu") - - def cuda(self): - return self.to("cuda") - - def get_cloud(self, index: int): - """ - Get tensors for a single cloud from the list representation. - - Args: - index: Integer in the range [0, N). 
- - Returns: - points: Tensor of shape (P, 3). - normals: Tensor of shape (P, 3) - features: LongTensor of shape (P, C). - """ - if not isinstance(index, int): - raise ValueError("Cloud index must be an integer.") - if index < 0 or index > self._N: - raise ValueError( - "Cloud index must be in the range [0, N) where \ - N is the number of clouds in the batch." - ) - points = self.points_list()[index] - normals, features = None, None - normals_list = self.normals_list() - if normals_list is not None: - normals = normals_list[index] - features_list = self.features_list() - if features_list is not None: - features = features_list[index] - return points, normals, features - - # TODO(nikhilar) Move function to a utils file. - def split(self, split_sizes: list): - """ - Splits Pointclouds object of size N into a list of Pointclouds objects - of size len(split_sizes), where the i-th Pointclouds object is of size - split_sizes[i]. Similar to torch.split(). - - Args: - split_sizes: List of integer sizes of Pointclouds objects to be - returned. - - Returns: - list[Pointclouds]. - """ - if not all(isinstance(x, int) for x in split_sizes): - raise ValueError("Value of split_sizes must be a list of integers.") - cloudlist = [] - curi = 0 - for i in split_sizes: - cloudlist.append(self[curi : curi + i]) - curi += i - return cloudlist - - def offset_(self, offsets_packed): - """ - Translate the point clouds by an offset. In place operation. - - Args: - offsets_packed: A Tensor of shape (3,) or the same shape - as self.points_packed giving offsets to be added to - all points. - - Returns: - self. - """ - points_packed = self.points_packed() - if offsets_packed.shape == (3,): - offsets_packed = offsets_packed.expand_as(points_packed) - if offsets_packed.shape != points_packed.shape: - raise ValueError("Offsets must have dimension (all_p, 3).") - self._points_packed = points_packed + offsets_packed - new_points_list = list( - self._points_packed.split(self.num_points_per_cloud().tolist(), 0) - ) - # Note that since _compute_packed() has been executed, points_list - # cannot be None even if not provided during construction. - self._points_list = new_points_list - if self._points_padded is not None: - for i, points in enumerate(new_points_list): - if len(points) > 0: - self._points_padded[i, : points.shape[0], :] = points - return self - - # TODO(nikhilar) Move out of place operator to a utils file. - def offset(self, offsets_packed): - """ - Out of place offset. - - Args: - offsets_packed: A Tensor of the same shape as self.points_packed - giving offsets to be added to all points. - Returns: - new Pointclouds object. - """ - new_clouds = self.clone() - return new_clouds.offset_(offsets_packed) - - def subsample(self, max_points: Union[int, Sequence[int]]) -> "Pointclouds": - """ - Subsample each cloud so that it has at most max_points points. - - Args: - max_points: maximum number of points in each cloud. - - Returns: - new Pointclouds object, or self if nothing to be done. 
- """ - if isinstance(max_points, int): - max_points = [max_points] * len(self) - elif len(max_points) != len(self): - raise ValueError("wrong number of max_points supplied") - if all( - int(n_points) <= int(max_) - for n_points, max_ in zip(self.num_points_per_cloud(), max_points) - ): - return self - - points_list = [] - features_list = [] - normals_list = [] - for max_, n_points, points, features, normals in zip_longest( - map(int, max_points), - map(int, self.num_points_per_cloud()), - self.points_list(), - self.features_list() or (), - self.normals_list() or (), - ): - if n_points > max_: - keep_np = np.random.choice(n_points, max_, replace=False) - keep = torch.tensor(keep_np, device=points.device, dtype=torch.int64) - points = points[keep] - if features is not None: - features = features[keep] - if normals is not None: - normals = normals[keep] - points_list.append(points) - features_list.append(features) - normals_list.append(normals) - - return Pointclouds( - points=points_list, - normals=self.normals_list() and normals_list, - features=self.features_list() and features_list, - ) - - def scale_(self, scale): - """ - Multiply the coordinates of this object by a scalar value. - - i.e. enlarge/dilate - In place operation. - - Args: - scale: A scalar, or a Tensor of shape (N,). - - Returns: - self. - """ - if not torch.is_tensor(scale): - scale = torch.full((len(self),), scale, device=self.device) - new_points_list = [] - points_list = self.points_list() - for i, old_points in enumerate(points_list): - new_points_list.append(scale[i] * old_points) - self._points_list = new_points_list - if self._points_packed is not None: - self._points_packed = torch.cat(new_points_list, dim=0) - if self._points_padded is not None: - for i, points in enumerate(new_points_list): - if len(points) > 0: - self._points_padded[i, : points.shape[0], :] = points - return self - - def scale(self, scale): - """ - Out of place scale_. - - Args: - scale: A scalar, or a Tensor of shape (N,). - - Returns: - new Pointclouds object. - """ - new_clouds = self.clone() - return new_clouds.scale_(scale) - - # TODO(nikhilar) Move function to utils file. - def get_bounding_boxes(self): - """ - Compute an axis-aligned bounding box for each cloud. - - Returns: - bboxes: Tensor of shape (N, 3, 2) where bbox[i, j] gives the - min and max values of cloud i along the jth coordinate axis. - """ - all_mins, all_maxes = [], [] - for points in self.points_list(): - cur_mins = points.min(dim=0)[0] # (3,) - cur_maxes = points.max(dim=0)[0] # (3,) - all_mins.append(cur_mins) - all_maxes.append(cur_maxes) - all_mins = torch.stack(all_mins, dim=0) # (N, 3) - all_maxes = torch.stack(all_maxes, dim=0) # (N, 3) - bboxes = torch.stack([all_mins, all_maxes], dim=2) - return bboxes - - def estimate_normals( - self, - neighborhood_size: int = 50, - disambiguate_directions: bool = True, - assign_to_self: bool = False, - ): - """ - Estimates the normals of each point in each cloud and assigns - them to the internal tensors `self._normals_list` and `self._normals_padded` - - The function uses `ops.estimate_pointcloud_local_coord_frames` - to estimate the normals. Please refer to that function for more - detailed information about the implemented algorithm. - - Args: - **neighborhood_size**: The size of the neighborhood used to estimate the - geometry around each point. - **disambiguate_directions**: If `True`, uses the algorithm from [1] to - ensure sign consistency of the normals of neighboring points. 
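An illustrative sketch of subsample and get_bounding_boxes (toy data; assumes torch and pytorch3d). subsample draws a random subset per cloud, so the selection is not deterministic:

import torch
from pytorch3d.structures import Pointclouds

clouds = Pointclouds(points=[torch.rand(100, 3), torch.rand(20, 3)])
smaller = clouds.subsample(50)              # at most 50 points per cloud
smaller.num_points_per_cloud()              # tensor([50, 20])
bboxes = clouds.get_bounding_boxes()        # (N, 3, 2) per-cloud min/max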
- **normals**: A tensor of normals for each input point - of shape `(minibatch, num_point, 3)`. - If `pointclouds` are of `Pointclouds` class, returns a padded tensor. - **assign_to_self**: If `True`, assigns the computed normals to the - internal buffers overwriting any previously stored normals. - - References: - [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms for - Local Surface Description, ECCV 2010. - """ - from .. import ops - - # estimate the normals - normals_est = ops.estimate_pointcloud_normals( - self, - neighborhood_size=neighborhood_size, - disambiguate_directions=disambiguate_directions, - ) - - # assign to self - if assign_to_self: - _, self._normals_padded, _ = self._parse_auxiliary_input(normals_est) - self._normals_list, self._normals_packed = None, None - if self._points_list is not None: - # update self._normals_list - self.normals_list() - if self._points_packed is not None: - # update self._normals_packed - self._normals_packed = torch.cat(self._normals_list, dim=0) - - return normals_est - - def extend(self, N: int): - """ - Create new Pointclouds which contains each cloud N times. - - Args: - N: number of new copies of each cloud. - - Returns: - new Pointclouds object. - """ - if not isinstance(N, int): - raise ValueError("N must be an integer.") - if N <= 0: - raise ValueError("N must be > 0.") - - new_points_list, new_normals_list, new_features_list = [], None, None - for points in self.points_list(): - new_points_list.extend(points.clone() for _ in range(N)) - normals_list = self.normals_list() - if normals_list is not None: - new_normals_list = [] - for normals in normals_list: - new_normals_list.extend(normals.clone() for _ in range(N)) - features_list = self.features_list() - if features_list is not None: - new_features_list = [] - for features in features_list: - new_features_list.extend(features.clone() for _ in range(N)) - return self.__class__( - points=new_points_list, normals=new_normals_list, features=new_features_list - ) - - def update_padded( - self, new_points_padded, new_normals_padded=None, new_features_padded=None - ): - """ - Returns a Pointcloud structure with updated padded tensors and copies of - the auxiliary tensors. This function allows for an update of - points_padded (and normals and features) without having to explicitly - convert it to the list representation for heterogeneous batches. - - Args: - new_points_padded: FloatTensor of shape (N, P, 3) - new_normals_padded: (optional) FloatTensor of shape (N, P, 3) - new_features_padded: (optional) FloatTensor of shape (N, P, C) - - Returns: - Pointcloud with updated padded representations - """ - - def check_shapes(x, size): - if x.shape[0] != size[0]: - raise ValueError("new values must have the same batch dimension.") - if x.shape[1] != size[1]: - raise ValueError("new values must have the same number of points.") - if size[2] is not None: - if x.shape[2] != size[2]: - raise ValueError( - "new values must have the same number of channels." 
- ) - - check_shapes(new_points_padded, [self._N, self._P, 3]) - if new_normals_padded is not None: - check_shapes(new_normals_padded, [self._N, self._P, 3]) - if new_features_padded is not None: - check_shapes(new_features_padded, [self._N, self._P, self._C]) - - new = self.__class__( - points=new_points_padded, - normals=new_normals_padded, - features=new_features_padded, - ) - - # overwrite the equisized flag - new.equisized = self.equisized - - # copy normals - if new_normals_padded is None: - # If no normals are provided, keep old ones (shallow copy) - new._normals_list = self._normals_list - new._normals_padded = self._normals_padded - new._normals_packed = self._normals_packed - - # copy features - if new_features_padded is None: - # If no features are provided, keep old ones (shallow copy) - new._features_list = self._features_list - new._features_padded = self._features_padded - new._features_packed = self._features_packed - - # copy auxiliary tensors - copy_tensors = [ - "_packed_to_cloud_idx", - "_cloud_to_packed_first_idx", - "_num_points_per_cloud", - "_padded_to_packed_idx", - "valid", - ] - for k in copy_tensors: - v = getattr(self, k) - if torch.is_tensor(v): - setattr(new, k, v) # shallow copy - - # update points - new._points_padded = new_points_padded - assert new._points_list is None - assert new._points_packed is None - - # update normals and features if provided - if new_normals_padded is not None: - new._normals_padded = new_normals_padded - new._normals_list = None - new._normals_packed = None - if new_features_padded is not None: - new._features_padded = new_features_padded - new._features_list = None - new._features_packed = None - return new - - def inside_box(self, box): - """ - Finds the points inside a 3D box. - - Args: - box: FloatTensor of shape (2, 3) or (N, 2, 3) where N is the number - of clouds. - box[..., 0, :] gives the min x, y & z. - box[..., 1, :] gives the max x, y & z. - Returns: - idx: BoolTensor of length sum(P_i) indicating whether the packed points are - within the input box. - """ - if box.dim() > 3 or box.dim() < 2: - raise ValueError("Input box must be of shape (2, 3) or (N, 2, 3).") - - if box.dim() == 3 and box.shape[0] != 1 and box.shape[0] != self._N: - raise ValueError( - "Input box dimension is incompatible with pointcloud size." - ) - - if box.dim() == 2: - box = box[None] - - if (box[..., 0, :] > box[..., 1, :]).any(): - raise ValueError("Input box is invalid: min values larger than max values.") - - points_packed = self.points_packed() - sumP = points_packed.shape[0] - - if box.shape[0] == 1: - box = box.expand(sumP, 2, 3) - elif box.shape[0] == self._N: - box = box.unbind(0) - box = [ - b.expand(p, 2, 3) for (b, p) in zip(box, self.num_points_per_cloud()) - ] - box = torch.cat(box, 0) - - coord_inside = (points_packed >= box[:, 0]) * (points_packed <= box[:, 1]) - return coord_inside.all(dim=-1) - - -def join_pointclouds_as_batch(pointclouds: Sequence[Pointclouds]) -> Pointclouds: - """ - Merge a list of Pointclouds objects into a single batched Pointclouds - object. All pointclouds must be on the same device. 
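A small sketch of inside_box (illustrative; assumes torch and pytorch3d): packed points are tested against a single axis-aligned box shared by the whole batch.

import torch
from pytorch3d.structures import Pointclouds

clouds = Pointclouds(points=[torch.rand(10, 3), torch.rand(5, 3)])
box = torch.tensor([[0.25, 0.25, 0.25],     # min corner (x, y, z)
                    [0.75, 0.75, 0.75]])    # max corner (x, y, z)
mask = clouds.inside_box(box)               # BoolTensor of length sum(P_i) = 15
inside = clouds.points_packed()[mask]       # packed points falling inside the box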
- - Args: - batch: List of Pointclouds objects each with batch dim [b1, b2, ..., bN] - Returns: - pointcloud: Poinclouds object with all input pointclouds collated into - a single object with batch dim = sum(b1, b2, ..., bN) - """ - if isinstance(pointclouds, Pointclouds) or not isinstance(pointclouds, Sequence): - raise ValueError("Wrong first argument to join_points_as_batch.") - - device = pointclouds[0].device - if not all(p.device == device for p in pointclouds): - raise ValueError("Pointclouds must all be on the same device") - - kwargs = {} - for field in ("points", "normals", "features"): - field_list = [getattr(p, field + "_list")() for p in pointclouds] - if None in field_list: - if field == "points": - raise ValueError("Pointclouds cannot have their points set to None!") - if not all(f is None for f in field_list): - raise ValueError( - f"Pointclouds in the batch have some fields '{field}'" - + " defined and some set to None." - ) - field_list = None - else: - field_list = [p for points in field_list for p in points] - if field == "features" and any( - p.shape[1] != field_list[0].shape[1] for p in field_list[1:] - ): - raise ValueError("Pointclouds must have the same number of features") - kwargs[field] = field_list - - return Pointclouds(**kwargs) - - -def join_pointclouds_as_scene( - pointclouds: Union[Pointclouds, List[Pointclouds]] -) -> Pointclouds: - """ - Joins a batch of point cloud in the form of a Pointclouds object or a list of Pointclouds - objects as a single point cloud. If the input is a list, the Pointclouds objects in the - list must all be on the same device, and they must either all or none have features and - all or none have normals. - - Args: - Pointclouds: Pointclouds object that contains a batch of point clouds, or a list of - Pointclouds objects. - - Returns: - new Pointclouds object containing a single point cloud - """ - if isinstance(pointclouds, list): - pointclouds = join_pointclouds_as_batch(pointclouds) - - if len(pointclouds) == 1: - return pointclouds - points = pointclouds.points_packed() - features = pointclouds.features_packed() - normals = pointclouds.normals_packed() - pointcloud = Pointclouds( - points=points[None], - features=None if features is None else features[None], - normals=None if normals is None else normals[None], - ) - return pointcloud diff --git a/pytorch3d/pytorch3d/structures/utils.py b/pytorch3d/pytorch3d/structures/utils.py deleted file mode 100644 index aab4fc3dad08b9f83255a1bc837c5e78b5f929ef..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/structures/utils.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Sequence, Tuple, Union - -import torch - - -""" -Util functions for points/verts/faces/volumes. -""" - - -def list_to_padded( - x: Union[List[torch.Tensor], Tuple[torch.Tensor]], - pad_size: Union[Sequence[int], None] = None, - pad_value: float = 0.0, - equisized: bool = False, -) -> torch.Tensor: - r""" - Transforms a list of N tensors each of shape (Si_0, Si_1, ... Si_D) - into: - - a single tensor of shape (N, pad_size(0), pad_size(1), ..., pad_size(D)) - if pad_size is provided - - or a tensor of shape (N, max(Si_0), max(Si_1), ..., max(Si_D)) if pad_size is None. - - Args: - x: list of Tensors - pad_size: list(int) specifying the size of the padded tensor. 
- If `None` (default), the largest size of each dimension - is set as the `pad_size`. - pad_value: float value to be used to fill the padded tensor - equisized: bool indicating whether the items in x are of equal size - (sometimes this is known and if provided saves computation) - - Returns: - x_padded: tensor consisting of padded input tensors stored - over the newly allocated memory. - """ - if equisized: - return torch.stack(x, 0) - - if not all(torch.is_tensor(y) for y in x): - raise ValueError("All items have to be instances of a torch.Tensor.") - - # we set the common number of dimensions to the maximum - # of the dimensionalities of the tensors in the list - element_ndim = max(y.ndim for y in x) - - # replace empty 1D tensors with empty tensors with a correct number of dimensions - x = [ - (y.new_zeros([0] * element_ndim) if (y.ndim == 1 and y.nelement() == 0) else y) - for y in x - ] - - if any(y.ndim != x[0].ndim for y in x): - raise ValueError("All items have to have the same number of dimensions!") - - if pad_size is None: - pad_dims = [ - max(y.shape[dim] for y in x if len(y) > 0) for dim in range(x[0].ndim) - ] - else: - if any(len(pad_size) != y.ndim for y in x): - raise ValueError("Pad size must contain target size for all dimensions.") - pad_dims = pad_size - - N = len(x) - x_padded = x[0].new_full((N, *pad_dims), pad_value) - for i, y in enumerate(x): - if len(y) > 0: - slices = (i, *(slice(0, y.shape[dim]) for dim in range(y.ndim))) - x_padded[slices] = y - return x_padded - - -def padded_to_list( - x: torch.Tensor, - split_size: Union[Sequence[int], Sequence[Sequence[int]], None] = None, -): - r""" - Transforms a padded tensor of shape (N, S_1, S_2, ..., S_D) into a list - of N tensors of shape: - - (Si_1, Si_2, ..., Si_D) where (Si_1, Si_2, ..., Si_D) is specified in split_size(i) - - or (S_1, S_2, ..., S_D) if split_size is None - - or (Si_1, S_2, ..., S_D) if split_size(i) is an integer. - - Args: - x: tensor - split_size: optional 1D or 2D list/tuple of ints defining the number of - items for each tensor. - - Returns: - x_list: a list of tensors sharing the memory with the input. - """ - x_list = list(x.unbind(0)) - - if split_size is None: - return x_list - - N = len(split_size) - if x.shape[0] != N: - raise ValueError("Split size must be of same length as inputs first dimension") - - for i in range(N): - if isinstance(split_size[i], int): - x_list[i] = x_list[i][: split_size[i]] - else: - slices = tuple(slice(0, s) for s in split_size[i]) # pyre-ignore - x_list[i] = x_list[i][slices] - return x_list - - -def list_to_packed(x: List[torch.Tensor]): - r""" - Transforms a list of N tensors each of shape (Mi, K, ...) into a single - tensor of shape (sum(Mi), K, ...). - - Args: - x: list of tensors. - - Returns: - 4-element tuple containing - - - **x_packed**: tensor consisting of packed input tensors along the - 1st dimension. - - **num_items**: tensor of shape N containing Mi for each element in x. - - **item_packed_first_idx**: tensor of shape N indicating the index of - the first item belonging to the same element in the original list. - - **item_packed_to_list_idx**: tensor of shape sum(Mi) containing the - index of the element in the list the item belongs to. 
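A small round-trip sketch of the list/padded/packed conversions implemented in this module (assuming `pytorch3d.structures.utils` is importable; the ragged input is arbitrary):

import torch
from pytorch3d.structures.utils import list_to_packed, list_to_padded, padded_to_list

x = [torch.rand(2, 3), torch.rand(4, 3)]             # ragged list of tensors

padded = list_to_padded(x, pad_value=0.0)            # shape (2, 4, 3), zero-padded
back = padded_to_list(padded, split_size=[2, 4])     # recovers the original shapes
assert all(torch.equal(a, b) for a, b in zip(x, back))

packed, num_items, first_idx, to_list_idx = list_to_packed(x)
# packed: (6, 3); num_items: tensor([2, 4]);
# first_idx: tensor([0, 2]); to_list_idx: tensor([0, 0, 1, 1, 1, 1])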
- """ - N = len(x) - num_items = torch.zeros(N, dtype=torch.int64, device=x[0].device) - item_packed_first_idx = torch.zeros(N, dtype=torch.int64, device=x[0].device) - item_packed_to_list_idx = [] - cur = 0 - for i, y in enumerate(x): - num = len(y) - num_items[i] = num - item_packed_first_idx[i] = cur - item_packed_to_list_idx.append( - torch.full((num,), i, dtype=torch.int64, device=y.device) - ) - cur += num - - x_packed = torch.cat(x, dim=0) - item_packed_to_list_idx = torch.cat(item_packed_to_list_idx, dim=0) - - return x_packed, num_items, item_packed_first_idx, item_packed_to_list_idx - - -def packed_to_list(x: torch.Tensor, split_size: Union[list, int]): - r""" - Transforms a tensor of shape (sum(Mi), K, L, ...) to N set of tensors of - shape (Mi, K, L, ...) where Mi's are defined in split_size - - Args: - x: tensor - split_size: list, tuple or int defining the number of items for each tensor - in the output list. - - Returns: - x_list: A list of Tensors - """ - return x.split(split_size, dim=0) - - -def padded_to_packed( - x: torch.Tensor, - split_size: Union[list, tuple, None] = None, - pad_value: Union[float, int, None] = None, -): - r""" - Transforms a padded tensor of shape (N, M, K) into a packed tensor - of shape: - - (sum(Mi), K) where (Mi, K) are the dimensions of - each of the tensors in the batch and Mi is specified by split_size(i) - - (N*M, K) if split_size is None - - Support only for 3-dimensional input tensor and 1-dimensional split size. - - Args: - x: tensor - split_size: list, tuple or int defining the number of items for each tensor - in the output list. - pad_value: optional value to use to filter the padded values in the input - tensor. - - Only one of split_size or pad_value should be provided, or both can be None. - - Returns: - x_packed: a packed tensor. - """ - if x.ndim != 3: - raise ValueError("Supports only 3-dimensional input tensors") - - N, M, D = x.shape - - if split_size is not None and pad_value is not None: - raise ValueError("Only one of split_size or pad_value should be provided.") - - x_packed = x.reshape(-1, D) # flatten padded - - if pad_value is None and split_size is None: - return x_packed - - # Convert to packed using pad value - if pad_value is not None: - mask = x_packed.ne(pad_value).any(-1) - x_packed = x_packed[mask] - return x_packed - - # Convert to packed using split sizes - # pyre-fixme[6]: Expected `Sized` for 1st param but got `Union[None, - # List[typing.Any], typing.Tuple[typing.Any, ...]]`. - N = len(split_size) - if x.shape[0] != N: - raise ValueError("Split size must be of same length as inputs first dimension") - - # pyre-fixme[16]: `None` has no attribute `__iter__`. - if not all(isinstance(i, int) for i in split_size): - raise ValueError( - "Support only 1-dimensional unbinded tensor. \ - Split size for more dimensions provided" - ) - - padded_to_packed_idx = torch.cat( - [ - torch.arange(v, dtype=torch.int64, device=x.device) + i * M - # pyre-fixme[6]: Expected `Iterable[Variable[_T]]` for 1st param but got - # `Union[None, List[typing.Any], typing.Tuple[typing.Any, ...]]`. - for (i, v) in enumerate(split_size) - ], - dim=0, - ) - - return x_packed[padded_to_packed_idx] diff --git a/pytorch3d/pytorch3d/structures/volumes.py b/pytorch3d/pytorch3d/structures/volumes.py deleted file mode 100644 index 23ed743da0f682cba3b6ea7c2a7dd68ca062b6e4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/structures/volumes.py +++ /dev/null @@ -1,1135 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import copy -from typing import List, Optional, Tuple, Union - -import torch -from pytorch3d.common.compat import meshgrid_ij -from pytorch3d.common.datatypes import Device, make_device -from pytorch3d.transforms import Scale, Transform3d - -from . import utils as struct_utils - - -_Scalar = Union[int, float] -_Vector = Union[torch.Tensor, Tuple[_Scalar, ...], List[_Scalar]] -_ScalarOrVector = Union[_Scalar, _Vector] - -_VoxelSize = _ScalarOrVector -_Translation = _Vector - -_TensorBatch = Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]] -_ALL_CONTENT: slice = slice(0, None) - - -class Volumes: - """ - This class provides functions for working with batches of volumetric grids - of possibly varying spatial sizes. - - VOLUME DENSITIES - - The Volumes class can be either constructed from a 5D tensor of - `densities` of size `batch x density_dim x depth x height x width` or - from a list of differently-sized 4D tensors `[D_1, ..., D_batch]`, - where each `D_i` is of size `[density_dim x depth_i x height_i x width_i]`. - - In case the `Volumes` object is initialized from the list of `densities`, - the list of tensors is internally converted to a single 5D tensor by - zero-padding the relevant dimensions. Both list and padded representations can be - accessed with the `Volumes.densities()` or `Volumes.densities_list()` getters. - The sizes of the individual volumes in the structure can be retrieved - with the `Volumes.get_grid_sizes()` getter. - - The `Volumes` class is immutable. I.e. after generating a `Volumes` object, - one cannot change its properties, such as `self._densities` or `self._features` - anymore. - - - VOLUME FEATURES - - While the `densities` field is intended to represent various measures of the - "density" of the volume cells (opacity, signed/unsigned distances - from the nearest surface, ...), one can additionally initialize the - object with the `features` argument. `features` are either a 5D tensor - of shape `batch x feature_dim x depth x height x width` or a list of - of differently-sized 4D tensors `[F_1, ..., F_batch]`, - where each `F_i` is of size `[feature_dim x depth_i x height_i x width_i]`. - `features` are intended to describe other properties of volume cells, - such as per-voxel 3D vectors of RGB colors that can be later used - for rendering the volume. - - - VOLUME COORDINATES - - Additionally, using the `VolumeLocator` class the `Volumes` class keeps track - of the locations of the centers of the volume cells in the local volume - coordinates as well as in the world coordinates. - - Local coordinates: - - Represent the locations of the volume cells in the local coordinate - frame of the volume. - - The center of the voxel indexed with `[Β·, Β·, 0, 0, 0]` in the volume - has its 3D local coordinate set to `[-1, -1, -1]`, while the voxel - at index `[Β·, Β·, depth_i-1, height_i-1, width_i-1]` has its - 3D local coordinate set to `[1, 1, 1]`. - - The first/second/third coordinate of each of the 3D per-voxel - XYZ vector denotes the horizontal/vertical/depth-wise position - respectively. I.e the order of the coordinate dimensions in the - volume is reversed w.r.t. the order of the 3D coordinate vectors. - - The intermediate coordinates between `[-1, -1, -1]` and `[1, 1, 1]`. - are linearly interpolated over the spatial dimensions of the volume. 
- - Note that the convention is the same as for the 5D version of the - `torch.nn.functional.grid_sample` function called with - the same value of `align_corners` argument. - - Note that the local coordinate convention of `Volumes` - (+X = left to right, +Y = top to bottom, +Z = away from the user) - is *different* from the world coordinate convention of the - renderer for `Meshes` or `Pointclouds` - (+X = right to left, +Y = bottom to top, +Z = away from the user). - - World coordinates: - - These define the locations of the centers of the volume cells - in the world coordinates. - - They are specified with the following mapping that converts - points `x_local` in the local coordinates to points `x_world` - in the world coordinates:: - - x_world = ( - x_local * (volume_size - 1) * 0.5 * voxel_size - ) - volume_translation, - - here `voxel_size` specifies the size of each voxel of the volume, - and `volume_translation` is the 3D offset of the central voxel of - the volume w.r.t. the origin of the world coordinate frame. - Both `voxel_size` and `volume_translation` are specified in - the world coordinate units. `volume_size` is the spatial size of - the volume in form of a 3D vector `[width, height, depth]`. - - Given the above definition of `x_world`, one can derive the - inverse mapping from `x_world` to `x_local` as follows:: - - x_local = ( - (x_world + volume_translation) / (0.5 * voxel_size) - ) / (volume_size - 1) - - - For a trivial volume with `volume_translation==[0, 0, 0]` - with `voxel_size=-1`, `x_world` would range - from -(volume_size-1)/2` to `+(volume_size-1)/2`. - - Coordinate tensors that denote the locations of each of the volume cells in - local / world coordinates (with shape `(depth x height x width x 3)`) - can be retrieved by calling the `Volumes.get_coord_grid()` getter with the - appropriate `world_coordinates` argument. - - Internally, the mapping between `x_local` and `x_world` is represented - as a `Transform3d` object `Volumes.VolumeLocator._local_to_world_transform`. - Users can access the relevant transformations with the - `Volumes.get_world_to_local_coords_transform()` and - `Volumes.get_local_to_world_coords_transform()` - functions. - - Example coordinate conversion: - - For a "trivial" volume with `voxel_size = 1.`, - `volume_translation=[0., 0., 0.]`, and the spatial size of - `DxHxW = 5x5x5`, the point `x_world = (-2, 0, 2)` gets mapped - to `x_local=(-1, 0, 1)`. - - For a "trivial" volume `v` with `voxel_size = 1.`, - `volume_translation=[0., 0., 0.]`, the following holds: - - torch.nn.functional.grid_sample( - v.densities(), - v.get_coord_grid(world_coordinates=False), - align_corners=align_corners, - ) == v.densities(), - - i.e. sampling the volume at trivial local coordinates - (no scaling with `voxel_size`` or shift with `volume_translation`) - results in the same volume. - """ - - def __init__( - self, - densities: _TensorBatch, - features: Optional[_TensorBatch] = None, - voxel_size: _VoxelSize = 1.0, - volume_translation: _Translation = (0.0, 0.0, 0.0), - align_corners: bool = True, - ) -> None: - """ - Args: - **densities**: Batch of input feature volume occupancies of shape - `(minibatch, density_dim, depth, height, width)`, or a list - of 4D tensors `[D_1, ..., D_minibatch]` where each `D_i` has - shape `(density_dim, depth_i, height_i, width_i)`. - Typically, each voxel contains a non-negative number - corresponding to its opaqueness. 
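To make the local/world mapping above concrete, here is a short numeric check mirroring the docstring example (a sketch only, assuming `pytorch3d.structures.Volumes` from the installed package; the trivial 5x5x5 volume, unit voxel size and zero translation are taken from the example above):

import torch
from pytorch3d.structures import Volumes

densities = torch.zeros(1, 1, 5, 5, 5)              # (minibatch, density_dim, D, H, W)
v = Volumes(densities=densities, voxel_size=1.0, volume_translation=(0.0, 0.0, 0.0))

x_world = torch.tensor([[[-2.0, 0.0, 2.0]]])        # (minibatch, ..., 3)
x_local = v.world_to_local_coords(x_world)          # -> tensor([[[-1., 0., 1.]]])

# closed form with zero translation: x_local = x_world / (0.5 * voxel_size * (size - 1))
manual = x_world / (0.5 * 1.0 * (5 - 1))
assert torch.allclose(x_local, manual)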
- **features**: Batch of input feature volumes of shape: - `(minibatch, feature_dim, depth, height, width)` or a list - of 4D tensors `[F_1, ..., F_minibatch]` where each `F_i` has - shape `(feature_dim, depth_i, height_i, width_i)`. - The field is optional and can be set to `None` in case features are - not required. - **voxel_size**: Denotes the size of each volume voxel in world units. - Has to be one of: - a) A scalar (square voxels) - b) 3-tuple or a 3-list of scalars - c) a Tensor of shape (3,) - d) a Tensor of shape (minibatch, 3) - e) a Tensor of shape (minibatch, 1) - f) a Tensor of shape (1,) (square voxels) - **volume_translation**: Denotes the 3D translation of the center - of the volume in world units. Has to be one of: - a) 3-tuple or a 3-list of scalars - b) a Tensor of shape (3,) - c) a Tensor of shape (minibatch, 3) - d) a Tensor of shape (1,) (square voxels) - **align_corners**: If set (default), the coordinates of the corner voxels are - exactly βˆ’1 or +1 in the local coordinate system. Otherwise, the coordinates - correspond to the centers of the corner voxels. Cf. the namesake argument to - `torch.nn.functional.grid_sample`. - """ - - # handle densities - densities_, grid_sizes = self._convert_densities_features_to_tensor( - densities, "densities" - ) - - # take device from densities - self.device = densities_.device - - # assign to the internal buffers - self._densities = densities_ - - # assign a coordinate transformation member - self.locator = VolumeLocator( - batch_size=len(self), - grid_sizes=grid_sizes, - voxel_size=voxel_size, - volume_translation=volume_translation, - device=self.device, - align_corners=align_corners, - ) - - # handle features - self._features = None - if features is not None: - self._set_features(features) - - def _convert_densities_features_to_tensor( - self, x: _TensorBatch, var_name: str - ) -> Tuple[torch.Tensor, torch.LongTensor]: - """ - Handle the `densities` or `features` arguments to the constructor. - """ - if isinstance(x, (list, tuple)): - x_tensor = struct_utils.list_to_padded(x) - if any(x_.ndim != 4 for x_ in x): - raise ValueError( - f"`{var_name}` has to be a list of 4-dim tensors of shape: " - f"({var_name}_dim, height, width, depth)" - ) - if any(x_.shape[0] != x[0].shape[0] for x_ in x): - raise ValueError( - f"Each entry in the list of `{var_name}` has to have the " - "same number of channels (first dimension in the tensor)." - ) - x_shapes = torch.stack( - [ - torch.tensor( - list(x_.shape[1:]), dtype=torch.long, device=x_tensor.device - ) - for x_ in x - ], - dim=0, - ) - elif torch.is_tensor(x): - if x.ndim != 5: - raise ValueError( - f"`{var_name}` has to be a 5-dim tensor of shape: " - f"(minibatch, {var_name}_dim, height, width, depth)" - ) - x_tensor = x - x_shapes = torch.tensor( - list(x.shape[2:]), dtype=torch.long, device=x.device - )[None].repeat(x.shape[0], 1) - else: - raise ValueError( - f"{var_name} must be either a list or a tensor with " - f"shape (batch_size, {var_name}_dim, H, W, D)." - ) - # pyre-ignore[7] - return x_tensor, x_shapes - - def __len__(self) -> int: - return self._densities.shape[0] - - def __getitem__( - self, - index: Union[ - int, List[int], Tuple[int], slice, torch.BoolTensor, torch.LongTensor - ], - ) -> "Volumes": - """ - Args: - index: Specifying the index of the volume to retrieve. - Can be an int, slice, list of ints or a boolean or a long tensor. - - Returns: - Volumes object with selected volumes. The tensors are not cloned. 
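A minimal construction sketch for the class documented above (assuming `pytorch3d.structures.Volumes`; the grid sizes and channel counts are arbitrary):

import torch
from pytorch3d.structures import Volumes

# heterogeneous batch: two grids with different spatial sizes, padded internally
densities = [torch.rand(1, 8, 8, 8), torch.rand(1, 4, 16, 16)]
features = [torch.rand(3, 8, 8, 8), torch.rand(3, 4, 16, 16)]   # e.g. per-voxel RGB

v = Volumes(densities=densities, features=features, voxel_size=0.1)

assert v.densities().shape == (2, 1, 8, 16, 16)     # zero-padded 5D tensor
assert v.get_grid_sizes().tolist() == [[8, 8, 8], [4, 16, 16]]
v_first = v[0]                                      # indexing returns a new Volumes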
- """ - if isinstance(index, int): - index = torch.LongTensor([index]) - elif isinstance(index, (slice, list, tuple)): - pass - elif torch.is_tensor(index): - if index.dim() != 1 or index.dtype.is_floating_point: - raise IndexError(index) - else: - raise IndexError(index) - - new = self.__class__( - # pyre-fixme[16]: `Optional` has no attribute `__getitem__`. - features=self.features()[index] if self._features is not None else None, - densities=self.densities()[index], - ) - # dont forget to update grid_sizes! - self.locator._copy_transform_and_sizes(new.locator, index=index) - return new - - def features(self) -> Optional[torch.Tensor]: - """ - Returns the features of the volume. - - Returns: - **features**: The tensor of volume features. - """ - return self._features - - def densities(self) -> torch.Tensor: - """ - Returns the densities of the volume. - - Returns: - **densities**: The tensor of volume densities. - """ - return self._densities - - def densities_list(self) -> List[torch.Tensor]: - """ - Get the list representation of the densities. - - Returns: - list of tensors of densities of shape (dim_i, D_i, H_i, W_i). - """ - return self._features_densities_list(self.densities()) - - def features_list(self) -> List[torch.Tensor]: - """ - Get the list representation of the features. - - Returns: - list of tensors of features of shape (dim_i, D_i, H_i, W_i) - or `None` for feature-less volumes. - """ - features_ = self.features() - if features_ is None: - # No features provided so return None - # pyre-fixme[7]: Expected `List[torch.Tensor]` but got `None`. - return None - return self._features_densities_list(features_) - - def get_align_corners(self) -> bool: - """ - Return whether the corners of the voxels should be aligned with the - image pixels. - """ - return self.locator._align_corners - - def _features_densities_list(self, x: torch.Tensor) -> List[torch.Tensor]: - """ - Retrieve the list representation of features/densities. - - Args: - x: self.features() or self.densities() - - Returns: - list of tensors of features/densities of shape (dim_i, D_i, H_i, W_i). - """ - x_dim = x.shape[1] - pad_sizes = torch.nn.functional.pad( - self.get_grid_sizes(), [1, 0], mode="constant", value=x_dim - ) - x_list = struct_utils.padded_to_list(x, pad_sizes.tolist()) - return x_list - - def update_padded( - self, new_densities: torch.Tensor, new_features: Optional[torch.Tensor] = None - ) -> "Volumes": - """ - Returns a Volumes structure with updated padded tensors and copies of - the auxiliary tensors `self._local_to_world_transform`, - `device` and `self._grid_sizes`. This function allows for an update of - densities (and features) without having to explicitly - convert it to the list representation for heterogeneous batches. 
- - Args: - new_densities: FloatTensor of shape (N, dim_density, D, H, W) - new_features: (optional) FloatTensor of shape (N, dim_feature, D, H, W) - - Returns: - Volumes with updated features and densities - """ - new = copy.copy(self) - new._set_densities(new_densities) - if new_features is None: - new._features = None - else: - new._set_features(new_features) - return new - - def _set_features(self, features: _TensorBatch) -> None: - self._set_densities_features("features", features) - - def _set_densities(self, densities: _TensorBatch) -> None: - self._set_densities_features("densities", densities) - - def _set_densities_features(self, var_name: str, x: _TensorBatch) -> None: - x_tensor, grid_sizes = self._convert_densities_features_to_tensor(x, var_name) - if x_tensor.device != self.device: - raise ValueError( - f"`{var_name}` have to be on the same device as `self.densities`." - ) - if len(x_tensor.shape) != 5: - raise ValueError( - f"{var_name} has to be a 5-dim tensor of shape: " - f"(minibatch, {var_name}_dim, height, width, depth)" - ) - - if not ( - (self.get_grid_sizes().shape == grid_sizes.shape) - and torch.allclose(self.get_grid_sizes(), grid_sizes) - ): - raise ValueError( - f"The size of every grid in `{var_name}` has to match the size of" - "the corresponding `densities` grid." - ) - setattr(self, "_" + var_name, x_tensor) - - def clone(self) -> "Volumes": - """ - Deep copy of Volumes object. All internal tensors are cloned - individually. - - Returns: - new Volumes object. - """ - return copy.deepcopy(self) - - def to(self, device: Device, copy: bool = False) -> "Volumes": - """ - Match the functionality of torch.Tensor.to() - If copy = True or the self Tensor is on a different device, the - returned tensor is a copy of self with the desired torch.device. - If copy = False and the self Tensor already has the correct torch.device, - then self is returned. - - Args: - device: Device (as str or torch.device) for the new tensor. - copy: Boolean indicator whether or not to clone self. Default False. - - Returns: - Volumes object. - """ - device_ = make_device(device) - if not copy and self.device == device_: - return self - - other = self.clone() - if self.device == device_: - return other - - other.device = device_ - other._densities = self._densities.to(device_) - if self._features is not None: - # pyre-fixme[16]: `Optional` has no attribute `to`. - other._features = self.features().to(device_) - self.locator._copy_transform_and_sizes(other.locator, device=device_) - other.locator = other.locator.to(device, copy) - return other - - def cpu(self) -> "Volumes": - return self.to("cpu") - - def cuda(self) -> "Volumes": - return self.to("cuda") - - def get_grid_sizes(self) -> torch.LongTensor: - """ - Returns the sizes of individual volumetric grids in the structure. - - Returns: - **grid_sizes**: Tensor of spatial sizes of each of the volumes - of size (batchsize, 3), where i-th row holds (D_i, H_i, W_i). - """ - return self.locator.get_grid_sizes() - - def get_local_to_world_coords_transform(self) -> Transform3d: - """ - Return a Transform3d object that converts points in the - the local coordinate frame of the volume to world coordinates. - Local volume coordinates are scaled s.t. the coordinates along one - side of the volume are in range [-1, 1]. - - Returns: - **local_to_world_transform**: A Transform3d object converting - points from local coordinates to the world coordinates. 
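For completeness, a brief sketch of the padded-update pattern described above (illustrative only; assumes the same `Volumes` class):

import torch
from pytorch3d.structures import Volumes

v = Volumes(densities=torch.rand(2, 1, 4, 4, 4))
v2 = v.update_padded(new_densities=0.5 * v.densities())   # grid sizes and transform are reused
assert v2.get_grid_sizes().equal(v.get_grid_sizes())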
- """ - return self.locator.get_local_to_world_coords_transform() - - def get_world_to_local_coords_transform(self) -> Transform3d: - """ - Return a Transform3d object that converts points in the - world coordinates to the local coordinate frame of the volume. - Local volume coordinates are scaled s.t. the coordinates along one - side of the volume are in range [-1, 1]. - - Returns: - **world_to_local_transform**: A Transform3d object converting - points from world coordinates to local coordinates. - """ - return self.get_local_to_world_coords_transform().inverse() - - def world_to_local_coords(self, points_3d_world: torch.Tensor) -> torch.Tensor: - """ - Convert a batch of 3D point coordinates `points_3d_world` of shape - (minibatch, ..., dim) in the world coordinates to - the local coordinate frame of the volume. Local volume - coordinates are scaled s.t. the coordinates along one side of the volume - are in range [-1, 1]. - - Args: - **points_3d_world**: A tensor of shape `(minibatch, ..., 3)` - containing the 3D coordinates of a set of points that will - be converted from the local volume coordinates (ranging - within [-1, 1]) to the world coordinates - defined by the `self.center` and `self.voxel_size` parameters. - - Returns: - **points_3d_local**: `points_3d_world` converted to the local - volume coordinates of shape `(minibatch, ..., 3)`. - """ - return self.locator.world_to_local_coords(points_3d_world) - - def local_to_world_coords(self, points_3d_local: torch.Tensor) -> torch.Tensor: - """ - Convert a batch of 3D point coordinates `points_3d_local` of shape - (minibatch, ..., dim) in the local coordinate frame of the volume - to the world coordinates. - - Args: - **points_3d_local**: A tensor of shape `(minibatch, ..., 3)` - containing the 3D coordinates of a set of points that will - be converted from the local volume coordinates (ranging - within [-1, 1]) to the world coordinates - defined by the `self.center` and `self.voxel_size` parameters. - - Returns: - **points_3d_world**: `points_3d_local` converted to the world - coordinates of the volume of shape `(minibatch, ..., 3)`. - """ - return self.locator.local_to_world_coords(points_3d_local) - - def get_coord_grid(self, world_coordinates: bool = True) -> torch.Tensor: - """ - Return the 3D coordinate grid of the volumetric grid - in local (`world_coordinates=False`) or world coordinates - (`world_coordinates=True`). - - The grid records location of each center of the corresponding volume voxel. - - Local coordinates are scaled s.t. the values along one side of the - volume are in range [-1, 1]. - - Args: - **world_coordinates**: if `True`, the method - returns the grid in the world coordinates, - otherwise, in local coordinates. - - Returns: - **coordinate_grid**: The grid of coordinates of shape - `(minibatch, depth, height, width, 3)`, where `minibatch`, - `height`, `width` and `depth` are the batch size, height, width - and depth of the volume `features` or `densities`. - """ - return self.locator.get_coord_grid(world_coordinates) - - -class VolumeLocator: - """ - The `VolumeLocator` class keeps track of the locations of the - centers of the volume cells in the local volume coordinates as well as in - the world coordinates for a voxel grid structure in 3D. - - Local coordinates: - - Represent the locations of the volume cells in the local coordinate - frame of the volume. 
- - The center of the voxel indexed with `[Β·, Β·, 0, 0, 0]` in the volume - has its 3D local coordinate set to `[-1, -1, -1]`, while the voxel - at index `[Β·, Β·, depth_i-1, height_i-1, width_i-1]` has its - 3D local coordinate set to `[1, 1, 1]`. - - The first/second/third coordinate of each of the 3D per-voxel - XYZ vector denotes the horizontal/vertical/depth-wise position - respectively. I.e the order of the coordinate dimensions in the - volume is reversed w.r.t. the order of the 3D coordinate vectors. - - The intermediate coordinates between `[-1, -1, -1]` and `[1, 1, 1]`. - are linearly interpolated over the spatial dimensions of the volume. - - Note that the convention is the same as for the 5D version of the - `torch.nn.functional.grid_sample` function called with - the same value of `align_corners` argument. - - Note that the local coordinate convention of `VolumeLocator` - (+X = left to right, +Y = top to bottom, +Z = away from the user) - is *different* from the world coordinate convention of the - renderer for `Meshes` or `Pointclouds` - (+X = right to left, +Y = bottom to top, +Z = away from the user). - - World coordinates: - - These define the locations of the centers of the volume cells - in the world coordinates. - - They are specified with the following mapping that converts - points `x_local` in the local coordinates to points `x_world` - in the world coordinates:: - - x_world = ( - x_local * (volume_size - 1) * 0.5 * voxel_size - ) - volume_translation, - - here `voxel_size` specifies the size of each voxel of the volume, - and `volume_translation` is the 3D offset of the central voxel of - the volume w.r.t. the origin of the world coordinate frame. - Both `voxel_size` and `volume_translation` are specified in - the world coordinate units. `volume_size` is the spatial size of - the volume in form of a 3D vector `[width, height, depth]`. - - Given the above definition of `x_world`, one can derive the - inverse mapping from `x_world` to `x_local` as follows:: - - x_local = ( - (x_world + volume_translation) / (0.5 * voxel_size) - ) / (volume_size - 1) - - - For a trivial volume with `volume_translation==[0, 0, 0]` - with `voxel_size=-1`, `x_world` would range - from -(volume_size-1)/2` to `+(volume_size-1)/2`. - - Coordinate tensors that denote the locations of each of the volume cells in - local / world coordinates (with shape `(depth x height x width x 3)`) - can be retrieved by calling the `VolumeLocator.get_coord_grid()` getter with the - appropriate `world_coordinates` argument. - - Internally, the mapping between `x_local` and `x_world` is represented - as a `Transform3d` object `VolumeLocator._local_to_world_transform`. - Users can access the relevant transformations with the - `VolumeLocator.get_world_to_local_coords_transform()` and - `VolumeLocator.get_local_to_world_coords_transform()` - functions. - - Example coordinate conversion: - - For a "trivial" volume with `voxel_size = 1.`, - `volume_translation=[0., 0., 0.]`, and the spatial size of - `DxHxW = 5x5x5`, the point `x_world = (-2, 0, 2)` gets mapped - to `x_local=(-1, 0, 1)`. - - For a "trivial" volume `v` with `voxel_size = 1.`, - `volume_translation=[0., 0., 0.]`, the following holds:: - - torch.nn.functional.grid_sample( - v.densities(), - v.get_coord_grid(world_coordinates=False), - align_corners=align_corners, - ) == v.densities(), - - i.e. sampling the volume at trivial local coordinates - (no scaling with `voxel_size`` or shift with `volume_translation`) - results in the same volume. 
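The grid_sample identity stated above can be checked directly; a rough sketch (assuming a trivial volume with unit voxel size, zero translation and the default align_corners=True):

import torch
import torch.nn.functional as F
from pytorch3d.structures import Volumes

v = Volumes(densities=torch.rand(1, 1, 4, 4, 4))          # trivial volume, voxel_size=1

grid = v.get_coord_grid(world_coordinates=False)          # (1, D, H, W, 3) local coords
resampled = F.grid_sample(v.densities(), grid, align_corners=True)

assert torch.allclose(resampled, v.densities(), atol=1e-5)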
- """ - - def __init__( - self, - batch_size: int, - grid_sizes: Union[ - torch.LongTensor, Tuple[int, int, int], List[torch.LongTensor] - ], - device: torch.device, - voxel_size: _VoxelSize = 1.0, - volume_translation: _Translation = (0.0, 0.0, 0.0), - align_corners: bool = True, - ): - """ - **batch_size** : Batch size of the underlying grids - **grid_sizes** : Represents the resolutions of different grids in the batch. Can be - a) tuple of form (H, W, D) - b) list/tuple of length batch_size of lists/tuples of form (H, W, D) - c) torch.Tensor of shape (batch_size, H, W, D) - H, W, D are height, width, depth respectively. If `grid_sizes` is a tuple than - all the grids in the batch have the same resolution. - **voxel_size**: Denotes the size of each volume voxel in world units. - Has to be one of: - a) A scalar (square voxels) - b) 3-tuple or a 3-list of scalars - c) a Tensor of shape (3,) - d) a Tensor of shape (minibatch, 3) - e) a Tensor of shape (minibatch, 1) - f) a Tensor of shape (1,) (square voxels) - **volume_translation**: Denotes the 3D translation of the center - of the volume in world units. Has to be one of: - a) 3-tuple or a 3-list of scalars - b) a Tensor of shape (3,) - c) a Tensor of shape (minibatch, 3) - d) a Tensor of shape (1,) (square voxels) - **align_corners**: If set (default), the coordinates of the corner voxels are - exactly βˆ’1 or +1 in the local coordinate system. Otherwise, the coordinates - correspond to the centers of the corner voxels. Cf. the namesake argument to - `torch.nn.functional.grid_sample`. - """ - self.device = device - self._batch_size = batch_size - self._grid_sizes = self._convert_grid_sizes2tensor(grid_sizes) - self._resolution = tuple(torch.max(self._grid_sizes.cpu(), dim=0).values) - self._align_corners = align_corners - - # set the local_to_world transform - self._set_local_to_world_transform( - voxel_size=voxel_size, - volume_translation=volume_translation, - ) - - def _convert_grid_sizes2tensor( - self, x: Union[torch.LongTensor, List[torch.LongTensor], Tuple[int, int, int]] - ) -> torch.LongTensor: - """ - Handle the grid_sizes argument to the constructor. - """ - if isinstance(x, (list, tuple)): - if isinstance(x[0], (torch.LongTensor, list, tuple)): - if self._batch_size != len(x): - raise ValueError("x should have a batch size of 'batch_size'") - # pyre-ignore[6] - if any(len(x_) != 3 for x_ in x): - raise ValueError( - "`grid_sizes` has to be a list of 3-dim tensors of shape: " - "(height, width, depth)" - ) - x_shapes = torch.stack( - [ - torch.tensor( - # pyre-ignore[6] - list(x_), - dtype=torch.long, - device=self.device, - ) - for x_ in x - ], - dim=0, - ) - elif isinstance(x[0], int): - x_shapes = torch.stack( - [ - torch.tensor(list(x), dtype=torch.long, device=self.device) - for _ in range(self._batch_size) - ], - dim=0, - ) - else: - raise ValueError( - "`grid_sizes` can be a list/tuple of int or torch.Tensor not of " - + "{type(x[0])}." - ) - - elif torch.is_tensor(x): - if x.ndim != 2: - raise ValueError( - "`grid_sizes` has to be a 2-dim tensor of shape: (minibatch, 3)" - ) - x_shapes = x.to(self.device) - else: - raise ValueError( - "grid_sizes must be either a list of tensors with shape (H, W, D), tensor with" - "shape (batch_size, H, W, D) or a tuple of (H, W, D)." 
- ) - # pyre-ignore[7] - return x_shapes - - def _voxel_size_translation_to_transform( - self, - voxel_size: torch.Tensor, - volume_translation: torch.Tensor, - batch_size: int, - ) -> Transform3d: - """ - Converts the `voxel_size` and `volume_translation` constructor arguments - to the internal `Transform3d` object `local_to_world_transform`. - """ - volume_size_zyx = self.get_grid_sizes().float() - volume_size_xyz = volume_size_zyx[:, [2, 1, 0]] - - # x_local = ( - # (x_world + volume_translation) / (0.5 * voxel_size) - # ) / (volume_size - 1) - - # x_world = ( - # x_local * (volume_size - 1) * 0.5 * voxel_size - # ) - volume_translation - - local_to_world_transform = Scale( - (volume_size_xyz - 1) * voxel_size * 0.5, device=self.device - ).translate(-volume_translation) - - return local_to_world_transform - - def get_coord_grid(self, world_coordinates: bool = True) -> torch.Tensor: - """ - Return the 3D coordinate grid of the volumetric grid - in local (`world_coordinates=False`) or world coordinates - (`world_coordinates=True`). - - The grid records location of each center of the corresponding volume voxel. - - Local coordinates are scaled s.t. the values along one side of the - volume are in range [-1, 1]. - - Args: - **world_coordinates**: if `True`, the method - returns the grid in the world coordinates, - otherwise, in local coordinates. - - Returns: - **coordinate_grid**: The grid of coordinates of shape - `(minibatch, depth, height, width, 3)`, where `minibatch`, - `height`, `width` and `depth` are the batch size, height, width - and depth of the volume `features` or `densities`. - """ - # TODO(dnovotny): Implement caching of the coordinate grid. - return self._calculate_coordinate_grid(world_coordinates=world_coordinates) - - def _calculate_coordinate_grid( - self, world_coordinates: bool = True - ) -> torch.Tensor: - """ - Calculate the 3D coordinate grid of the volumetric grid either - in local (`world_coordinates=False`) or - world coordinates (`world_coordinates=True`) . - """ - - ba, (de, he, wi) = self._batch_size, self._resolution - grid_sizes = self.get_grid_sizes() - - # generate coordinate axes - def corner_coord_adjustment(r): - return 0.0 if self._align_corners else 1.0 / r - - vol_axes = [ - torch.linspace( - -1.0 + corner_coord_adjustment(r), - 1.0 - corner_coord_adjustment(r), - r, - dtype=torch.float32, - device=self.device, - ) - for r in (de, he, wi) - ] - - # generate per-coord meshgrids - Z, Y, X = meshgrid_ij(vol_axes) - - # stack the coord grids ... this order matches the coordinate convention - # of torch.nn.grid_sample - vol_coords_local = torch.stack((X, Y, Z), dim=3)[None].repeat(ba, 1, 1, 1, 1) - - # get grid sizes relative to the maximal volume size - grid_sizes_relative = ( - torch.tensor([[de, he, wi]], device=grid_sizes.device, dtype=torch.float32) - - 1 - ) / (grid_sizes - 1).float() - - if (grid_sizes_relative != 1.0).any(): - # if any of the relative sizes != 1.0, adjust the grid - grid_sizes_relative_reshape = grid_sizes_relative[:, [2, 1, 0]][ - :, None, None, None - ] - vol_coords_local *= grid_sizes_relative_reshape - vol_coords_local += grid_sizes_relative_reshape - 1 - - if world_coordinates: - vol_coords = self.local_to_world_coords(vol_coords_local) - else: - vol_coords = vol_coords_local - - return vol_coords - - def get_local_to_world_coords_transform(self) -> Transform3d: - """ - Return a Transform3d object that converts points in the - the local coordinate frame of the volume to world coordinates. 
- Local volume coordinates are scaled s.t. the coordinates along one - side of the volume are in range [-1, 1]. - - Returns: - **local_to_world_transform**: A Transform3d object converting - points from local coordinates to the world coordinates. - """ - return self._local_to_world_transform - - def get_world_to_local_coords_transform(self) -> Transform3d: - """ - Return a Transform3d object that converts points in the - world coordinates to the local coordinate frame of the volume. - Local volume coordinates are scaled s.t. the coordinates along one - side of the volume are in range [-1, 1]. - - Returns: - **world_to_local_transform**: A Transform3d object converting - points from world coordinates to local coordinates. - """ - return self.get_local_to_world_coords_transform().inverse() - - def world_to_local_coords(self, points_3d_world: torch.Tensor) -> torch.Tensor: - """ - Convert a batch of 3D point coordinates `points_3d_world` of shape - (minibatch, ..., dim) in the world coordinates to - the local coordinate frame of the volume. Local volume - coordinates are scaled s.t. the coordinates along one side of the volume - are in range [-1, 1]. - - Args: - **points_3d_world**: A tensor of shape `(minibatch, ..., 3)` - containing the 3D coordinates of a set of points that will - be converted from the local volume coordinates (ranging - within [-1, 1]) to the world coordinates - defined by the `self.center` and `self.voxel_size` parameters. - - Returns: - **points_3d_local**: `points_3d_world` converted to the local - volume coordinates of shape `(minibatch, ..., 3)`. - """ - pts_shape = points_3d_world.shape - return ( - self.get_world_to_local_coords_transform() - .transform_points(points_3d_world.view(pts_shape[0], -1, 3)) - .view(pts_shape) - ) - - def local_to_world_coords(self, points_3d_local: torch.Tensor) -> torch.Tensor: - """ - Convert a batch of 3D point coordinates `points_3d_local` of shape - (minibatch, ..., dim) in the local coordinate frame of the volume - to the world coordinates. - - Args: - **points_3d_local**: A tensor of shape `(minibatch, ..., 3)` - containing the 3D coordinates of a set of points that will - be converted from the local volume coordinates (ranging - within [-1, 1]) to the world coordinates - defined by the `self.center` and `self.voxel_size` parameters. - - Returns: - **points_3d_world**: `points_3d_local` converted to the world - coordinates of the volume of shape `(minibatch, ..., 3)`. - """ - pts_shape = points_3d_local.shape - return ( - self.get_local_to_world_coords_transform() - .transform_points(points_3d_local.view(pts_shape[0], -1, 3)) - .view(pts_shape) - ) - - def get_grid_sizes(self) -> torch.LongTensor: - """ - Returns the sizes of individual volumetric grids in the structure. - - Returns: - **grid_sizes**: Tensor of spatial sizes of each of the volumes - of size (batchsize, 3), where i-th row holds (D_i, H_i, W_i). - """ - return self._grid_sizes - - def _set_local_to_world_transform( - self, - voxel_size: _VoxelSize = 1.0, - volume_translation: _Translation = (0.0, 0.0, 0.0), - ): - """ - Sets the internal representation of the transformation between the - world and local volume coordinates by specifying - `voxel_size` and `volume_translation` - - Args: - **voxel_size**: Denotes the size of input voxels. 
Has to be one of: - a) A scalar (square voxels) - b) 3-tuple or a 3-list of scalars - c) a Tensor of shape (3,) - d) a Tensor of shape (minibatch, 3) - e) a Tensor of shape (1,) (square voxels) - **volume_translation**: Denotes the 3D translation of the center - of the volume in world units. Has to be one of: - a) 3-tuple or a 3-list of scalars - b) a Tensor of shape (3,) - c) a Tensor of shape (minibatch, 3) - d) a Tensor of shape (1,) (square voxels) - """ - # handle voxel size and center - # here we force the tensors to lie on self.device - voxel_size = self._handle_voxel_size(voxel_size, len(self)) - volume_translation = self._handle_volume_translation( - volume_translation, len(self) - ) - self._local_to_world_transform = self._voxel_size_translation_to_transform( - voxel_size, volume_translation, len(self) - ) - - def _copy_transform_and_sizes( - self, - other: "VolumeLocator", - device: Optional[torch.device] = None, - index: Optional[ - Union[int, List[int], Tuple[int], slice, torch.Tensor] - ] = _ALL_CONTENT, - ) -> None: - """ - Copies the local to world transform and grid sizes to other VolumeLocator object - and moves it to specified device. Operates in place on other. - - Args: - other: VolumeLocator object to which to copy - device: torch.device on which to put the result, defatults to self.device - index: Specifies which parts to copy. - Can be an int, slice, list of ints or a boolean or a long tensor. - Defaults to all items (`:`). - """ - device = device if device is not None else self.device - other._grid_sizes = self._grid_sizes[index].to(device) - other._local_to_world_transform = self.get_local_to_world_coords_transform()[ - # pyre-fixme[6]: For 1st param expected `Union[List[int], int, slice, - # BoolTensor, LongTensor]` but got `Union[None, List[int], Tuple[int], - # int, slice, Tensor]`. - index - ].to(device) - - def _handle_voxel_size( - self, voxel_size: _VoxelSize, batch_size: int - ) -> torch.Tensor: - """ - Handle the `voxel_size` argument to the `VolumeLocator` constructor. - """ - err_msg = ( - "voxel_size has to be either a 3-tuple of scalars, or a scalar, or" - " a torch.Tensor of shape (3,) or (1,) or (minibatch, 3) or (minibatch, 1)." - ) - if isinstance(voxel_size, (float, int)): - # convert a scalar to a 3-element tensor - voxel_size = torch.full( - (1, 3), voxel_size, device=self.device, dtype=torch.float32 - ) - elif isinstance(voxel_size, torch.Tensor): - if voxel_size.numel() == 1: - # convert a single-element tensor to a 3-element one - voxel_size = voxel_size.view(-1).repeat(3) - elif len(voxel_size.shape) == 2 and ( - voxel_size.shape[0] == batch_size and voxel_size.shape[1] == 1 - ): - voxel_size = voxel_size.repeat(1, 3) - return self._convert_volume_property_to_tensor(voxel_size, batch_size, err_msg) - - def _handle_volume_translation( - self, translation: _Translation, batch_size: int - ) -> torch.Tensor: - """ - Handle the `volume_translation` argument to the `VolumeLocator` constructor. - """ - err_msg = ( - "`volume_translation` has to be either a 3-tuple of scalars, or" - " a Tensor of shape (1,3) or (minibatch, 3) or (3,)`." - ) - return self._convert_volume_property_to_tensor(translation, batch_size, err_msg) - - def __len__(self) -> int: - return self._batch_size - - def _convert_volume_property_to_tensor( - self, x: _Vector, batch_size: int, err_msg: str - ) -> torch.Tensor: - """ - Handle the `volume_translation` or `voxel_size` argument to - the VolumeLocator constructor. - Return a tensor of shape (N, 3) where N is the batch_size. 
- """ - if isinstance(x, (list, tuple)): - if len(x) != 3: - raise ValueError(err_msg) - x = torch.tensor(x, device=self.device, dtype=torch.float32)[None] - x = x.repeat((batch_size, 1)) - elif isinstance(x, torch.Tensor): - ok = ( - (x.shape[0] == 1 and x.shape[1] == 3) - or (x.shape[0] == 3 and len(x.shape) == 1) - or (x.shape[0] == batch_size and x.shape[1] == 3) - ) - if not ok: - raise ValueError(err_msg) - if x.device != self.device: - x = x.to(self.device) - if x.shape[0] == 3 and len(x.shape) == 1: - x = x[None] - if x.shape[0] == 1: - x = x.repeat((batch_size, 1)) - else: - raise ValueError(err_msg) - - return x - - def to(self, device: Device, copy: bool = False) -> "VolumeLocator": - """ - Match the functionality of torch.Tensor.to() - If copy = True or the self Tensor is on a different device, the - returned tensor is a copy of self with the desired torch.device. - If copy = False and the self Tensor already has the correct torch.device, - then self is returned. - - Args: - device: Device (as str or torch.device) for the new tensor. - copy: Boolean indicator whether or not to clone self. Default False. - - Returns: - VolumeLocator object. - """ - device_ = make_device(device) - if not copy and self.device == device_: - return self - - other = self.clone() - if self.device == device_: - return other - - other.device = device_ - other._grid_sizes = self._grid_sizes.to(device_) - other._local_to_world_transform = self.get_local_to_world_coords_transform().to( - device - ) - return other - - def clone(self) -> "VolumeLocator": - """ - Deep copy of VoluVolumeLocatormes object. All internal tensors are cloned - individually. - - Returns: - new VolumeLocator object. - """ - return copy.deepcopy(self) - - def cpu(self) -> "VolumeLocator": - return self.to("cpu") - - def cuda(self) -> "VolumeLocator": - return self.to("cuda") diff --git a/pytorch3d/pytorch3d/transforms/__init__.py b/pytorch3d/pytorch3d/transforms/__init__.py deleted file mode 100644 index 9d8ee713452e2a5ebd95c11e8c4db036d219b598..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/transforms/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .math import acos_linear_extrapolation -from .rotation_conversions import ( - axis_angle_to_matrix, - axis_angle_to_quaternion, - euler_angles_to_matrix, - matrix_to_axis_angle, - matrix_to_euler_angles, - matrix_to_quaternion, - matrix_to_rotation_6d, - quaternion_apply, - quaternion_invert, - quaternion_multiply, - quaternion_raw_multiply, - quaternion_to_axis_angle, - quaternion_to_matrix, - random_quaternions, - random_rotation, - random_rotations, - rotation_6d_to_matrix, - standardize_quaternion, -) -from .se3 import se3_exp_map, se3_log_map -from .so3 import ( - so3_exp_map, - so3_exponential_map, - so3_log_map, - so3_relative_angle, - so3_rotation_angle, -) -from .transform3d import Rotate, RotateAxisAngle, Scale, Transform3d, Translate - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/transforms/math.py b/pytorch3d/pytorch3d/transforms/math.py deleted file mode 100644 index e2b93c9337996751569106795ba4785c99d1051c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/transforms/math.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -from typing import Tuple - -import torch - - -DEFAULT_ACOS_BOUND: float = 1.0 - 1e-4 - - -def acos_linear_extrapolation( - x: torch.Tensor, - bounds: Tuple[float, float] = (-DEFAULT_ACOS_BOUND, DEFAULT_ACOS_BOUND), -) -> torch.Tensor: - """ - Implements `arccos(x)` which is linearly extrapolated outside `x`'s original - domain of `(-1, 1)`. This allows for stable backpropagation in case `x` - is not guaranteed to be strictly within `(-1, 1)`. - - More specifically:: - - bounds=(lower_bound, upper_bound) - if lower_bound <= x <= upper_bound: - acos_linear_extrapolation(x) = acos(x) - elif x <= lower_bound: # 1st order Taylor approximation - acos_linear_extrapolation(x) - = acos(lower_bound) + dacos/dx(lower_bound) * (x - lower_bound) - else: # x >= upper_bound - acos_linear_extrapolation(x) - = acos(upper_bound) + dacos/dx(upper_bound) * (x - upper_bound) - - Args: - x: Input `Tensor`. - bounds: A float 2-tuple defining the region for the - linear extrapolation of `acos`. - The first/second element of `bound` - describes the lower/upper bound that defines the lower/upper - extrapolation region, i.e. the region where - `x <= bound[0]`/`bound[1] <= x`. - Note that all elements of `bound` have to be within (-1, 1). - Returns: - acos_linear_extrapolation: `Tensor` containing the extrapolated `arccos(x)`. - """ - - lower_bound, upper_bound = bounds - - if lower_bound > upper_bound: - raise ValueError("lower bound has to be smaller or equal to upper bound.") - - if lower_bound <= -1.0 or upper_bound >= 1.0: - raise ValueError("Both lower bound and upper bound have to be within (-1, 1).") - - # init an empty tensor and define the domain sets - acos_extrap = torch.empty_like(x) - x_upper = x >= upper_bound - x_lower = x <= lower_bound - x_mid = (~x_upper) & (~x_lower) - - # acos calculation for upper_bound < x < lower_bound - acos_extrap[x_mid] = torch.acos(x[x_mid]) - # the linear extrapolation for x >= upper_bound - acos_extrap[x_upper] = _acos_linear_approximation(x[x_upper], upper_bound) - # the linear extrapolation for x <= lower_bound - acos_extrap[x_lower] = _acos_linear_approximation(x[x_lower], lower_bound) - - return acos_extrap - - -def _acos_linear_approximation(x: torch.Tensor, x0: float) -> torch.Tensor: - """ - Calculates the 1st order Taylor expansion of `arccos(x)` around `x0`. - """ - return (x - x0) * _dacos_dx(x0) + math.acos(x0) - - -def _dacos_dx(x: float) -> float: - """ - Calculates the derivative of `arccos(x)` w.r.t. `x`. - """ - return (-1.0) / math.sqrt(1.0 - x * x) diff --git a/pytorch3d/pytorch3d/transforms/rotation_conversions.py b/pytorch3d/pytorch3d/transforms/rotation_conversions.py deleted file mode 100644 index 459441ca184ff484e252b2b4e4fc86b9b24d4c0e..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/transforms/rotation_conversions.py +++ /dev/null @@ -1,596 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Optional - -import torch -import torch.nn.functional as F - -from ..common.datatypes import Device - - -""" -The transformation matrices returned from the functions in this file assume -the points on which the transformation will be applied are column vectors. -i.e. 
the R matrix is structured as - - R = [ - [Rxx, Rxy, Rxz], - [Ryx, Ryy, Ryz], - [Rzx, Rzy, Rzz], - ] # (3, 3) - -This matrix can be applied to column vectors by post multiplication -by the points e.g. - - points = [[0], [1], [2]] # (3 x 1) xyz coordinates of a point - transformed_points = R * points - -To apply the same matrix to points which are row vectors, the R matrix -can be transposed and pre multiplied by the points: - -e.g. - points = [[0, 1, 2]] # (1 x 3) xyz coordinates of a point - transformed_points = points * R.transpose(1, 0) -""" - - -def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor: - """ - Convert rotations given as quaternions to rotation matrices. - - Args: - quaternions: quaternions with real part first, - as tensor of shape (..., 4). - - Returns: - Rotation matrices as tensor of shape (..., 3, 3). - """ - r, i, j, k = torch.unbind(quaternions, -1) - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - two_s = 2.0 / (quaternions * quaternions).sum(-1) - - o = torch.stack( - ( - 1 - two_s * (j * j + k * k), - two_s * (i * j - k * r), - two_s * (i * k + j * r), - two_s * (i * j + k * r), - 1 - two_s * (i * i + k * k), - two_s * (j * k - i * r), - two_s * (i * k - j * r), - two_s * (j * k + i * r), - 1 - two_s * (i * i + j * j), - ), - -1, - ) - return o.reshape(quaternions.shape[:-1] + (3, 3)) - - -def _copysign(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: - """ - Return a tensor where each element has the absolute value taken from the, - corresponding element of a, with sign taken from the corresponding - element of b. This is like the standard copysign floating-point operation, - but is not careful about negative 0 and NaN. - - Args: - a: source tensor. - b: tensor whose signs will be used, of the same shape as a. - - Returns: - Tensor of the same shape as a with the signs of b. - """ - signs_differ = (a < 0) != (b < 0) - return torch.where(signs_differ, -a, a) - - -def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: - """ - Returns torch.sqrt(torch.max(0, x)) - but with a zero subgradient where x is 0. - """ - ret = torch.zeros_like(x) - positive_mask = x > 0 - ret[positive_mask] = torch.sqrt(x[positive_mask]) - return ret - - -def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor: - """ - Convert rotations given as rotation matrices to quaternions. - - Args: - matrix: Rotation matrices as tensor of shape (..., 3, 3). - - Returns: - quaternions with real part first, as tensor of shape (..., 4). - """ - if matrix.size(-1) != 3 or matrix.size(-2) != 3: - raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") - - batch_dim = matrix.shape[:-2] - m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind( - matrix.reshape(batch_dim + (9,)), dim=-1 - ) - - q_abs = _sqrt_positive_part( - torch.stack( - [ - 1.0 + m00 + m11 + m22, - 1.0 + m00 - m11 - m22, - 1.0 - m00 + m11 - m22, - 1.0 - m00 - m11 + m22, - ], - dim=-1, - ) - ) - - # we produce the desired quaternion multiplied by each of r, i, j, k - quat_by_rijk = torch.stack( - [ - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and - # `int`. - torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1), - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and - # `int`. - torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1), - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and - # `int`. 
- torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1), - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and - # `int`. - torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1), - ], - dim=-2, - ) - - # We floor here at 0.1 but the exact level is not important; if q_abs is small, - # the candidate won't be picked. - flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) - quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) - - # if not for numerical problems, quat_candidates[i] should be same (up to a sign), - # forall i; we pick the best-conditioned one (with the largest denominator) - out = quat_candidates[ - F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, : - ].reshape(batch_dim + (4,)) - return standardize_quaternion(out) - - -def _axis_angle_rotation(axis: str, angle: torch.Tensor) -> torch.Tensor: - """ - Return the rotation matrices for one of the rotations about an axis - of which Euler angles describe, for each value of the angle given. - - Args: - axis: Axis label "X" or "Y or "Z". - angle: any shape tensor of Euler angles in radians - - Returns: - Rotation matrices as tensor of shape (..., 3, 3). - """ - - cos = torch.cos(angle) - sin = torch.sin(angle) - one = torch.ones_like(angle) - zero = torch.zeros_like(angle) - - if axis == "X": - R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos) - elif axis == "Y": - R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos) - elif axis == "Z": - R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one) - else: - raise ValueError("letter must be either X, Y or Z.") - - return torch.stack(R_flat, -1).reshape(angle.shape + (3, 3)) - - -def euler_angles_to_matrix(euler_angles: torch.Tensor, convention: str) -> torch.Tensor: - """ - Convert rotations given as Euler angles in radians to rotation matrices. - - Args: - euler_angles: Euler angles in radians as tensor of shape (..., 3). - convention: Convention string of three uppercase letters from - {"X", "Y", and "Z"}. - - Returns: - Rotation matrices as tensor of shape (..., 3, 3). - """ - if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3: - raise ValueError("Invalid input euler angles.") - if len(convention) != 3: - raise ValueError("Convention must have 3 letters.") - if convention[1] in (convention[0], convention[2]): - raise ValueError(f"Invalid convention {convention}.") - for letter in convention: - if letter not in ("X", "Y", "Z"): - raise ValueError(f"Invalid letter {letter} in convention string.") - matrices = [ - _axis_angle_rotation(c, e) - for c, e in zip(convention, torch.unbind(euler_angles, -1)) - ] - # return functools.reduce(torch.matmul, matrices) - return torch.matmul(torch.matmul(matrices[0], matrices[1]), matrices[2]) - - -def _angle_from_tan( - axis: str, other_axis: str, data, horizontal: bool, tait_bryan: bool -) -> torch.Tensor: - """ - Extract the first or third Euler angle from the two members of - the matrix which are positive constant times its sine and cosine. - - Args: - axis: Axis label "X" or "Y or "Z" for the angle we are finding. - other_axis: Axis label "X" or "Y or "Z" for the middle axis in the - convention. - data: Rotation matrices as tensor of shape (..., 3, 3). - horizontal: Whether we are looking for the angle for the third axis, - which means the relevant entries are in the same row of the - rotation matrix. If not, they are in the same column. - tait_bryan: Whether the first and third axes in the convention differ. 
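A small round-trip sketch for the quaternion/matrix conversions above (assuming `pytorch3d.transforms`; the quaternion below is arbitrary and normalized by hand):

import torch
from pytorch3d.transforms import matrix_to_quaternion, quaternion_to_matrix

q = torch.tensor([[0.9, 0.1, -0.3, 0.2]])
q = q / q.norm(dim=-1, keepdim=True)          # unit quaternion, real part first

R = quaternion_to_matrix(q)                   # (..., 3, 3) rotation matrix
q_back = matrix_to_quaternion(R)              # standardized: non-negative real part

assert torch.allclose(q, q_back, atol=1e-5)
assert torch.allclose(R @ R.transpose(-1, -2), torch.eye(3).expand_as(R), atol=1e-5)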
- - Returns: - Euler Angles in radians for each matrix in data as a tensor - of shape (...). - """ - - i1, i2 = {"X": (2, 1), "Y": (0, 2), "Z": (1, 0)}[axis] - if horizontal: - i2, i1 = i1, i2 - even = (axis + other_axis) in ["XY", "YZ", "ZX"] - if horizontal == even: - return torch.atan2(data[..., i1], data[..., i2]) - if tait_bryan: - return torch.atan2(-data[..., i2], data[..., i1]) - return torch.atan2(data[..., i2], -data[..., i1]) - - -def _index_from_letter(letter: str) -> int: - if letter == "X": - return 0 - if letter == "Y": - return 1 - if letter == "Z": - return 2 - raise ValueError("letter must be either X, Y or Z.") - - -def matrix_to_euler_angles(matrix: torch.Tensor, convention: str) -> torch.Tensor: - """ - Convert rotations given as rotation matrices to Euler angles in radians. - - Args: - matrix: Rotation matrices as tensor of shape (..., 3, 3). - convention: Convention string of three uppercase letters. - - Returns: - Euler angles in radians as tensor of shape (..., 3). - """ - if len(convention) != 3: - raise ValueError("Convention must have 3 letters.") - if convention[1] in (convention[0], convention[2]): - raise ValueError(f"Invalid convention {convention}.") - for letter in convention: - if letter not in ("X", "Y", "Z"): - raise ValueError(f"Invalid letter {letter} in convention string.") - if matrix.size(-1) != 3 or matrix.size(-2) != 3: - raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") - i0 = _index_from_letter(convention[0]) - i2 = _index_from_letter(convention[2]) - tait_bryan = i0 != i2 - if tait_bryan: - central_angle = torch.asin( - matrix[..., i0, i2] * (-1.0 if i0 - i2 in [-1, 2] else 1.0) - ) - else: - central_angle = torch.acos(matrix[..., i0, i0]) - - o = ( - _angle_from_tan( - convention[0], convention[1], matrix[..., i2], False, tait_bryan - ), - central_angle, - _angle_from_tan( - convention[2], convention[1], matrix[..., i0, :], True, tait_bryan - ), - ) - return torch.stack(o, -1) - - -def random_quaternions( - n: int, dtype: Optional[torch.dtype] = None, device: Optional[Device] = None -) -> torch.Tensor: - """ - Generate random quaternions representing rotations, - i.e. versors with nonnegative real part. - - Args: - n: Number of quaternions in a batch to return. - dtype: Type to return. - device: Desired device of returned tensor. Default: - uses the current device for the default tensor type. - - Returns: - Quaternions as tensor of shape (N, 4). - """ - if isinstance(device, str): - device = torch.device(device) - o = torch.randn((n, 4), dtype=dtype, device=device) - s = (o * o).sum(1) - o = o / _copysign(torch.sqrt(s), o[:, 0])[:, None] - return o - - -def random_rotations( - n: int, dtype: Optional[torch.dtype] = None, device: Optional[Device] = None -) -> torch.Tensor: - """ - Generate random rotations as 3x3 rotation matrices. - - Args: - n: Number of rotation matrices in a batch to return. - dtype: Type to return. - device: Device of returned tensor. Default: if None, - uses the current device for the default tensor type. - - Returns: - Rotation matrices as tensor of shape (n, 3, 3). - """ - quaternions = random_quaternions(n, dtype=dtype, device=device) - return quaternion_to_matrix(quaternions) - - -def random_rotation( - dtype: Optional[torch.dtype] = None, device: Optional[Device] = None -) -> torch.Tensor: - """ - Generate a single random 3x3 rotation matrix. - - Args: - dtype: Type to return - device: Device of returned tensor. 
Default: if None, - uses the current device for the default tensor type - - Returns: - Rotation matrix as tensor of shape (3, 3). - """ - return random_rotations(1, dtype, device)[0] - - -def standardize_quaternion(quaternions: torch.Tensor) -> torch.Tensor: - """ - Convert a unit quaternion to a standard form: one in which the real - part is non negative. - - Args: - quaternions: Quaternions with real part first, - as tensor of shape (..., 4). - - Returns: - Standardized quaternions as tensor of shape (..., 4). - """ - return torch.where(quaternions[..., 0:1] < 0, -quaternions, quaternions) - - -def quaternion_raw_multiply(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: - """ - Multiply two quaternions. - Usual torch rules for broadcasting apply. - - Args: - a: Quaternions as tensor of shape (..., 4), real part first. - b: Quaternions as tensor of shape (..., 4), real part first. - - Returns: - The product of a and b, a tensor of quaternions of shape (..., 4). - """ - aw, ax, ay, az = torch.unbind(a, -1) - bw, bx, by, bz = torch.unbind(b, -1) - ow = aw * bw - ax * bx - ay * by - az * bz - ox = aw * bx + ax * bw + ay * bz - az * by - oy = aw * by - ax * bz + ay * bw + az * bx - oz = aw * bz + ax * by - ay * bx + az * bw - return torch.stack((ow, ox, oy, oz), -1) - - -def quaternion_multiply(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: - """ - Multiply two quaternions representing rotations, returning the quaternion - representing their composition, i.e. the versor with nonnegative real part. - Usual torch rules for broadcasting apply. - - Args: - a: Quaternions as tensor of shape (..., 4), real part first. - b: Quaternions as tensor of shape (..., 4), real part first. - - Returns: - The product of a and b, a tensor of quaternions of shape (..., 4). - """ - ab = quaternion_raw_multiply(a, b) - return standardize_quaternion(ab) - - -def quaternion_invert(quaternion: torch.Tensor) -> torch.Tensor: - """ - Given a quaternion representing rotation, get the quaternion representing - its inverse. - - Args: - quaternion: Quaternions as tensor of shape (..., 4), with real part - first, which must be versors (unit quaternions). - - Returns: - The inverse, a tensor of quaternions of shape (..., 4). - """ - - scaling = torch.tensor([1, -1, -1, -1], device=quaternion.device) - return quaternion * scaling - - -def quaternion_apply(quaternion: torch.Tensor, point: torch.Tensor) -> torch.Tensor: - """ - Apply the rotation given by a quaternion to a 3D point. - Usual torch rules for broadcasting apply. - - Args: - quaternion: Tensor of quaternions, real part first, of shape (..., 4). - point: Tensor of 3D points of shape (..., 3). - - Returns: - Tensor of rotated points of shape (..., 3). - """ - if point.size(-1) != 3: - raise ValueError(f"Points are not in 3D, {point.shape}.") - real_parts = point.new_zeros(point.shape[:-1] + (1,)) - point_as_quaternion = torch.cat((real_parts, point), -1) - out = quaternion_raw_multiply( - quaternion_raw_multiply(quaternion, point_as_quaternion), - quaternion_invert(quaternion), - ) - return out[..., 1:] - - -def axis_angle_to_matrix(axis_angle: torch.Tensor) -> torch.Tensor: - """ - Convert rotations given as axis/angle to rotation matrices. - - Args: - axis_angle: Rotations given as a vector in axis angle form, - as a tensor of shape (..., 3), where the magnitude is - the angle turned anticlockwise in radians around the - vector's direction. - - Returns: - Rotation matrices as tensor of shape (..., 3, 3).
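# ---------------------------------------------------------------------------
# Editor's note: illustrative usage sketch, not part of the deleted file. It
# exercises the quaternion helpers defined above and assumes the pytorch3d
# package is importable; all tensor values are arbitrary placeholders.
import torch
from pytorch3d.transforms import (
    matrix_to_quaternion,
    quaternion_apply,
    quaternion_invert,
    random_rotations,
)

R = random_rotations(2)                       # (2, 3, 3) random rotation matrices
q = matrix_to_quaternion(R)                   # (2, 4), real part first
points = torch.randn(2, 3)                    # one 3D point per batch element
rotated = quaternion_apply(q, points)         # equivalent to rotating by R
restored = quaternion_apply(quaternion_invert(q), rotated)  # ~points
# ---------------------------------------------------------------------------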
- """ - return quaternion_to_matrix(axis_angle_to_quaternion(axis_angle)) - - -def matrix_to_axis_angle(matrix: torch.Tensor) -> torch.Tensor: - """ - Convert rotations given as rotation matrices to axis/angle. - - Args: - matrix: Rotation matrices as tensor of shape (..., 3, 3). - - Returns: - Rotations given as a vector in axis angle form, as a tensor - of shape (..., 3), where the magnitude is the angle - turned anticlockwise in radians around the vector's - direction. - """ - return quaternion_to_axis_angle(matrix_to_quaternion(matrix)) - - -def axis_angle_to_quaternion(axis_angle: torch.Tensor) -> torch.Tensor: - """ - Convert rotations given as axis/angle to quaternions. - - Args: - axis_angle: Rotations given as a vector in axis angle form, - as a tensor of shape (..., 3), where the magnitude is - the angle turned anticlockwise in radians around the - vector's direction. - - Returns: - quaternions with real part first, as tensor of shape (..., 4). - """ - angles = torch.norm(axis_angle, p=2, dim=-1, keepdim=True) - half_angles = angles * 0.5 - eps = 1e-6 - small_angles = angles.abs() < eps - sin_half_angles_over_angles = torch.empty_like(angles) - sin_half_angles_over_angles[~small_angles] = ( - torch.sin(half_angles[~small_angles]) / angles[~small_angles] - ) - # for x small, sin(x/2) is about x/2 - (x/2)^3/6 - # so sin(x/2)/x is about 1/2 - (x*x)/48 - sin_half_angles_over_angles[small_angles] = ( - 0.5 - (angles[small_angles] * angles[small_angles]) / 48 - ) - quaternions = torch.cat( - [torch.cos(half_angles), axis_angle * sin_half_angles_over_angles], dim=-1 - ) - return quaternions - - -def quaternion_to_axis_angle(quaternions: torch.Tensor) -> torch.Tensor: - """ - Convert rotations given as quaternions to axis/angle. - - Args: - quaternions: quaternions with real part first, - as tensor of shape (..., 4). - - Returns: - Rotations given as a vector in axis angle form, as a tensor - of shape (..., 3), where the magnitude is the angle - turned anticlockwise in radians around the vector's - direction. - """ - norms = torch.norm(quaternions[..., 1:], p=2, dim=-1, keepdim=True) - half_angles = torch.atan2(norms, quaternions[..., :1]) - angles = 2 * half_angles - eps = 1e-6 - small_angles = angles.abs() < eps - sin_half_angles_over_angles = torch.empty_like(angles) - sin_half_angles_over_angles[~small_angles] = ( - torch.sin(half_angles[~small_angles]) / angles[~small_angles] - ) - # for x small, sin(x/2) is about x/2 - (x/2)^3/6 - # so sin(x/2)/x is about 1/2 - (x*x)/48 - sin_half_angles_over_angles[small_angles] = ( - 0.5 - (angles[small_angles] * angles[small_angles]) / 48 - ) - return quaternions[..., 1:] / sin_half_angles_over_angles - - -def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor: - """ - Converts 6D rotation representation by Zhou et al. [1] to rotation matrix - using Gram--Schmidt orthogonalization per Section B of [1]. - Args: - d6: 6D rotation representation, of size (*, 6) - - Returns: - batch of rotation matrices of size (*, 3, 3) - - [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. - On the Continuity of Rotation Representations in Neural Networks. - IEEE Conference on Computer Vision and Pattern Recognition, 2019. 
- Retrieved from http://arxiv.org/abs/1812.07035 - """ - - a1, a2 = d6[..., :3], d6[..., 3:] - b1 = F.normalize(a1, dim=-1) - b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1 - b2 = F.normalize(b2, dim=-1) - b3 = torch.cross(b1, b2, dim=-1) - return torch.stack((b1, b2, b3), dim=-2) - - -def matrix_to_rotation_6d(matrix: torch.Tensor) -> torch.Tensor: - """ - Converts rotation matrices to 6D rotation representation by Zhou et al. [1] - by dropping the last row. Note that 6D representation is not unique. - Args: - matrix: batch of rotation matrices of size (*, 3, 3) - - Returns: - 6D rotation representation, of size (*, 6) - - [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. - On the Continuity of Rotation Representations in Neural Networks. - IEEE Conference on Computer Vision and Pattern Recognition, 2019. - Retrieved from http://arxiv.org/abs/1812.07035 - """ - batch_dim = matrix.size()[:-2] - return matrix[..., :2, :].clone().reshape(batch_dim + (6,)) diff --git a/pytorch3d/pytorch3d/transforms/se3.py b/pytorch3d/pytorch3d/transforms/se3.py deleted file mode 100644 index 1c8a5a1b10827914ba48ff0ee8653d7aca93ac3a..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/transforms/se3.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch - -from .so3 import _so3_exp_map, hat, so3_log_map - - -def se3_exp_map(log_transform: torch.Tensor, eps: float = 1e-4) -> torch.Tensor: - """ - Convert a batch of logarithmic representations of SE(3) matrices `log_transform` - to a batch of 4x4 SE(3) matrices using the exponential map. - See e.g. [1], Sec 9.4.2. for more detailed description. - - A SE(3) matrix has the following form: - ``` - [ R 0 ] - [ T 1 ] , - ``` - where `R` is a 3x3 rotation matrix and `T` is a 3-D translation vector. - SE(3) matrices are commonly used to represent rigid motions or camera extrinsics. - - In the SE(3) logarithmic representation SE(3) matrices are - represented as 6-dimensional vectors `[log_translation | log_rotation]`, - i.e. a concatenation of two 3D vectors `log_translation` and `log_rotation`. - - The conversion from the 6D representation to a 4x4 SE(3) matrix `transform` - is done as follows: - ``` - transform = exp( [ hat(log_rotation) 0 ] - [ log_translation 1 ] ) , - ``` - where `exp` is the matrix exponential and `hat` is the Hat operator [2]. - - Note that for any `log_transform` with `0 <= ||log_rotation|| < 2pi` - (i.e. the rotation angle is between 0 and 2pi), the following identity holds: - ``` - se3_log_map(se3_exponential_map(log_transform)) == log_transform - ``` - - The conversion has a singularity around `||log(transform)|| = 0` - which is handled by clamping controlled with the `eps` argument. - - Args: - log_transform: Batch of vectors of shape `(minibatch, 6)`. - eps: A threshold for clipping the squared norm of the rotation logarithm - to avoid unstable gradients in the singular case. - - Returns: - Batch of transformation matrices of shape `(minibatch, 4, 4)`. - - Raises: - ValueError if `log_transform` is of incorrect shape. 
- - [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf - [2] https://en.wikipedia.org/wiki/Hat_operator - """ - - if log_transform.ndim != 2 or log_transform.shape[1] != 6: - raise ValueError("Expected input to be of shape (N, 6).") - - N, _ = log_transform.shape - - log_translation = log_transform[..., :3] - log_rotation = log_transform[..., 3:] - - # rotation is an exponential map of log_rotation - ( - R, - rotation_angles, - log_rotation_hat, - log_rotation_hat_square, - ) = _so3_exp_map(log_rotation, eps=eps) - - # translation is V @ T - V = _se3_V_matrix( - log_rotation, - log_rotation_hat, - log_rotation_hat_square, - rotation_angles, - eps=eps, - ) - T = torch.bmm(V, log_translation[:, :, None])[:, :, 0] - - transform = torch.zeros( - N, 4, 4, dtype=log_transform.dtype, device=log_transform.device - ) - - transform[:, :3, :3] = R - transform[:, :3, 3] = T - transform[:, 3, 3] = 1.0 - - return transform.permute(0, 2, 1) - - -def se3_log_map( - transform: torch.Tensor, eps: float = 1e-4, cos_bound: float = 1e-4 -) -> torch.Tensor: - """ - Convert a batch of 4x4 transformation matrices `transform` - to a batch of 6-dimensional SE(3) logarithms of the SE(3) matrices. - See e.g. [1], Sec 9.4.2. for more detailed description. - - A SE(3) matrix has the following form: - ``` - [ R 0 ] - [ T 1 ] , - ``` - where `R` is an orthonormal 3x3 rotation matrix and `T` is a 3-D translation vector. - SE(3) matrices are commonly used to represent rigid motions or camera extrinsics. - - In the SE(3) logarithmic representation SE(3) matrices are - represented as 6-dimensional vectors `[log_translation | log_rotation]`, - i.e. a concatenation of two 3D vectors `log_translation` and `log_rotation`. - - The conversion from the 4x4 SE(3) matrix `transform` to the - 6D representation `log_transform = [log_translation | log_rotation]` - is done as follows: - ``` - log_transform = log(transform) - log_translation = log_transform[3, :3] - log_rotation = inv_hat(log_transform[:3, :3]) - ``` - where `log` is the matrix logarithm - and `inv_hat` is the inverse of the Hat operator [2]. - - Note that for any valid 4x4 `transform` matrix, the following identity holds: - ``` - se3_exp_map(se3_log_map(transform)) == transform - ``` - - The conversion has a singularity around `(transform=I)` which is handled - by clamping controlled with the `eps` and `cos_bound` arguments. - - Args: - transform: batch of SE(3) matrices of shape `(minibatch, 4, 4)`. - eps: A threshold for clipping the squared norm of the rotation logarithm - to avoid division by zero in the singular case. - cos_bound: Clamps the cosine of the rotation angle to - [-1 + cos_bound, 3 - cos_bound] to avoid non-finite outputs. - The non-finite outputs can be caused by passing small rotation angles - to the `acos` function in `so3_rotation_angle` of `so3_log_map`. - - Returns: - Batch of logarithms of input SE(3) matrices - of shape `(minibatch, 6)`. - - Raises: - ValueError if `transform` is of incorrect shape. - ValueError if `R` has an unexpected trace. 
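# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the deleted file. It shows the
# SE(3) exp/log round trip documented above, assuming pytorch3d is importable.
import torch
from pytorch3d.transforms import se3_exp_map, se3_log_map

log_transform = 0.5 * torch.randn(4, 6)   # [log_translation | log_rotation], angles < 2*pi
T = se3_exp_map(log_transform)            # (4, 4, 4), row-vector convention described above
log_back = se3_log_map(T)                 # recovers log_transform up to numerical error
# ---------------------------------------------------------------------------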
- - [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf - [2] https://en.wikipedia.org/wiki/Hat_operator - """ - - if transform.ndim != 3: - raise ValueError("Input tensor shape has to be (N, 4, 4).") - - N, dim1, dim2 = transform.shape - if dim1 != 4 or dim2 != 4: - raise ValueError("Input tensor shape has to be (N, 4, 4).") - - if not torch.allclose(transform[:, :3, 3], torch.zeros_like(transform[:, :3, 3])): - raise ValueError("All elements of `transform[:, :3, 3]` should be 0.") - - # log_rot is just so3_log_map of the upper left 3x3 block - R = transform[:, :3, :3].permute(0, 2, 1) - log_rotation = so3_log_map(R, eps=eps, cos_bound=cos_bound) - - # log_translation is V^-1 @ T - T = transform[:, 3, :3] - V = _se3_V_matrix(*_get_se3_V_input(log_rotation), eps=eps) - log_translation = torch.linalg.solve(V, T[:, :, None])[:, :, 0] - - return torch.cat((log_translation, log_rotation), dim=1) - - -def _se3_V_matrix( - log_rotation: torch.Tensor, - log_rotation_hat: torch.Tensor, - log_rotation_hat_square: torch.Tensor, - rotation_angles: torch.Tensor, - eps: float = 1e-4, -) -> torch.Tensor: - """ - A helper function that computes the "V" matrix from [1], Sec 9.4.2. - [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf - """ - - V = ( - torch.eye(3, dtype=log_rotation.dtype, device=log_rotation.device)[None] - + log_rotation_hat - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - * ((1 - torch.cos(rotation_angles)) / (rotation_angles**2))[:, None, None] - + ( - log_rotation_hat_square - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and - # `int`. - * ((rotation_angles - torch.sin(rotation_angles)) / (rotation_angles**3))[ - :, None, None - ] - ) - ) - - return V - - -def _get_se3_V_input(log_rotation: torch.Tensor, eps: float = 1e-4): - """ - A helper function that computes the input variables to the `_se3_V_matrix` - function. - """ - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - nrms = (log_rotation**2).sum(-1) - rotation_angles = torch.clamp(nrms, eps).sqrt() - log_rotation_hat = hat(log_rotation) - log_rotation_hat_square = torch.bmm(log_rotation_hat, log_rotation_hat) - return log_rotation, log_rotation_hat, log_rotation_hat_square, rotation_angles diff --git a/pytorch3d/pytorch3d/transforms/so3.py b/pytorch3d/pytorch3d/transforms/so3.py deleted file mode 100644 index dea68a904cda120c68cf6da38c35f505d7f5ab96..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/transforms/so3.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from typing import Tuple - -import torch -from pytorch3d.transforms import rotation_conversions - -from ..transforms import acos_linear_extrapolation - - -def so3_relative_angle( - R1: torch.Tensor, - R2: torch.Tensor, - cos_angle: bool = False, - cos_bound: float = 1e-4, - eps: float = 1e-4, -) -> torch.Tensor: - """ - Calculates the relative angle (in radians) between pairs of - rotation matrices `R1` and `R2` with `angle = acos(0.5 * (Trace(R1 R2^T)-1))` - - .. note:: - This corresponds to a geodesic distance on the 3D manifold of rotation - matrices. - - Args: - R1: Batch of rotation matrices of shape `(minibatch, 3, 3)`. - R2: Batch of rotation matrices of shape `(minibatch, 3, 3)`. 
- cos_angle: If==True return cosine of the relative angle rather than - the angle itself. This can avoid the unstable calculation of `acos`. - cos_bound: Clamps the cosine of the relative rotation angle to - [-1 + cos_bound, 1 - cos_bound] to avoid non-finite outputs/gradients - of the `acos` call. Note that the non-finite outputs/gradients - are returned when the angle is requested (i.e. `cos_angle==False`) - and the rotation angle is close to 0 or π. - eps: Tolerance for the valid trace check of the relative rotation matrix - in `so3_rotation_angle`. - Returns: - Corresponding rotation angles of shape `(minibatch,)`. - If `cos_angle==True`, returns the cosine of the angles. - - Raises: - ValueError if `R1` or `R2` is of incorrect shape. - ValueError if `R1` or `R2` has an unexpected trace. - """ - R12 = torch.bmm(R1, R2.permute(0, 2, 1)) - return so3_rotation_angle(R12, cos_angle=cos_angle, cos_bound=cos_bound, eps=eps) - - -def so3_rotation_angle( - R: torch.Tensor, - eps: float = 1e-4, - cos_angle: bool = False, - cos_bound: float = 1e-4, -) -> torch.Tensor: - """ - Calculates angles (in radians) of a batch of rotation matrices `R` with - `angle = acos(0.5 * (Trace(R)-1))`. The trace of the - input matrices is checked to be in the valid range `[-1-eps,3+eps]`. - The `eps` argument is a small constant that allows for small errors - caused by limited machine precision. - - Args: - R: Batch of rotation matrices of shape `(minibatch, 3, 3)`. - eps: Tolerance for the valid trace check. - cos_angle: If==True return cosine of the rotation angles rather than - the angle itself. This can avoid the unstable - calculation of `acos`. - cos_bound: Clamps the cosine of the rotation angle to - [-1 + cos_bound, 1 - cos_bound] to avoid non-finite outputs/gradients - of the `acos` call. Note that the non-finite outputs/gradients - are returned when the angle is requested (i.e. `cos_angle==False`) - and the rotation angle is close to 0 or π. - - Returns: - Corresponding rotation angles of shape `(minibatch,)`. - If `cos_angle==True`, returns the cosine of the angles. - - Raises: - ValueError if `R` is of incorrect shape. - ValueError if `R` has an unexpected trace. - """ - - N, dim1, dim2 = R.shape - if dim1 != 3 or dim2 != 3: - raise ValueError("Input has to be a batch of 3x3 Tensors.") - - rot_trace = R[:, 0, 0] + R[:, 1, 1] + R[:, 2, 2] - - if ((rot_trace < -1.0 - eps) + (rot_trace > 3.0 + eps)).any(): - raise ValueError("A matrix has trace outside valid range [-1-eps,3+eps].") - - # phi ... rotation angle - phi_cos = (rot_trace - 1.0) * 0.5 - - if cos_angle: - return phi_cos - else: - if cos_bound > 0.0: - bound = 1.0 - cos_bound - return acos_linear_extrapolation(phi_cos, (-bound, bound)) - else: - return torch.acos(phi_cos) - - -def so3_exp_map(log_rot: torch.Tensor, eps: float = 0.0001) -> torch.Tensor: - """ - Convert a batch of logarithmic representations of rotation matrices `log_rot` - to a batch of 3x3 rotation matrices using Rodrigues formula [1]. - - In the logarithmic representation, each rotation matrix is represented as - a 3-dimensional vector (`log_rot`) whose l2-norm and direction correspond - to the magnitude of the rotation angle and the axis of rotation respectively. - - The conversion has a singularity around `log(R) = 0` - which is handled by clamping controlled with the `eps` argument. - - Args: - log_rot: Batch of vectors of shape `(minibatch, 3)`. - eps: A float constant handling the conversion singularity.
- - Returns: - Batch of rotation matrices of shape `(minibatch, 3, 3)`. - - Raises: - ValueError if `log_rot` is of incorrect shape. - - [1] https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula - """ - return _so3_exp_map(log_rot, eps=eps)[0] - - -def so3_exponential_map(log_rot: torch.Tensor, eps: float = 0.0001) -> torch.Tensor: - warnings.warn( - """so3_exponential_map is deprecated, - Use so3_exp_map instead. - so3_exponential_map will be removed in future releases.""", - PendingDeprecationWarning, - ) - - return so3_exp_map(log_rot, eps) - - -def _so3_exp_map( - log_rot: torch.Tensor, eps: float = 0.0001 -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """ - A helper function that computes the so3 exponential map and, - apart from the rotation matrix, also returns intermediate variables - that can be re-used in other functions. - """ - _, dim = log_rot.shape - if dim != 3: - raise ValueError("Input tensor shape has to be Nx3.") - - nrms = (log_rot * log_rot).sum(1) - # phis ... rotation angles - rot_angles = torch.clamp(nrms, eps).sqrt() - skews = hat(log_rot) - skews_square = torch.bmm(skews, skews) - - R = rotation_conversions.axis_angle_to_matrix(log_rot) - - return R, rot_angles, skews, skews_square - - -def so3_log_map( - R: torch.Tensor, eps: float = 0.0001, cos_bound: float = 1e-4 -) -> torch.Tensor: - """ - Convert a batch of 3x3 rotation matrices `R` - to a batch of 3-dimensional matrix logarithms of rotation matrices - The conversion has a singularity around `(R=I)`. - - Args: - R: batch of rotation matrices of shape `(minibatch, 3, 3)`. - eps: (unused, for backward compatibility) - cos_bound: (unused, for backward compatibility) - - Returns: - Batch of logarithms of input rotation matrices - of shape `(minibatch, 3)`. - """ - - N, dim1, dim2 = R.shape - if dim1 != 3 or dim2 != 3: - raise ValueError("Input has to be a batch of 3x3 Tensors.") - - return rotation_conversions.matrix_to_axis_angle(R) - - -def hat_inv(h: torch.Tensor) -> torch.Tensor: - """ - Compute the inverse Hat operator [1] of a batch of 3x3 matrices. - - Args: - h: Batch of skew-symmetric matrices of shape `(minibatch, 3, 3)`. - - Returns: - Batch of 3d vectors of shape `(minibatch, 3, 3)`. - - Raises: - ValueError if `h` is of incorrect shape. - ValueError if `h` not skew-symmetric. - - [1] https://en.wikipedia.org/wiki/Hat_operator - """ - - N, dim1, dim2 = h.shape - if dim1 != 3 or dim2 != 3: - raise ValueError("Input has to be a batch of 3x3 Tensors.") - - ss_diff = torch.abs(h + h.permute(0, 2, 1)).max() - - HAT_INV_SKEW_SYMMETRIC_TOL = 1e-5 - if float(ss_diff) > HAT_INV_SKEW_SYMMETRIC_TOL: - raise ValueError("One of input matrices is not skew-symmetric.") - - x = h[:, 2, 1] - y = h[:, 0, 2] - z = h[:, 1, 0] - - v = torch.stack((x, y, z), dim=1) - - return v - - -def hat(v: torch.Tensor) -> torch.Tensor: - """ - Compute the Hat operator [1] of a batch of 3D vectors. - - Args: - v: Batch of vectors of shape `(minibatch , 3)`. - - Returns: - Batch of skew-symmetric matrices of shape - `(minibatch, 3 , 3)` where each matrix is of the form: - `[ 0 -v_z v_y ] - [ v_z 0 -v_x ] - [ -v_y v_x 0 ]` - - Raises: - ValueError if `v` is of incorrect shape. 
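# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the deleted file. It shows the
# hat operator and the SO(3) exp/log maps defined above; assumes pytorch3d is
# importable and that these helpers are re-exported from pytorch3d.transforms.
import torch
from pytorch3d.transforms import hat, hat_inv, so3_exp_map, so3_log_map, so3_rotation_angle

log_rot = torch.tensor([[0.0, 0.0, 1.0], [0.3, -0.2, 0.1]])  # axis * angle, shape (2, 3)
K = hat(log_rot)                    # (2, 3, 3) skew-symmetric matrices; hat_inv(K) == log_rot
R = so3_exp_map(log_rot)            # (2, 3, 3) rotations via Rodrigues' formula
angle = so3_rotation_angle(R)       # ~[1.0, 0.374] radians
log_back = so3_log_map(R)           # ~log_rot (angles are well below pi)
# ---------------------------------------------------------------------------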
- - [1] https://en.wikipedia.org/wiki/Hat_operator - """ - - N, dim = v.shape - if dim != 3: - raise ValueError("Input vectors have to be 3-dimensional.") - - h = torch.zeros((N, 3, 3), dtype=v.dtype, device=v.device) - - x, y, z = v.unbind(1) - - h[:, 0, 1] = -z - h[:, 0, 2] = y - h[:, 1, 0] = z - h[:, 1, 2] = -x - h[:, 2, 0] = -y - h[:, 2, 1] = x - - return h diff --git a/pytorch3d/pytorch3d/transforms/transform3d.py b/pytorch3d/pytorch3d/transforms/transform3d.py deleted file mode 100644 index cbef7cbbdb769f7ad0986e308a93a8561fc94691..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/transforms/transform3d.py +++ /dev/null @@ -1,855 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import math -import os -import warnings -from typing import List, Optional, Union - -import torch - -from ..common.datatypes import Device, get_device, make_device -from ..common.workaround import _safe_det_3x3 -from .rotation_conversions import _axis_angle_rotation -from .se3 import se3_log_map - - -class Transform3d: - """ - A Transform3d object encapsulates a batch of N 3D transformations, and knows - how to transform points and normal vectors. Suppose that t is a Transform3d; - then we can do the following: - - .. code-block:: python - - N = len(t) - points = torch.randn(N, P, 3) - normals = torch.randn(N, P, 3) - points_transformed = t.transform_points(points) # => (N, P, 3) - normals_transformed = t.transform_normals(normals) # => (N, P, 3) - - - BROADCASTING - Transform3d objects supports broadcasting. Suppose that t1 and tN are - Transform3d objects with len(t1) == 1 and len(tN) == N respectively. Then we - can broadcast transforms like this: - - .. code-block:: python - - t1.transform_points(torch.randn(P, 3)) # => (P, 3) - t1.transform_points(torch.randn(1, P, 3)) # => (1, P, 3) - t1.transform_points(torch.randn(M, P, 3)) # => (M, P, 3) - tN.transform_points(torch.randn(P, 3)) # => (N, P, 3) - tN.transform_points(torch.randn(1, P, 3)) # => (N, P, 3) - - - COMBINING TRANSFORMS - Transform3d objects can be combined in two ways: composing and stacking. - Composing is function composition. Given Transform3d objects t1, t2, t3, - the following all compute the same thing: - - .. code-block:: python - - y1 = t3.transform_points(t2.transform_points(t1.transform_points(x))) - y2 = t1.compose(t2).compose(t3).transform_points(x) - y3 = t1.compose(t2, t3).transform_points(x) - - - Composing transforms should broadcast. - - .. code-block:: python - - if len(t1) == 1 and len(t2) == N, then len(t1.compose(t2)) == N. - - We can also stack a sequence of Transform3d objects, which represents - composition along the batch dimension; then the following should compute the - same thing. - - .. code-block:: python - - N, M = len(tN), len(tM) - xN = torch.randn(N, P, 3) - xM = torch.randn(M, P, 3) - y1 = torch.cat([tN.transform_points(xN), tM.transform_points(xM)], dim=0) - y2 = tN.stack(tM).transform_points(torch.cat([xN, xM], dim=0)) - - BUILDING TRANSFORMS - We provide convenience methods for easily building Transform3d objects - as compositions of basic transforms. - - .. 
code-block:: python - - # Scale by 0.5, then translate by (1, 2, 3) - t1 = Transform3d().scale(0.5).translate(1, 2, 3) - - # Scale each axis by a different amount, then translate, then scale - t2 = Transform3d().scale(1, 3, 3).translate(2, 3, 1).scale(2.0) - - t3 = t1.compose(t2) - tN = t1.stack(t3, t3) - - - BACKPROP THROUGH TRANSFORMS - When building transforms, we can also parameterize them by Torch tensors; - in this case we can backprop through the construction and application of - Transform objects, so they could be learned via gradient descent or - predicted by a neural network. - - .. code-block:: python - - s1_params = torch.randn(N, requires_grad=True) - t_params = torch.randn(N, 3, requires_grad=True) - s2_params = torch.randn(N, 3, requires_grad=True) - - t = Transform3d().scale(s1_params).translate(t_params).scale(s2_params) - x = torch.randn(N, 3) - y = t.transform_points(x) - loss = compute_loss(y) - loss.backward() - - with torch.no_grad(): - s1_params -= lr * s1_params.grad - t_params -= lr * t_params.grad - s2_params -= lr * s2_params.grad - - CONVENTIONS - We adopt a right-hand coordinate system, meaning that rotation about an axis - with a positive angle results in a counter clockwise rotation. - - This class assumes that transformations are applied on inputs which - are row vectors. The internal representation of the Nx4x4 transformation - matrix is of the form: - - .. code-block:: python - - M = [ - [Rxx, Ryx, Rzx, 0], - [Rxy, Ryy, Rzy, 0], - [Rxz, Ryz, Rzz, 0], - [Tx, Ty, Tz, 1], - ] - - To apply the transformation to points, which are row vectors, the latter are - converted to homogeneous (4D) coordinates and right-multiplied by the M matrix: - - .. code-block:: python - - points = [[0, 1, 2]] # (1 x 3) xyz coordinates of a point - [transformed_points, 1] ∝ [points, 1] @ M - - """ - - def __init__( - self, - dtype: torch.dtype = torch.float32, - device: Device = "cpu", - matrix: Optional[torch.Tensor] = None, - ) -> None: - """ - Args: - dtype: The data type of the transformation matrix. - to be used if `matrix = None`. - device: The device for storing the implemented transformation. - If `matrix != None`, uses the device of input `matrix`. - matrix: A tensor of shape (4, 4) or of shape (minibatch, 4, 4) - representing the 4x4 3D transformation matrix. - If `None`, initializes with identity using - the specified `device` and `dtype`. - """ - - if matrix is None: - self._matrix = torch.eye(4, dtype=dtype, device=device).view(1, 4, 4) - else: - if matrix.ndim not in (2, 3): - raise ValueError('"matrix" has to be a 2- or a 3-dimensional tensor.') - if matrix.shape[-2] != 4 or matrix.shape[-1] != 4: - raise ValueError( - '"matrix" has to be a tensor of shape (minibatch, 4, 4) or (4, 4).' - ) - # set dtype and device from matrix - dtype = matrix.dtype - device = matrix.device - self._matrix = matrix.view(-1, 4, 4) - - self._transforms = [] # store transforms to compose - self._lu = None - self.device = make_device(device) - self.dtype = dtype - - def __len__(self) -> int: - return self.get_matrix().shape[0] - - def __getitem__( - self, index: Union[int, List[int], slice, torch.BoolTensor, torch.LongTensor] - ) -> "Transform3d": - """ - Args: - index: Specifying the index of the transform to retrieve. - Can be an int, slice, list of ints, boolean, long tensor. - Supports negative indices. - - Returns: - Transform3d object with selected transforms. The tensors are not cloned. 
- """ - if isinstance(index, int): - index = [index] - return self.__class__(matrix=self.get_matrix()[index]) - - def compose(self, *others: "Transform3d") -> "Transform3d": - """ - Return a new Transform3d representing the composition of self with the - given other transforms, which will be stored as an internal list. - - Args: - *others: Any number of Transform3d objects - - Returns: - A new Transform3d with the stored transforms - """ - out = Transform3d(dtype=self.dtype, device=self.device) - out._matrix = self._matrix.clone() - for other in others: - if not isinstance(other, Transform3d): - msg = "Only possible to compose Transform3d objects; got %s" - raise ValueError(msg % type(other)) - out._transforms = self._transforms + list(others) - return out - - def get_matrix(self) -> torch.Tensor: - """ - Returns a 4Γ—4 matrix corresponding to each transform in the batch. - - If the transform was composed from others, the matrix for the composite - transform will be returned. - For example, if self.transforms contains transforms t1, t2, and t3, and - given a set of points x, the following should be true: - - .. code-block:: python - - y1 = t1.compose(t2, t3).transform(x) - y2 = t3.transform(t2.transform(t1.transform(x))) - y1.get_matrix() == y2.get_matrix() - - Where necessary, those transforms are broadcast against each other. - - Returns: - A (N, 4, 4) batch of transformation matrices representing - the stored transforms. See the class documentation for the conventions. - """ - composed_matrix = self._matrix.clone() - if len(self._transforms) > 0: - for other in self._transforms: - other_matrix = other.get_matrix() - composed_matrix = _broadcast_bmm(composed_matrix, other_matrix) - return composed_matrix - - def get_se3_log(self, eps: float = 1e-4, cos_bound: float = 1e-4) -> torch.Tensor: - """ - Returns a 6D SE(3) log vector corresponding to each transform in the batch. - - In the SE(3) logarithmic representation SE(3) matrices are - represented as 6-dimensional vectors `[log_translation | log_rotation]`, - i.e. a concatenation of two 3D vectors `log_translation` and `log_rotation`. - - The conversion from the 4x4 SE(3) matrix `transform` to the - 6D representation `log_transform = [log_translation | log_rotation]` - is done as follows:: - - log_transform = log(transform.get_matrix()) - log_translation = log_transform[3, :3] - log_rotation = inv_hat(log_transform[:3, :3]) - - where `log` is the matrix logarithm - and `inv_hat` is the inverse of the Hat operator [2]. - - See the docstring for `se3.se3_log_map` and [1], Sec 9.4.2. for more - detailed description. - - Args: - eps: A threshold for clipping the squared norm of the rotation logarithm - to avoid division by zero in the singular case. - cos_bound: Clamps the cosine of the rotation angle to - [-1 + cos_bound, 3 - cos_bound] to avoid non-finite outputs. - The non-finite outputs can be caused by passing small rotation angles - to the `acos` function in `so3_rotation_angle` of `so3_log_map`. - - Returns: - A (N, 6) tensor, rows of which represent the individual transforms - stored in the object as SE(3) logarithms. - - Raises: - ValueError if the stored transform is not Euclidean (e.g. R is not a rotation - matrix or the last column has non-zeros in the first three places). 
- - [1] https://jinyongjeong.github.io/Download/SE3/jlblanco2010geometry3d_techrep.pdf - [2] https://en.wikipedia.org/wiki/Hat_operator - """ - return se3_log_map(self.get_matrix(), eps, cos_bound) - - def _get_matrix_inverse(self) -> torch.Tensor: - """ - Return the inverse of self._matrix. - """ - return torch.inverse(self._matrix) - - def inverse(self, invert_composed: bool = False) -> "Transform3d": - """ - Returns a new Transform3d object that represents an inverse of the - current transformation. - - Args: - invert_composed: - - True: First compose the list of stored transformations - and then apply inverse to the result. This is - potentially slower for classes of transformations - with inverses that can be computed efficiently - (e.g. rotations and translations). - - False: Invert the individual stored transformations - independently without composing them. - - Returns: - A new Transform3d object containing the inverse of the original - transformation. - """ - - tinv = Transform3d(dtype=self.dtype, device=self.device) - - if invert_composed: - # first compose then invert - tinv._matrix = torch.inverse(self.get_matrix()) - else: - # self._get_matrix_inverse() implements efficient inverse - # of self._matrix - i_matrix = self._get_matrix_inverse() - - # 2 cases: - if len(self._transforms) > 0: - # a) Either we have a non-empty list of transforms: - # Here we take self._matrix and append its inverse at the - # end of the reverted _transforms list. After composing - # the transformations with get_matrix(), this correctly - # right-multiplies by the inverse of self._matrix - # at the end of the composition. - tinv._transforms = [t.inverse() for t in reversed(self._transforms)] - last = Transform3d(dtype=self.dtype, device=self.device) - last._matrix = i_matrix - tinv._transforms.append(last) - else: - # b) Or there are no stored transformations - # we just set inverted matrix - tinv._matrix = i_matrix - - return tinv - - def stack(self, *others: "Transform3d") -> "Transform3d": - """ - Return a new batched Transform3d representing the batch elements from - self and all the given other transforms all batched together. - - Args: - *others: Any number of Transform3d objects - - Returns: - A new Transform3d. - """ - transforms = [self] + list(others) - matrix = torch.cat([t.get_matrix() for t in transforms], dim=0) - out = Transform3d(dtype=self.dtype, device=self.device) - out._matrix = matrix - return out - - def transform_points(self, points, eps: Optional[float] = None) -> torch.Tensor: - """ - Use this transform to transform a set of 3D points. Assumes row major - ordering of the input points. - - Args: - points: Tensor of shape (P, 3) or (N, P, 3) - eps: If eps!=None, the argument is used to clamp the - last coordinate before performing the final division. - The clamping corresponds to: - last_coord := (last_coord.sign() + (last_coord==0)) * - torch.clamp(last_coord.abs(), eps), - i.e. the last coordinates that are exactly 0 will - be clamped to +eps. 
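# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the deleted file. It follows the
# row-vector convention and builder methods described in the class docstring above,
# assuming pytorch3d is importable.
import torch
from pytorch3d.transforms import Transform3d

t = Transform3d().scale(2.0).translate(0.0, 1.0, 0.0)   # scale, then translate
pts = torch.tensor([[1.0, 0.0, 0.0]])
out = t.transform_points(pts)                  # ~[[2.0, 1.0, 0.0]]
back = t.inverse().transform_points(out)       # ~pts
M = t.get_matrix()                             # (1, 4, 4) composed matrix
# ---------------------------------------------------------------------------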
- - Returns: - points_out: points of shape (N, P, 3) or (P, 3) depending - on the dimensions of the transform - """ - points_batch = points.clone() - if points_batch.dim() == 2: - points_batch = points_batch[None] # (P, 3) -> (1, P, 3) - if points_batch.dim() != 3: - msg = "Expected points to have dim = 2 or dim = 3: got shape %r" - raise ValueError(msg % repr(points.shape)) - - N, P, _3 = points_batch.shape - ones = torch.ones(N, P, 1, dtype=points.dtype, device=points.device) - points_batch = torch.cat([points_batch, ones], dim=2) - - composed_matrix = self.get_matrix() - points_out = _broadcast_bmm(points_batch, composed_matrix) - denom = points_out[..., 3:] # denominator - if eps is not None: - denom_sign = denom.sign() + (denom == 0.0).type_as(denom) - denom = denom_sign * torch.clamp(denom.abs(), eps) - points_out = points_out[..., :3] / denom - - # When transform is (1, 4, 4) and points is (P, 3) return - # points_out of shape (P, 3) - if points_out.shape[0] == 1 and points.dim() == 2: - points_out = points_out.reshape(points.shape) - - return points_out - - def transform_normals(self, normals) -> torch.Tensor: - """ - Use this transform to transform a set of normal vectors. - - Args: - normals: Tensor of shape (P, 3) or (N, P, 3) - - Returns: - normals_out: Tensor of shape (P, 3) or (N, P, 3) depending - on the dimensions of the transform - """ - if normals.dim() not in [2, 3]: - msg = "Expected normals to have dim = 2 or dim = 3: got shape %r" - raise ValueError(msg % (normals.shape,)) - composed_matrix = self.get_matrix() - - # TODO: inverse is bad! Solve a linear system instead - mat = composed_matrix[:, :3, :3] - normals_out = _broadcast_bmm(normals, mat.transpose(1, 2).inverse()) - - # This doesn't pass unit tests. TODO investigate further - # if self._lu is None: - # self._lu = self._matrix[:, :3, :3].transpose(1, 2).lu() - # normals_out = normals.lu_solve(*self._lu) - - # When transform is (1, 4, 4) and normals is (P, 3) return - # normals_out of shape (P, 3) - if normals_out.shape[0] == 1 and normals.dim() == 2: - normals_out = normals_out.reshape(normals.shape) - - return normals_out - - def translate(self, *args, **kwargs) -> "Transform3d": - return self.compose( - Translate(*args, device=self.device, dtype=self.dtype, **kwargs) - ) - - def scale(self, *args, **kwargs) -> "Transform3d": - return self.compose( - Scale(*args, device=self.device, dtype=self.dtype, **kwargs) - ) - - def rotate(self, *args, **kwargs) -> "Transform3d": - return self.compose( - Rotate(*args, device=self.device, dtype=self.dtype, **kwargs) - ) - - def rotate_axis_angle(self, *args, **kwargs) -> "Transform3d": - return self.compose( - RotateAxisAngle(*args, device=self.device, dtype=self.dtype, **kwargs) - ) - - def clone(self) -> "Transform3d": - """ - Deep copy of Transforms object. All internal tensors are cloned - individually. - - Returns: - new Transforms object. - """ - other = Transform3d(dtype=self.dtype, device=self.device) - if self._lu is not None: - other._lu = [elem.clone() for elem in self._lu] - other._matrix = self._matrix.clone() - other._transforms = [t.clone() for t in self._transforms] - return other - - def to( - self, - device: Device, - copy: bool = False, - dtype: Optional[torch.dtype] = None, - ) -> "Transform3d": - """ - Match functionality of torch.Tensor.to() - If copy = True or the self Tensor is on a different device, the - returned tensor is a copy of self with the desired torch.device. 
- If copy = False and the self Tensor already has the correct torch.device, - then self is returned. - - Args: - device: Device (as str or torch.device) for the new tensor. - copy: Boolean indicator whether or not to clone self. Default False. - dtype: If not None, casts the internal tensor variables - to a given torch.dtype. - - Returns: - Transform3d object. - """ - device_ = make_device(device) - dtype_ = self.dtype if dtype is None else dtype - skip_to = self.device == device_ and self.dtype == dtype_ - - if not copy and skip_to: - return self - - other = self.clone() - - if skip_to: - return other - - other.device = device_ - other.dtype = dtype_ - other._matrix = other._matrix.to(device=device_, dtype=dtype_) - other._transforms = [ - t.to(device_, copy=copy, dtype=dtype_) for t in other._transforms - ] - return other - - def cpu(self) -> "Transform3d": - return self.to("cpu") - - def cuda(self) -> "Transform3d": - return self.to("cuda") - - -class Translate(Transform3d): - def __init__( - self, - x, - y=None, - z=None, - dtype: torch.dtype = torch.float32, - device: Optional[Device] = None, - ) -> None: - """ - Create a new Transform3d representing 3D translations. - - Option I: Translate(xyz, dtype=torch.float32, device='cpu') - xyz should be a tensor of shape (N, 3) - - Option II: Translate(x, y, z, dtype=torch.float32, device='cpu') - Here x, y, and z will be broadcast against each other and - concatenated to form the translation. Each can be: - - A python scalar - - A torch scalar - - A 1D torch tensor - """ - xyz = _handle_input(x, y, z, dtype, device, "Translate") - super().__init__(device=xyz.device, dtype=dtype) - N = xyz.shape[0] - - mat = torch.eye(4, dtype=dtype, device=self.device) - mat = mat.view(1, 4, 4).repeat(N, 1, 1) - mat[:, 3, :3] = xyz - self._matrix = mat - - def _get_matrix_inverse(self) -> torch.Tensor: - """ - Return the inverse of self._matrix. - """ - inv_mask = self._matrix.new_ones([1, 4, 4]) - inv_mask[0, 3, :3] = -1.0 - i_matrix = self._matrix * inv_mask - return i_matrix - - -class Scale(Transform3d): - def __init__( - self, - x, - y=None, - z=None, - dtype: torch.dtype = torch.float32, - device: Optional[Device] = None, - ) -> None: - """ - A Transform3d representing a scaling operation, with different scale - factors along each coordinate axis. - - Option I: Scale(s, dtype=torch.float32, device='cpu') - s can be one of - - Python scalar or torch scalar: Single uniform scale - - 1D torch tensor of shape (N,): A batch of uniform scale - - 2D torch tensor of shape (N, 3): Scale differently along each axis - - Option II: Scale(x, y, z, dtype=torch.float32, device='cpu') - Each of x, y, and z can be one of - - python scalar - - torch scalar - - 1D torch tensor - """ - xyz = _handle_input(x, y, z, dtype, device, "scale", allow_singleton=True) - super().__init__(device=xyz.device, dtype=dtype) - N = xyz.shape[0] - - # TODO: Can we do this all in one go somehow? - mat = torch.eye(4, dtype=dtype, device=self.device) - mat = mat.view(1, 4, 4).repeat(N, 1, 1) - mat[:, 0, 0] = xyz[:, 0] - mat[:, 1, 1] = xyz[:, 1] - mat[:, 2, 2] = xyz[:, 2] - self._matrix = mat - - def _get_matrix_inverse(self) -> torch.Tensor: - """ - Return the inverse of self._matrix. - """ - xyz = torch.stack([self._matrix[:, i, i] for i in range(4)], dim=1) - # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. - ixyz = 1.0 / xyz - # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`. 
- imat = torch.diag_embed(ixyz, dim1=1, dim2=2) - return imat - - -class Rotate(Transform3d): - def __init__( - self, - R: torch.Tensor, - dtype: torch.dtype = torch.float32, - device: Optional[Device] = None, - orthogonal_tol: float = 1e-5, - ) -> None: - """ - Create a new Transform3d representing 3D rotation using a rotation - matrix as the input. - - Args: - R: a tensor of shape (3, 3) or (N, 3, 3) - orthogonal_tol: tolerance for the test of the orthogonality of R - - """ - device_ = get_device(R, device) - super().__init__(device=device_, dtype=dtype) - if R.dim() == 2: - R = R[None] - if R.shape[-2:] != (3, 3): - msg = "R must have shape (3, 3) or (N, 3, 3); got %s" - raise ValueError(msg % repr(R.shape)) - R = R.to(device=device_, dtype=dtype) - if os.environ.get("PYTORCH3D_CHECK_ROTATION_MATRICES", "0") == "1": - # Note: aten::all_close in the check is computationally slow, so we - # only run the check when PYTORCH3D_CHECK_ROTATION_MATRICES is on. - _check_valid_rotation_matrix(R, tol=orthogonal_tol) - N = R.shape[0] - mat = torch.eye(4, dtype=dtype, device=device_) - mat = mat.view(1, 4, 4).repeat(N, 1, 1) - mat[:, :3, :3] = R - self._matrix = mat - - def _get_matrix_inverse(self) -> torch.Tensor: - """ - Return the inverse of self._matrix. - """ - return self._matrix.permute(0, 2, 1).contiguous() - - -class RotateAxisAngle(Rotate): - def __init__( - self, - angle, - axis: str = "X", - degrees: bool = True, - dtype: torch.dtype = torch.float32, - device: Optional[Device] = None, - ) -> None: - """ - Create a new Transform3d representing 3D rotation about an axis - by an angle. - - Assuming a right-hand coordinate system, positive rotation angles result - in a counter clockwise rotation. - - Args: - angle: - - A torch tensor of shape (N,) - - A python scalar - - A torch scalar - axis: - string: one of ["X", "Y", "Z"] indicating the axis about which - to rotate. - NOTE: All batch elements are rotated about the same axis. - """ - axis = axis.upper() - if axis not in ["X", "Y", "Z"]: - msg = "Expected axis to be one of ['X', 'Y', 'Z']; got %s" - raise ValueError(msg % axis) - angle = _handle_angle_input(angle, dtype, device, "RotateAxisAngle") - angle = (angle / 180.0 * math.pi) if degrees else angle - # We assume the points on which this transformation will be applied - # are row vectors. The rotation matrix returned from _axis_angle_rotation - # is for transforming column vectors. Therefore we transpose this matrix. - # R will always be of shape (N, 3, 3) - R = _axis_angle_rotation(axis, angle).transpose(1, 2) - super().__init__(device=angle.device, R=R, dtype=dtype) - - -def _handle_coord(c, dtype: torch.dtype, device: torch.device) -> torch.Tensor: - """ - Helper function for _handle_input. - - Args: - c: Python scalar, torch scalar, or 1D torch tensor - - Returns: - c_vec: 1D torch tensor - """ - if not torch.is_tensor(c): - c = torch.tensor(c, dtype=dtype, device=device) - if c.dim() == 0: - c = c.view(1) - if c.device != device or c.dtype != dtype: - c = c.to(device=device, dtype=dtype) - return c - - -def _handle_input( - x, - y, - z, - dtype: torch.dtype, - device: Optional[Device], - name: str, - allow_singleton: bool = False, -) -> torch.Tensor: - """ - Helper function to handle parsing logic for building transforms. The output - is always a tensor of shape (N, 3), but there are several types of allowed - input. - - Case I: Single Matrix - In this case x is a tensor of shape (N, 3), and y and z are None. Here just - return x. 
- - Case II: Vectors and Scalars - In this case each of x, y, and z can be one of the following - - Python scalar - - Torch scalar - - Torch tensor of shape (N, 1) or (1, 1) - In this case x, y and z are broadcast to tensors of shape (N, 1) - and concatenated to a tensor of shape (N, 3) - - Case III: Singleton (only if allow_singleton=True) - In this case y and z are None, and x can be one of the following: - - Python scalar - - Torch scalar - - Torch tensor of shape (N, 1) or (1, 1) - Here x will be duplicated 3 times, and we return a tensor of shape (N, 3) - - Returns: - xyz: Tensor of shape (N, 3) - """ - device_ = get_device(x, device) - # If x is actually a tensor of shape (N, 3) then just return it - if torch.is_tensor(x) and x.dim() == 2: - if x.shape[1] != 3: - msg = "Expected tensor of shape (N, 3); got %r (in %s)" - raise ValueError(msg % (x.shape, name)) - if y is not None or z is not None: - msg = "Expected y and z to be None (in %s)" % name - raise ValueError(msg) - return x.to(device=device_, dtype=dtype) - - if allow_singleton and y is None and z is None: - y = x - z = x - - # Convert all to 1D tensors - xyz = [_handle_coord(c, dtype, device_) for c in [x, y, z]] - - # Broadcast and concatenate - sizes = [c.shape[0] for c in xyz] - N = max(sizes) - for c in xyz: - if c.shape[0] != 1 and c.shape[0] != N: - msg = "Got non-broadcastable sizes %r (in %s)" % (sizes, name) - raise ValueError(msg) - xyz = [c.expand(N) for c in xyz] - xyz = torch.stack(xyz, dim=1) - return xyz - - -def _handle_angle_input( - x, dtype: torch.dtype, device: Optional[Device], name: str -) -> torch.Tensor: - """ - Helper function for building a rotation function using angles. - The output is always of shape (N,). - - The input can be one of: - - Torch tensor of shape (N,) - - Python scalar - - Torch scalar - """ - device_ = get_device(x, device) - if torch.is_tensor(x) and x.dim() > 1: - msg = "Expected tensor of shape (N,); got %r (in %s)" - raise ValueError(msg % (x.shape, name)) - else: - return _handle_coord(x, dtype, device_) - - -def _broadcast_bmm(a, b) -> torch.Tensor: - """ - Batch multiply two matrices and broadcast if necessary. - - Args: - a: torch tensor of shape (P, K) or (M, P, K) - b: torch tensor of shape (N, K, K) - - Returns: - a and b broadcast multiplied. The output batch dimension is max(N, M). - - To broadcast transforms across a batch dimension if M != N then - expect that either M = 1 or N = 1. The tensor with batch dimension 1 is - expanded to have shape N or M. - """ - if a.dim() == 2: - a = a[None] - if len(a) != len(b): - if not ((len(a) == 1) or (len(b) == 1)): - msg = "Expected batch dim for bmm to be equal or 1; got %r, %r" - raise ValueError(msg % (a.shape, b.shape)) - if len(a) == 1: - a = a.expand(len(b), -1, -1) - if len(b) == 1: - b = b.expand(len(a), -1, -1) - return a.bmm(b) - - -@torch.no_grad() -def _check_valid_rotation_matrix(R, tol: float = 1e-7) -> None: - """ - Determine if R is a valid rotation matrix by checking it satisfies the - following conditions: - - ``RR^T = I and det(R) = 1`` - - Args: - R: an (N, 3, 3) matrix - - Returns: - None - - Emits a warning if R is an invalid rotation matrix. 
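# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the deleted file. It combines the
# dedicated Rotate/Translate classes defined above on a batch of point clouds,
# assuming pytorch3d is importable.
import torch
from pytorch3d.transforms import Rotate, Translate, random_rotations

R = random_rotations(4)                                    # (4, 3, 3)
t = Rotate(R).compose(Translate(torch.randn(4, 3)))        # rotate, then translate
points = torch.randn(4, 100, 3)
points_out = t.transform_points(points)                    # (4, 100, 3)
normals_out = t.transform_normals(torch.randn(4, 100, 3))  # translation does not affect normals
# ---------------------------------------------------------------------------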
- """ - N = R.shape[0] - eye = torch.eye(3, dtype=R.dtype, device=R.device) - eye = eye.view(1, 3, 3).expand(N, -1, -1) - orthogonal = torch.allclose(R.bmm(R.transpose(1, 2)), eye, atol=tol) - det_R = _safe_det_3x3(R) - no_distortion = torch.allclose(det_R, torch.ones_like(det_R)) - if not (orthogonal and no_distortion): - msg = "R is not a valid rotation matrix" - warnings.warn(msg) - return diff --git a/pytorch3d/pytorch3d/utils/__init__.py b/pytorch3d/pytorch3d/utils/__init__.py deleted file mode 100644 index f3681e8236e65a5f802044a408c70b3b4d42e7a5..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from .camera_conversions import ( - cameras_from_opencv_projection, - opencv_from_cameras_projection, - pulsar_from_cameras_projection, - pulsar_from_opencv_projection, -) -from .checkerboard import checkerboard -from .ico_sphere import ico_sphere -from .torus import torus - - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/pytorch3d/pytorch3d/utils/camera_conversions.py b/pytorch3d/pytorch3d/utils/camera_conversions.py deleted file mode 100644 index 83ce2bb5fb05a360a4d7523c227477a585143378..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/utils/camera_conversions.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import Tuple - -import torch - -from ..renderer import PerspectiveCameras -from ..renderer.camera_conversions import ( - _cameras_from_opencv_projection, - _opencv_from_cameras_projection, - _pulsar_from_cameras_projection, - _pulsar_from_opencv_projection, -) - - -def cameras_from_opencv_projection( - R: torch.Tensor, - tvec: torch.Tensor, - camera_matrix: torch.Tensor, - image_size: torch.Tensor, -) -> PerspectiveCameras: - """ - Converts a batch of OpenCV-conventioned cameras parametrized with the - rotation matrices `R`, translation vectors `tvec`, and the camera - calibration matrices `camera_matrix` to `PerspectiveCameras` in PyTorch3D - convention. - - More specifically, the conversion is carried out such that a projection - of a 3D shape to the OpenCV-conventioned screen of size `image_size` results - in the same image as a projection with the corresponding PyTorch3D camera - to the NDC screen convention of PyTorch3D. - - More specifically, the OpenCV convention projects points to the OpenCV screen - space as follows:: - - x_screen_opencv = camera_matrix @ (R @ x_world + tvec) - - followed by the homogenization of `x_screen_opencv`. - - Note: - The parameters `R, tvec, camera_matrix` correspond to the inputs of - `cv2.projectPoints(x_world, rvec, tvec, camera_matrix, [])`, - where `rvec` is an axis-angle vector that can be obtained from - the rotation matrix `R` expected here by calling the `so3_log_map` function. - Correspondingly, `R` can be obtained from `rvec` by calling `so3_exp_map`. - - Args: - R: A batch of rotation matrices of shape `(N, 3, 3)`. - tvec: A batch of translation vectors of shape `(N, 3)`. - camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`. 
- image_size: A tensor of shape `(N, 2)` containing the sizes of the images - (height, width) attached to each camera. - - Returns: - cameras_pytorch3d: A batch of `N` cameras in the PyTorch3D convention. - """ - return _cameras_from_opencv_projection(R, tvec, camera_matrix, image_size) - - -def opencv_from_cameras_projection( - cameras: PerspectiveCameras, - image_size: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Converts a batch of `PerspectiveCameras` into OpenCV-convention - rotation matrices `R`, translation vectors `tvec`, and the camera - calibration matrices `camera_matrix`. This operation is exactly the inverse - of `cameras_from_opencv_projection`. - - Note: - The outputs `R, tvec, camera_matrix` correspond to the inputs of - `cv2.projectPoints(x_world, rvec, tvec, camera_matrix, [])`, - where `rvec` is an axis-angle vector that can be obtained from - the rotation matrix `R` output here by calling the `so3_log_map` function. - Correspondingly, `R` can be obtained from `rvec` by calling `so3_exp_map`. - - Args: - cameras: A batch of `N` cameras in the PyTorch3D convention. - image_size: A tensor of shape `(N, 2)` containing the sizes of the images - (height, width) attached to each camera. - return_as_rotmat (bool): If set to True, return the full 3x3 rotation - matrices. Otherwise, return an axis-angle vector (default). - - Returns: - R: A batch of rotation matrices of shape `(N, 3, 3)`. - tvec: A batch of translation vectors of shape `(N, 3)`. - camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`. - """ - return _opencv_from_cameras_projection(cameras, image_size) - - -def pulsar_from_opencv_projection( - R: torch.Tensor, - tvec: torch.Tensor, - camera_matrix: torch.Tensor, - image_size: torch.Tensor, - znear: float = 0.1, -) -> torch.Tensor: - """ - Convert OpenCV style camera parameters to Pulsar style camera parameters. - - Note: - * Pulsar does NOT support different focal lengths for x and y. - For conversion, we use the average of fx and fy. - * The Pulsar renderer MUST use a left-handed coordinate system for this - mapping to work. - * The resulting image will be vertically flipped - which has to be - addressed AFTER rendering by the user. - * The parameters `R, tvec, camera_matrix` correspond to the outputs - of `cv2.decomposeProjectionMatrix`. - - Args: - R: A batch of rotation matrices of shape `(N, 3, 3)`. - tvec: A batch of translation vectors of shape `(N, 3)`. - camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`. - image_size: A tensor of shape `(N, 2)` containing the sizes of the images - (height, width) attached to each camera. - znear (float): The near clipping value to use for Pulsar. - - Returns: - cameras_pulsar: A batch of `N` Pulsar camera vectors in the Pulsar - convention `(N, 13)` (3 translation, 6 rotation, focal_length, sensor_width, - c_x, c_y). - """ - return _pulsar_from_opencv_projection(R, tvec, camera_matrix, image_size, znear) - - -def pulsar_from_cameras_projection( - cameras: PerspectiveCameras, - image_size: torch.Tensor, -) -> torch.Tensor: - """ - Convert PyTorch3D `PerspectiveCameras` to Pulsar style camera parameters. - - Note: - * Pulsar does NOT support different focal lengths for x and y. - For conversion, we use the average of fx and fy. - * The Pulsar renderer MUST use a left-handed coordinate system for this - mapping to work. - * The resulting image will be vertically flipped - which has to be - addressed AFTER rendering by the user. 
- - Args: - cameras: A batch of `N` cameras in the PyTorch3D convention. - image_size: A tensor of shape `(N, 2)` containing the sizes of the images - (height, width) attached to each camera. - - Returns: - cameras_pulsar: A batch of `N` Pulsar camera vectors in the Pulsar - convention `(N, 13)` (3 translation, 6 rotation, focal_length, sensor_width, - c_x, c_y). - """ - return _pulsar_from_cameras_projection(cameras, image_size) diff --git a/pytorch3d/pytorch3d/utils/checkerboard.py b/pytorch3d/pytorch3d/utils/checkerboard.py deleted file mode 100644 index 625c08684525ce937ed0ba728394ec49c5e0203c..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/utils/checkerboard.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import Optional, Tuple - -import torch -from pytorch3d.common.compat import meshgrid_ij -from pytorch3d.renderer.mesh.textures import TexturesAtlas -from pytorch3d.structures.meshes import Meshes - - -def checkerboard( - radius: int = 4, - color1: Tuple[float, ...] = (0.0, 0.0, 0.0), - color2: Tuple[float, ...] = (1.0, 1.0, 1.0), - device: Optional[torch.types._device] = None, -) -> Meshes: - """ - Returns a mesh of squares in the xy-plane where each unit is one of the two given - colors and adjacent squares have opposite colors. - Args: - radius: how many squares in each direction from the origin - color1: background color - color2: foreground color (must have the same number of channels as color1) - Returns: - new Meshes object containing one mesh. - """ - - if device is None: - device = torch.device("cpu") - if radius < 1: - raise ValueError("radius must be > 0") - - num_verts_per_row = 2 * radius + 1 - - # construct 2D grid of 3D vertices - x = torch.arange(-radius, radius + 1, device=device) - grid_y, grid_x = meshgrid_ij(x, x) - verts = torch.stack( - [grid_x, grid_y, torch.zeros((2 * radius + 1, 2 * radius + 1))], dim=-1 - ) - verts = verts.view(1, -1, 3) - - top_triangle_idx = torch.arange(0, num_verts_per_row * (num_verts_per_row - 1)) - top_triangle_idx = torch.stack( - [ - top_triangle_idx, - top_triangle_idx + 1, - top_triangle_idx + num_verts_per_row + 1, - ], - dim=-1, - ) - - bottom_triangle_idx = top_triangle_idx[:, [0, 2, 1]] + torch.tensor( - [0, 0, num_verts_per_row - 1] - ) - - faces = torch.zeros( - (1, len(top_triangle_idx) + len(bottom_triangle_idx), 3), - dtype=torch.long, - device=device, - ) - faces[0, ::2] = top_triangle_idx - faces[0, 1::2] = bottom_triangle_idx - - # construct range of indices that excludes the boundary to avoid wrong triangles - indexing_range = torch.arange(0, 2 * num_verts_per_row * num_verts_per_row).view( - num_verts_per_row, num_verts_per_row, 2 - ) - indexing_range = indexing_range[:-1, :-1] # removes boundaries from list of indices - indexing_range = indexing_range.reshape( - 2 * (num_verts_per_row - 1) * (num_verts_per_row - 1) - ) - - faces = faces[:, indexing_range] - - # adding color - colors = torch.tensor(color1).repeat(2 * num_verts_per_row * num_verts_per_row, 1) - colors[2::4] = torch.tensor(color2) - colors[3::4] = torch.tensor(color2) - colors = colors[None, indexing_range, None, None] - - texture_atlas = TexturesAtlas(colors) - - return Meshes(verts=verts, faces=faces, textures=texture_atlas) diff --git a/pytorch3d/pytorch3d/utils/ico_sphere.py b/pytorch3d/pytorch3d/utils/ico_sphere.py 
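[Editor's note] A minimal usage sketch of the OpenCV conversion entry points removed in camera_conversions.py above, with shapes matching the documented arguments. The intrinsics and image size below are arbitrary placeholders, not values from this repository.

    import torch
    from pytorch3d.utils import cameras_from_opencv_projection, opencv_from_cameras_projection

    N = 2
    R = torch.eye(3).expand(N, 3, 3)            # (N, 3, 3) OpenCV rotation matrices
    tvec = torch.zeros(N, 3)                    # (N, 3) translation vectors
    camera_matrix = torch.tensor(
        [[500.0, 0.0, 320.0], [0.0, 500.0, 240.0], [0.0, 0.0, 1.0]]
    ).expand(N, 3, 3)                           # (N, 3, 3) calibration matrices
    image_size = torch.tensor([[480.0, 640.0]]).expand(N, 2)  # (height, width) per camera

    cameras = cameras_from_opencv_projection(R, tvec, camera_matrix, image_size)
    R_cv, tvec_cv, K_cv = opencv_from_cameras_projection(cameras, image_size)  # inverse mapping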
deleted file mode 100644 index da7ed10b9e137920d18c5092f7390ecfe3dd3cf4..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/utils/ico_sphere.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch -from pytorch3d.ops.subdivide_meshes import SubdivideMeshes -from pytorch3d.structures.meshes import Meshes - - -# Vertex coordinates for a level 0 ico-sphere. -_ico_verts0 = [ - [-0.5257, 0.8507, 0.0000], - [0.5257, 0.8507, 0.0000], - [-0.5257, -0.8507, 0.0000], - [0.5257, -0.8507, 0.0000], - [0.0000, -0.5257, 0.8507], - [0.0000, 0.5257, 0.8507], - [0.0000, -0.5257, -0.8507], - [0.0000, 0.5257, -0.8507], - [0.8507, 0.0000, -0.5257], - [0.8507, 0.0000, 0.5257], - [-0.8507, 0.0000, -0.5257], - [-0.8507, 0.0000, 0.5257], -] - - -# Faces for level 0 ico-sphere -_ico_faces0 = [ - [0, 11, 5], - [0, 5, 1], - [0, 1, 7], - [0, 7, 10], - [0, 10, 11], - [1, 5, 9], - [5, 11, 4], - [11, 10, 2], - [10, 7, 6], - [7, 1, 8], - [3, 9, 4], - [3, 4, 2], - [3, 2, 6], - [3, 6, 8], - [3, 8, 9], - [4, 9, 5], - [2, 4, 11], - [6, 2, 10], - [8, 6, 7], - [9, 8, 1], -] - - -def ico_sphere(level: int = 0, device=None): - """ - Create verts and faces for a unit ico-sphere, with all faces oriented - consistently. - - Args: - level: integer specifying the number of iterations for subdivision - of the mesh faces. Each additional level will result in four new - faces per face. - device: A torch.device object on which the outputs will be allocated. - - Returns: - Meshes object with verts and faces. - """ - if device is None: - device = torch.device("cpu") - if level < 0: - raise ValueError("level must be >= 0.") - if level == 0: - verts = torch.tensor(_ico_verts0, dtype=torch.float32, device=device) - faces = torch.tensor(_ico_faces0, dtype=torch.int64, device=device) - - else: - mesh = ico_sphere(level - 1, device) - subdivide = SubdivideMeshes() - mesh = subdivide(mesh) - verts = mesh.verts_list()[0] - verts /= verts.norm(p=2, dim=1, keepdim=True) - faces = mesh.faces_list()[0] - return Meshes(verts=[verts], faces=[faces]) diff --git a/pytorch3d/pytorch3d/utils/torus.py b/pytorch3d/pytorch3d/utils/torus.py deleted file mode 100644 index c5c34785832c2d580f343a264c8a8228bd0d5a44..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/utils/torus.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from itertools import tee -from math import cos, pi, sin -from typing import Iterator, Optional, Tuple - -import torch -from pytorch3d.structures.meshes import Meshes - - -# Make an iterator over the adjacent pairs: (-1, 0), (0, 1), ..., (N - 2, N - 1) -def _make_pair_range(N: int) -> Iterator[Tuple[int, int]]: - i, j = tee(range(-1, N)) - next(j, None) - return zip(i, j) - - -def torus( - r: float, R: float, sides: int, rings: int, device: Optional[torch.device] = None -) -> Meshes: - """ - Create vertices and faces for a torus. - - Args: - r: Inner radius of the torus. - R: Outer radius of the torus. - sides: Number of inner divisions. - rings: Number of outer divisions. - device: Device on which the outputs will be allocated. - - Returns: - Meshes object with the generated vertices and faces. 
- """ - if not (sides > 0): - raise ValueError("sides must be > 0.") - if not (rings > 0): - raise ValueError("rings must be > 0.") - device = device if device else torch.device("cpu") - - verts = [] - for i in range(rings): - # phi ranges from 0 to 2 pi (rings - 1) / rings - phi = 2 * pi * i / rings - for j in range(sides): - # theta ranges from 0 to 2 pi (sides - 1) / sides - theta = 2 * pi * j / sides - x = (R + r * cos(theta)) * cos(phi) - y = (R + r * cos(theta)) * sin(phi) - z = r * sin(theta) - # This vertex has index i * sides + j - verts.append([x, y, z]) - - faces = [] - for i0, i1 in _make_pair_range(rings): - index0 = (i0 % rings) * sides - index1 = (i1 % rings) * sides - for j0, j1 in _make_pair_range(sides): - index00 = index0 + (j0 % sides) - index01 = index0 + (j1 % sides) - index10 = index1 + (j0 % sides) - index11 = index1 + (j1 % sides) - faces.append([index00, index10, index11]) - faces.append([index11, index01, index00]) - - verts_list = [torch.tensor(verts, dtype=torch.float32, device=device)] - faces_list = [torch.tensor(faces, dtype=torch.int64, device=device)] - return Meshes(verts_list, faces_list) diff --git a/pytorch3d/pytorch3d/vis/__init__.py b/pytorch3d/pytorch3d/vis/__init__.py deleted file mode 100644 index 972cc5cebae7494281cc235b88df7c6e244d1cd0..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/vis/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings - - -try: - from .plotly_vis import get_camera_wireframe, plot_batch_individually, plot_scene -except ModuleNotFoundError as err: - if "plotly" in str(err): - warnings.warn( - "Cannot import plotly-based visualization code." - " Please install plotly to enable (pip install plotly)." - ) - else: - raise - -from .texture_vis import texturesuv_image_matplotlib, texturesuv_image_PIL diff --git a/pytorch3d/pytorch3d/vis/plotly_vis.py b/pytorch3d/pytorch3d/vis/plotly_vis.py deleted file mode 100644 index 155e143d8ea9d761776c19fde7f77b5f1aeabb7d..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/vis/plotly_vis.py +++ /dev/null @@ -1,1048 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import warnings -from typing import Dict, List, NamedTuple, Optional, Tuple, Union - -import plotly.graph_objects as go -import torch -from plotly.subplots import make_subplots -from pytorch3d.renderer import ( - HeterogeneousRayBundle, - ray_bundle_to_ray_points, - RayBundle, - TexturesAtlas, - TexturesVertex, -) -from pytorch3d.renderer.camera_utils import camera_to_eye_at_up -from pytorch3d.renderer.cameras import CamerasBase -from pytorch3d.structures import join_meshes_as_scene, Meshes, Pointclouds - - -Struct = Union[CamerasBase, Meshes, Pointclouds, RayBundle, HeterogeneousRayBundle] - - -def _get_len(struct: Union[Struct, List[Struct]]) -> int: # pragma: no cover - """ - Returns the length (usually corresponds to the batch size) of the input structure. 
- """ - # pyre-ignore[6] - if not _is_ray_bundle(struct): - # pyre-ignore[6] - return len(struct) - if _is_heterogeneous_ray_bundle(struct): - # pyre-ignore[16] - return len(struct.camera_counts) - # pyre-ignore[16] - return len(struct.directions) - - -def _is_ray_bundle(struct: Struct) -> bool: - """ - Args: - struct: Struct object to test - Returns: - True if something is a RayBundle, HeterogeneousRayBundle or - ImplicitronRayBundle, else False - """ - return hasattr(struct, "directions") - - -def _is_heterogeneous_ray_bundle(struct: Union[List[Struct], Struct]) -> bool: - """ - Args: - struct :object to test - Returns: - True if something is a HeterogeneousRayBundle or ImplicitronRayBundle - and cant be reduced to RayBundle else False - """ - # pyre-ignore[16] - return hasattr(struct, "camera_counts") and struct.camera_counts is not None - - -def get_camera_wireframe(scale: float = 0.3): # pragma: no cover - """ - Returns a wireframe of a 3D line-plot of a camera symbol. - """ - a = 0.5 * torch.tensor([-2, 1.5, 4]) - up1 = 0.5 * torch.tensor([0, 1.5, 4]) - up2 = 0.5 * torch.tensor([0, 2, 4]) - b = 0.5 * torch.tensor([2, 1.5, 4]) - c = 0.5 * torch.tensor([-2, -1.5, 4]) - d = 0.5 * torch.tensor([2, -1.5, 4]) - C = torch.zeros(3) - F = torch.tensor([0, 0, 3]) - camera_points = [a, up1, up2, up1, b, d, c, a, C, b, d, C, c, C, F] - lines = torch.stack([x.float() for x in camera_points]) * scale - return lines - - -class AxisArgs(NamedTuple): # pragma: no cover - showgrid: bool = False - zeroline: bool = False - showline: bool = False - ticks: str = "" - showticklabels: bool = False - backgroundcolor: str = "#fff" - showaxeslabels: bool = False - - -class Lighting(NamedTuple): # pragma: no cover - ambient: float = 0.8 - diffuse: float = 1.0 - fresnel: float = 0.0 - specular: float = 0.0 - roughness: float = 0.5 - facenormalsepsilon: float = 1e-6 - vertexnormalsepsilon: float = 1e-12 - - -@torch.no_grad() -def plot_scene( - plots: Dict[str, Dict[str, Struct]], - *, - viewpoint_cameras: Optional[CamerasBase] = None, - ncols: int = 1, - camera_scale: float = 0.3, - pointcloud_max_points: int = 20000, - pointcloud_marker_size: int = 1, - raybundle_max_rays: int = 20000, - raybundle_max_points_per_ray: int = 1000, - raybundle_ray_point_marker_size: int = 1, - raybundle_ray_line_width: int = 1, - **kwargs, -): # pragma: no cover - """ - Main function to visualize Cameras, Meshes, Pointclouds, and RayBundle. - Plots input Cameras, Meshes, Pointclouds, and RayBundle data into named subplots, - with named traces based on the dictionary keys. Cameras are - rendered at the camera center location using a wireframe. - - Args: - plots: A dict containing subplot and trace names, - as well as the Meshes, Cameras and Pointclouds objects to be rendered. - See below for examples of the format. - viewpoint_cameras: an instance of a Cameras object providing a location - to view the plotly plot from. If the batch size is equal - to the number of subplots, it is a one to one mapping. - If the batch size is 1, then that viewpoint will be used - for all the subplots will be viewed from that point. - Otherwise, the viewpoint_cameras will not be used. - ncols: the number of subplots per row - camera_scale: determines the size of the wireframe used to render cameras. - pointcloud_max_points: the maximum number of points to plot from - a pointcloud. If more are present, a random sample of size - pointcloud_max_points is used. 
- pointcloud_marker_size: the size of the points rendered by plotly - when plotting a pointcloud. - raybundle_max_rays: maximum number of rays of a RayBundle to visualize. Randomly - subsamples without replacement in case the number of rays is bigger than max_rays. - raybundle_max_points_per_ray: the maximum number of points per ray in RayBundle - to visualize. If more are present, a random sample of size - max_points_per_ray is used. - raybundle_ray_point_marker_size: the size of the ray points of a plotted RayBundle - raybundle_ray_line_width: the width of the plotted rays of a RayBundle - **kwargs: Accepts lighting (a Lighting object) and any of the args xaxis, - yaxis and zaxis which Plotly's scene accepts. Accepts axis_args, - which is an AxisArgs object that is applied to all 3 axes. - Example settings for axis_args and lighting are given at the - top of this file. - - Example: - - ..code-block::python - - mesh = ... - point_cloud = ... - fig = plot_scene({ - "subplot_title": { - "mesh_trace_title": mesh, - "pointcloud_trace_title": point_cloud - } - }) - fig.show() - - The above example will render one subplot which has both a mesh and pointcloud. - - If the Meshes, Pointclouds, or Cameras objects are batched, then every object in that batch - will be plotted in a single trace. - - ..code-block::python - mesh = ... # batch size 2 - point_cloud = ... # batch size 2 - fig = plot_scene({ - "subplot_title": { - "mesh_trace_title": mesh, - "pointcloud_trace_title": point_cloud - } - }) - fig.show() - - The above example renders one subplot with 2 traces, each of which renders - both objects from their respective batched data. - - Multiple subplots follow the same pattern: - ..code-block::python - mesh = ... # batch size 2 - point_cloud = ... # batch size 2 - fig = plot_scene({ - "subplot1_title": { - "mesh_trace_title": mesh[0], - "pointcloud_trace_title": point_cloud[0] - }, - "subplot2_title": { - "mesh_trace_title": mesh[1], - "pointcloud_trace_title": point_cloud[1] - } - }, - ncols=2) # specify the number of subplots per row - fig.show() - - The above example will render two subplots, each containing a mesh - and a pointcloud. The ncols argument will render two subplots in one row - instead of having them vertically stacked because the default is one subplot - per row. - - To view plotly plots from a PyTorch3D camera's point of view, we can use - viewpoint_cameras: - ..code-block::python - mesh = ... # batch size 2 - R, T = look_at_view_transform(2.7, 0, [0, 180]) # 2 camera angles, front and back - # Any instance of CamerasBase works, here we use FoVPerspectiveCameras - cameras = FoVPerspectiveCameras(device=device, R=R, T=T) - fig = plot_scene({ - "subplot1_title": { - "mesh_trace_title": mesh[0] - }, - "subplot2_title": { - "mesh_trace_title": mesh[1] - } - }, - viewpoint_cameras=cameras) - fig.show() - - The above example will render the first subplot seen from the camera on the +z axis, - and the second subplot from the viewpoint of the camera on the -z axis. - - We can visualize these cameras as well: - ..code-block::python - mesh = ... - R, T = look_at_view_transform(2.7, 0, [0, 180]) # 2 camera angles, front and back - # Any instance of CamerasBase works, here we use FoVPerspectiveCameras - cameras = FoVPerspectiveCameras(device=device, R=R, T=T) - fig = plot_scene({ - "subplot1_title": { - "mesh_trace_title": mesh, - "cameras_trace_title": cameras, - }, - }) - fig.show() - - The above example will render one subplot with the mesh object - and two cameras. 
- - RayBundle visualization is also supproted: - ..code-block::python - cameras = PerspectiveCameras(...) - ray_bundle = RayBundle(origins=..., lengths=..., directions=..., xys=...) - fig = plot_scene({ - "subplot1_title": { - "ray_bundle_trace_title": ray_bundle, - "cameras_trace_title": cameras, - }, - }) - fig.show() - - For an example of using kwargs, see below: - ..code-block::python - mesh = ... - point_cloud = ... - fig = plot_scene({ - "subplot_title": { - "mesh_trace_title": mesh, - "pointcloud_trace_title": point_cloud - } - }, - axis_args=AxisArgs(backgroundcolor="rgb(200,230,200)")) # kwarg axis_args - fig.show() - - The above example will render each axis with the input background color. - - See the tutorials in pytorch3d/docs/tutorials for more examples - (namely rendered_color_points.ipynb and rendered_textured_meshes.ipynb). - """ - - subplots = list(plots.keys()) - fig = _gen_fig_with_subplots(len(subplots), ncols, subplots) - lighting = kwargs.get("lighting", Lighting())._asdict() - axis_args_dict = kwargs.get("axis_args", AxisArgs())._asdict() - - # Set axis arguments to defaults defined at the top of this file - x_settings = {**axis_args_dict} - y_settings = {**axis_args_dict} - z_settings = {**axis_args_dict} - - # Update the axes with any axis settings passed in as kwargs. - x_settings.update(**kwargs.get("xaxis", {})) - y_settings.update(**kwargs.get("yaxis", {})) - z_settings.update(**kwargs.get("zaxis", {})) - - camera = { - "up": { - "x": 0.0, - "y": 1.0, - "z": 0.0, - } # set the up vector to match PyTorch3D world coordinates conventions - } - viewpoints_eye_at_up_world = None - if viewpoint_cameras: - n_viewpoint_cameras = len(viewpoint_cameras) - if n_viewpoint_cameras == len(subplots) or n_viewpoint_cameras == 1: - # Calculate the vectors eye, at, up in world space - # to initialize the position of the camera in - # the plotly figure - viewpoints_eye_at_up_world = camera_to_eye_at_up( - viewpoint_cameras.get_world_to_view_transform().cpu() - ) - else: - msg = "Invalid number {} of viewpoint cameras were provided. Either 1 \ - or {} cameras are required".format( - len(viewpoint_cameras), len(subplots) - ) - warnings.warn(msg) - - for subplot_idx in range(len(subplots)): - subplot_name = subplots[subplot_idx] - traces = plots[subplot_name] - for trace_name, struct in traces.items(): - if isinstance(struct, Meshes): - _add_mesh_trace(fig, struct, trace_name, subplot_idx, ncols, lighting) - elif isinstance(struct, Pointclouds): - _add_pointcloud_trace( - fig, - struct, - trace_name, - subplot_idx, - ncols, - pointcloud_max_points, - pointcloud_marker_size, - ) - elif isinstance(struct, CamerasBase): - _add_camera_trace( - fig, struct, trace_name, subplot_idx, ncols, camera_scale - ) - elif _is_ray_bundle(struct): - _add_ray_bundle_trace( - fig, - struct, - trace_name, - subplot_idx, - ncols, - raybundle_max_rays, - raybundle_max_points_per_ray, - raybundle_ray_point_marker_size, - raybundle_ray_line_width, - ) - else: - raise ValueError( - "struct {} is not a Cameras, Meshes, Pointclouds,".format(struct) - + " , RayBundle or HeterogeneousRayBundle object." - ) - - # Ensure update for every subplot. - plot_scene = "scene" + str(subplot_idx + 1) - current_layout = fig["layout"][plot_scene] - xaxis = current_layout["xaxis"] - yaxis = current_layout["yaxis"] - zaxis = current_layout["zaxis"] - - # Update the axes with our above default and provided settings. 
- xaxis.update(**x_settings) - yaxis.update(**y_settings) - zaxis.update(**z_settings) - - # update camera viewpoint if provided - if viewpoints_eye_at_up_world is not None: - # Use camera params for batch index or the first camera if only one provided. - viewpoint_idx = min(n_viewpoint_cameras - 1, subplot_idx) - - eye, at, up = (i[viewpoint_idx] for i in viewpoints_eye_at_up_world) - eye_x, eye_y, eye_z = eye.tolist() - at_x, at_y, at_z = at.tolist() - up_x, up_y, up_z = up.tolist() - - # scale camera eye to plotly [-1, 1] ranges - x_range = xaxis["range"] - y_range = yaxis["range"] - z_range = zaxis["range"] - - eye_x = _scale_camera_to_bounds(eye_x, x_range, True) - eye_y = _scale_camera_to_bounds(eye_y, y_range, True) - eye_z = _scale_camera_to_bounds(eye_z, z_range, True) - - at_x = _scale_camera_to_bounds(at_x, x_range, True) - at_y = _scale_camera_to_bounds(at_y, y_range, True) - at_z = _scale_camera_to_bounds(at_z, z_range, True) - - up_x = _scale_camera_to_bounds(up_x, x_range, False) - up_y = _scale_camera_to_bounds(up_y, y_range, False) - up_z = _scale_camera_to_bounds(up_z, z_range, False) - - camera["eye"] = {"x": eye_x, "y": eye_y, "z": eye_z} - camera["center"] = {"x": at_x, "y": at_y, "z": at_z} - camera["up"] = {"x": up_x, "y": up_y, "z": up_z} - - current_layout.update( - { - "xaxis": xaxis, - "yaxis": yaxis, - "zaxis": zaxis, - "aspectmode": "cube", - "camera": camera, - } - ) - - return fig - - -@torch.no_grad() -def plot_batch_individually( - batched_structs: Union[ - List[Struct], - Struct, - ], - *, - viewpoint_cameras: Optional[CamerasBase] = None, - ncols: int = 1, - extend_struct: bool = True, - subplot_titles: Optional[List[str]] = None, - **kwargs, -): # pragma: no cover - """ - This is a higher level plotting function than plot_scene, for plotting - Cameras, Meshes, Pointclouds, and RayBundle in simple cases. The simplest use - is to plot a single Cameras, Meshes, Pointclouds, or a RayBundle object, - where you just pass it in as a one element list. This will plot each batch - element in a separate subplot. - - More generally, you can supply multiple Cameras, Meshes, Pointclouds, or RayBundle - having the same batch size `n`. In this case, there will be `n` subplots, - each depicting the corresponding batch element of all the inputs. - - In addition, you can include Cameras, Meshes, Pointclouds, or RayBundle of size 1 in - the input. These will either be rendered in the first subplot - (if extend_struct is False), or in every subplot. - RayBundle includes ImplicitronRayBundle and HeterogeneousRaybundle. - - Args: - batched_structs: a list of Cameras, Meshes, Pointclouds and RayBundle to be - rendered. Each structure's corresponding batch element will be plotted in a - single subplot, resulting in n subplots for a batch of size n. Every struct - should either have the same batch size or be of batch size 1. See extend_struct - and the description above for how batch size 1 structs are handled. Also accepts - a single Cameras, Meshes, Pointclouds, and RayBundle object, which will have - each individual element plotted in its own subplot. - viewpoint_cameras: an instance of a Cameras object providing a location - to view the plotly plot from. If the batch size is equal - to the number of subplots, it is a one to one mapping. - If the batch size is 1, then that viewpoint will be used - for all the subplots will be viewed from that point. - Otherwise, the viewpoint_cameras will not be used. 
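[Editor's note] The viewpoint handling in plot_scene above reduces to calling camera_to_eye_at_up on the camera's world-to-view transform. A small sketch of that step in isolation (camera parameters are placeholders):

    from pytorch3d.renderer import FoVPerspectiveCameras, look_at_view_transform
    from pytorch3d.renderer.camera_utils import camera_to_eye_at_up

    R, T = look_at_view_transform(dist=2.7, elev=0, azim=0)
    cameras = FoVPerspectiveCameras(R=R, T=T)
    eye, at, up = camera_to_eye_at_up(cameras.get_world_to_view_transform().cpu())
    print(eye.shape, at.shape, up.shape)  # each (1, 3): camera position, look-at point, up direction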
- ncols: the number of subplots per row - extend_struct: if True, indicates that structs of batch size 1 - should be plotted in every subplot. - subplot_titles: strings to name each subplot - **kwargs: keyword arguments which are passed to plot_scene. - See plot_scene documentation for details. - - Example: - - ..code-block::python - - mesh = ... # mesh of batch size 2 - point_cloud = ... # point_cloud of batch size 2 - fig = plot_batch_individually([mesh, point_cloud], subplot_titles=["plot1", "plot2"]) - fig.show() - - # this is equivalent to the below figure - fig = plot_scene({ - "plot1": { - "trace1-1": mesh[0], - "trace1-2": point_cloud[0] - }, - "plot2":{ - "trace2-1": mesh[1], - "trace2-2": point_cloud[1] - } - }) - fig.show() - - The above example will render two subplots which each have both a mesh and pointcloud. - For more examples look at the pytorch3d tutorials at `pytorch3d/docs/tutorials`, - in particular the files rendered_color_points.ipynb and rendered_textured_meshes.ipynb. - """ - - # check that every batch is the same size or is size 1 - if _get_len(batched_structs) == 0: - msg = "No structs to plot" - warnings.warn(msg) - return - max_size = 0 - if isinstance(batched_structs, list): - max_size = max(_get_len(s) for s in batched_structs) - for struct in batched_structs: - struct_len = _get_len(struct) - if struct_len not in (1, max_size): - msg = "invalid batch size {} provided: {}".format(struct_len, struct) - raise ValueError(msg) - else: - max_size = _get_len(batched_structs) - - if max_size == 0: - msg = "No data is provided with at least one element" - raise ValueError(msg) - - if subplot_titles: - if len(subplot_titles) != max_size: - msg = "invalid number of subplot titles" - raise ValueError(msg) - - # if we are dealing with HeterogeneousRayBundle of ImplicitronRayBundle create - # first indexes for faster - first_idxs = None - if _is_heterogeneous_ray_bundle(batched_structs): - # pyre-ignore[16] - cumsum = batched_structs.camera_counts.cumsum(dim=0) - first_idxs = torch.cat((cumsum.new_zeros((1,)), cumsum)) - - scene_dictionary = {} - # construct the scene dictionary - for scene_num in range(max_size): - subplot_title = ( - subplot_titles[scene_num] - if subplot_titles - else "subplot " + str(scene_num + 1) - ) - scene_dictionary[subplot_title] = {} - - if isinstance(batched_structs, list): - for i, batched_struct in enumerate(batched_structs): - first_idxs = None - if _is_heterogeneous_ray_bundle(batched_structs[i]): - # pyre-ignore[16] - cumsum = batched_struct.camera_counts.cumsum(dim=0) - first_idxs = torch.cat((cumsum.new_zeros((1,)), cumsum)) - # check for whether this struct needs to be extended - batched_struct_len = _get_len(batched_struct) - if i >= batched_struct_len and not extend_struct: - continue - _add_struct_from_batch( - batched_struct, - scene_num, - subplot_title, - scene_dictionary, - i + 1, - first_idxs=first_idxs, - ) - else: # batched_structs is a single struct - _add_struct_from_batch( - batched_structs, - scene_num, - subplot_title, - scene_dictionary, - first_idxs=first_idxs, - ) - - return plot_scene( - scene_dictionary, viewpoint_cameras=viewpoint_cameras, ncols=ncols, **kwargs - ) - - -def _add_struct_from_batch( - batched_struct: Struct, - scene_num: int, - subplot_title: str, - scene_dictionary: Dict[str, Dict[str, Struct]], - trace_idx: int = 1, - first_idxs: Optional[torch.Tensor] = None, -) -> None: # pragma: no cover - """ - Adds the struct corresponding to the given scene_num index to - a provided scene_dictionary to be 
passed in to plot_scene - - Args: - batched_struct: the batched data structure to add to the dict - scene_num: the subplot from plot_batch_individually which this struct - should be added to - subplot_title: the title of the subplot - scene_dictionary: the dictionary to add the indexed struct to - trace_idx: the trace number, starting at 1 for this struct's trace - """ - struct = None - if isinstance(batched_struct, CamerasBase): - # we can't index directly into camera batches - R, T = batched_struct.R, batched_struct.T - r_idx = min(scene_num, len(R) - 1) - t_idx = min(scene_num, len(T) - 1) - R = R[r_idx].unsqueeze(0) - T = T[t_idx].unsqueeze(0) - struct = CamerasBase(device=batched_struct.device, R=R, T=T) - elif _is_ray_bundle(batched_struct) and not _is_heterogeneous_ray_bundle( - batched_struct - ): - # for RayBundle we treat the camera count as the batch index - struct_idx = min(scene_num, _get_len(batched_struct) - 1) - - struct = RayBundle( - **{ - attr: getattr(batched_struct, attr)[struct_idx] - for attr in ["origins", "directions", "lengths", "xys"] - } - ) - elif _is_heterogeneous_ray_bundle(batched_struct): - # for RayBundle we treat the camera count as the batch index - struct_idx = min(scene_num, _get_len(batched_struct) - 1) - - struct = RayBundle( - **{ - attr: getattr(batched_struct, attr)[ - # pyre-ignore[16] - first_idxs[struct_idx] : first_idxs[struct_idx + 1] - ] - for attr in ["origins", "directions", "lengths", "xys"] - } - ) - - else: # batched meshes and pointclouds are indexable - struct_idx = min(scene_num, _get_len(batched_struct) - 1) - # pyre-ignore[16] - struct = batched_struct[struct_idx] - trace_name = "trace{}-{}".format(scene_num + 1, trace_idx) - scene_dictionary[subplot_title][trace_name] = struct - - -def _add_mesh_trace( - fig: go.Figure, # pyre-ignore[11] - meshes: Meshes, - trace_name: str, - subplot_idx: int, - ncols: int, - lighting: Lighting, -) -> None: # pragma: no cover - """ - Adds a trace rendering a Meshes object to the passed in figure, with - a given name and in a specific subplot. - - Args: - fig: plotly figure to add the trace within. - meshes: Meshes object to render. It can be batched. - trace_name: name to label the trace with. - subplot_idx: identifies the subplot, with 0 being the top left. - ncols: the number of subplots per row. - lighting: a Lighting object that specifies the Mesh3D lighting. - """ - - mesh = join_meshes_as_scene(meshes) - mesh = mesh.detach().cpu() - verts = mesh.verts_packed() - faces = mesh.faces_packed() - # If mesh has vertex colors or face colors, use them - # for figure, otherwise use plotly's default colors. - verts_rgb = None - faces_rgb = None - if isinstance(mesh.textures, TexturesVertex): - verts_rgb = mesh.textures.verts_features_packed() - verts_rgb.clamp_(min=0.0, max=1.0) - verts_rgb = torch.tensor(255.0) * verts_rgb - if isinstance(mesh.textures, TexturesAtlas): - atlas = mesh.textures.atlas_packed() - # If K==1 - if atlas.shape[1] == 1 and atlas.shape[3] == 3: - faces_rgb = atlas[:, 0, 0] - - # Reposition the unused vertices to be "inside" the object - # (i.e. they won't be visible in the plot). 
- verts_used = torch.zeros((verts.shape[0],), dtype=torch.bool) - verts_used[torch.unique(faces)] = True - verts_center = verts[verts_used].mean(0) - verts[~verts_used] = verts_center - - row, col = subplot_idx // ncols + 1, subplot_idx % ncols + 1 - fig.add_trace( - go.Mesh3d( - x=verts[:, 0], - y=verts[:, 1], - z=verts[:, 2], - vertexcolor=verts_rgb, - facecolor=faces_rgb, - i=faces[:, 0], - j=faces[:, 1], - k=faces[:, 2], - lighting=lighting, - name=trace_name, - ), - row=row, - col=col, - ) - - # Access the current subplot's scene configuration - plot_scene = "scene" + str(subplot_idx + 1) - current_layout = fig["layout"][plot_scene] - - # update the bounds of the axes for the current trace - max_expand = (verts.max(0)[0] - verts.min(0)[0]).max() - _update_axes_bounds(verts_center, max_expand, current_layout) - - -def _add_pointcloud_trace( - fig: go.Figure, - pointclouds: Pointclouds, - trace_name: str, - subplot_idx: int, - ncols: int, - max_points_per_pointcloud: int, - marker_size: int, -) -> None: # pragma: no cover - """ - Adds a trace rendering a Pointclouds object to the passed in figure, with - a given name and in a specific subplot. - - Args: - fig: plotly figure to add the trace within. - pointclouds: Pointclouds object to render. It can be batched. - trace_name: name to label the trace with. - subplot_idx: identifies the subplot, with 0 being the top left. - ncols: the number of subplots per row. - max_points_per_pointcloud: the number of points to render, which are randomly sampled. - marker_size: the size of the rendered points - """ - pointclouds = pointclouds.detach().cpu().subsample(max_points_per_pointcloud) - verts = pointclouds.points_packed() - features = pointclouds.features_packed() - - color = None - if features is not None: - if features.shape[1] == 4: # rgba - template = "rgb(%d, %d, %d, %f)" - rgb = (features[:, :3].clamp(0.0, 1.0) * 255).int() - color = [template % (*rgb_, a_) for rgb_, a_ in zip(rgb, features[:, 3])] - - if features.shape[1] == 3: - template = "rgb(%d, %d, %d)" - rgb = (features.clamp(0.0, 1.0) * 255).int() - color = [template % (r, g, b) for r, g, b in rgb] - - row = subplot_idx // ncols + 1 - col = subplot_idx % ncols + 1 - fig.add_trace( - go.Scatter3d( - x=verts[:, 0], - y=verts[:, 1], - z=verts[:, 2], - marker={"color": color, "size": marker_size}, - mode="markers", - name=trace_name, - ), - row=row, - col=col, - ) - - # Access the current subplot's scene configuration - plot_scene = "scene" + str(subplot_idx + 1) - current_layout = fig["layout"][plot_scene] - - # update the bounds of the axes for the current trace - verts_center = verts.mean(0) - max_expand = (verts.max(0)[0] - verts.min(0)[0]).max() - _update_axes_bounds(verts_center, max_expand, current_layout) - - -def _add_camera_trace( - fig: go.Figure, - cameras: CamerasBase, - trace_name: str, - subplot_idx: int, - ncols: int, - camera_scale: float, -) -> None: # pragma: no cover - """ - Adds a trace rendering a Cameras object to the passed in figure, with - a given name and in a specific subplot. - - Args: - fig: plotly figure to add the trace within. - cameras: the Cameras object to render. It can be batched. - trace_name: name to label the trace with. - subplot_idx: identifies the subplot, with 0 being the top left. - ncols: the number of subplots per row. - camera_scale: the size of the wireframe used to render the Cameras object. 
- """ - cam_wires = get_camera_wireframe(camera_scale).to(cameras.device) - cam_trans = cameras.get_world_to_view_transform().inverse() - cam_wires_trans = cam_trans.transform_points(cam_wires).detach().cpu() - # if batch size is 1, unsqueeze to add dimension - if len(cam_wires_trans.shape) < 3: - cam_wires_trans = cam_wires_trans.unsqueeze(0) - - nan_tensor = torch.Tensor([[float("NaN")] * 3]) - all_cam_wires = cam_wires_trans[0] - for wire in cam_wires_trans[1:]: - # We combine camera points into a single tensor to plot them in a - # single trace. The NaNs are inserted between sets of camera - # points so that the lines drawn by Plotly are not drawn between - # points that belong to different cameras. - all_cam_wires = torch.cat((all_cam_wires, nan_tensor, wire)) - x, y, z = all_cam_wires.detach().cpu().numpy().T.astype(float) - - row, col = subplot_idx // ncols + 1, subplot_idx % ncols + 1 - fig.add_trace( - go.Scatter3d(x=x, y=y, z=z, marker={"size": 1}, name=trace_name), - row=row, - col=col, - ) - - # Access the current subplot's scene configuration - plot_scene = "scene" + str(subplot_idx + 1) - current_layout = fig["layout"][plot_scene] - - # flatten for bounds calculations - flattened_wires = cam_wires_trans.flatten(0, 1) - verts_center = flattened_wires.mean(0) - max_expand = (flattened_wires.max(0)[0] - flattened_wires.min(0)[0]).max() - _update_axes_bounds(verts_center, max_expand, current_layout) - - -def _add_ray_bundle_trace( - fig: go.Figure, - ray_bundle: Union[RayBundle, HeterogeneousRayBundle], - trace_name: str, - subplot_idx: int, - ncols: int, - max_rays: int, - max_points_per_ray: int, - marker_size: int, - line_width: int, -) -> None: # pragma: no cover - """ - Adds a trace rendering a ray bundle object - to the passed in figure, with a given name and in a specific subplot. - - Args: - fig: plotly figure to add the trace within. - ray_bundle: the RayBundle, ImplicitronRayBundle or HeterogeneousRaybundle to render. - It can be batched. - trace_name: name to label the trace with. - subplot_idx: identifies the subplot, with 0 being the top left. - ncols: the number of subplots per row. - max_rays: maximum number of plotted rays in total. Randomly subsamples - without replacement in case the number of rays is bigger than max_rays. - max_points_per_ray: maximum number of points plotted per ray. - marker_size: the size of the ray point markers. - line_width: the width of the ray lines. 
- """ - - n_pts_per_ray = ray_bundle.lengths.shape[-1] - n_rays = ray_bundle.lengths.shape[:-1].numel() - - # flatten all batches of rays into a single big bundle - ray_bundle_flat = RayBundle( - **{ - attr: torch.flatten(getattr(ray_bundle, attr), start_dim=0, end_dim=-2) - for attr in ["origins", "directions", "lengths", "xys"] - } - ) - - # subsample the rays (if needed) - if n_rays > max_rays: - indices_rays = torch.randperm(n_rays)[:max_rays] - ray_bundle_flat = RayBundle( - **{ - attr: getattr(ray_bundle_flat, attr)[indices_rays] - for attr in ["origins", "directions", "lengths", "xys"] - } - ) - - # make ray line endpoints - min_max_ray_depth = torch.stack( - [ - ray_bundle_flat.lengths.min(dim=1).values, - ray_bundle_flat.lengths.max(dim=1).values, - ], - dim=-1, - ) - ray_lines_endpoints = ray_bundle_to_ray_points( - ray_bundle_flat._replace(lengths=min_max_ray_depth) - ) - - # make the ray lines for plotly plotting - nan_tensor = torch.tensor( - [[float("NaN")] * 3], - device=ray_lines_endpoints.device, - dtype=ray_lines_endpoints.dtype, - ) - ray_lines = torch.empty(size=(1, 3), device=ray_lines_endpoints.device) - for ray_line in ray_lines_endpoints: - # We combine the ray lines into a single tensor to plot them in a - # single trace. The NaNs are inserted between sets of ray lines - # so that the lines drawn by Plotly are not drawn between - # lines that belong to different rays. - ray_lines = torch.cat((ray_lines, nan_tensor, ray_line)) - x, y, z = ray_lines.detach().cpu().numpy().T.astype(float) - row, col = subplot_idx // ncols + 1, subplot_idx % ncols + 1 - fig.add_trace( - go.Scatter3d( - x=x, - y=y, - z=z, - marker={"size": 0.1}, - line={"width": line_width}, - name=trace_name, - ), - row=row, - col=col, - ) - - # subsample the ray points (if needed) - if n_pts_per_ray > max_points_per_ray: - indices_ray_pts = torch.cat( - [ - torch.randperm(n_pts_per_ray)[:max_points_per_ray] + ri * n_pts_per_ray - for ri in range(ray_bundle_flat.lengths.shape[0]) - ] - ) - ray_bundle_flat = ray_bundle_flat._replace( - lengths=ray_bundle_flat.lengths.reshape(-1)[indices_ray_pts].reshape( - ray_bundle_flat.lengths.shape[0], -1 - ) - ) - - # plot the ray points - ray_points = ( - ray_bundle_to_ray_points(ray_bundle_flat) - .view(-1, 3) - .detach() - .cpu() - .numpy() - .astype(float) - ) - fig.add_trace( - go.Scatter3d( - x=ray_points[:, 0], - y=ray_points[:, 1], - z=ray_points[:, 2], - mode="markers", - name=trace_name + "_points", - marker={"size": marker_size}, - ), - row=row, - col=col, - ) - - # Access the current subplot's scene configuration - plot_scene = "scene" + str(subplot_idx + 1) - current_layout = fig["layout"][plot_scene] - - # update the bounds of the axes for the current trace - all_ray_points = ray_bundle_to_ray_points(ray_bundle).reshape(-1, 3) - ray_points_center = all_ray_points.mean(dim=0) - max_expand = (all_ray_points.max(0)[0] - all_ray_points.min(0)[0]).max().item() - _update_axes_bounds(ray_points_center, float(max_expand), current_layout) - - -def _gen_fig_with_subplots( - batch_size: int, ncols: int, subplot_titles: List[str] -): # pragma: no cover - """ - Takes in the number of objects to be plotted and generate a plotly figure - with the appropriate number and orientation of titled subplots. - Args: - batch_size: the number of elements in the batch of objects to be visualized. - ncols: number of subplots in the same row. - subplot_titles: titles for the subplot(s). list of strings of length batch_size. 
- - Returns: - Plotly figure with ncols subplots per row, and batch_size subplots. - """ - fig_rows = batch_size // ncols - if batch_size % ncols != 0: - fig_rows += 1 # allow for non-uniform rows - fig_cols = ncols - fig_type = [{"type": "scene"}] - specs = [fig_type * fig_cols] * fig_rows - # subplot_titles must have one title per subplot - fig = make_subplots( - rows=fig_rows, - cols=fig_cols, - specs=specs, - subplot_titles=subplot_titles, - column_widths=[1.0] * fig_cols, - ) - return fig - - -def _update_axes_bounds( - verts_center: torch.Tensor, - max_expand: float, - current_layout: go.Scene, # pyre-ignore[11] -) -> None: # pragma: no cover - """ - Takes in the vertices' center point and max spread, and the current plotly figure - layout and updates the layout to have bounds that include all traces for that subplot. - Args: - verts_center: tensor of size (3) corresponding to a trace's vertices' center point. - max_expand: the maximum spread in any dimension of the trace's vertices. - current_layout: the plotly figure layout scene corresponding to the referenced trace. - """ - verts_center = verts_center.detach().cpu() - verts_min = verts_center - max_expand - verts_max = verts_center + max_expand - bounds = torch.t(torch.stack((verts_min, verts_max))) - - # Ensure that within a subplot, the bounds capture all traces - old_xrange, old_yrange, old_zrange = ( - current_layout["xaxis"]["range"], - current_layout["yaxis"]["range"], - current_layout["zaxis"]["range"], - ) - x_range, y_range, z_range = bounds - if old_xrange is not None: - x_range[0] = min(x_range[0], old_xrange[0]) - x_range[1] = max(x_range[1], old_xrange[1]) - if old_yrange is not None: - y_range[0] = min(y_range[0], old_yrange[0]) - y_range[1] = max(y_range[1], old_yrange[1]) - if old_zrange is not None: - z_range[0] = min(z_range[0], old_zrange[0]) - z_range[1] = max(z_range[1], old_zrange[1]) - - xaxis = {"range": x_range} - yaxis = {"range": y_range} - zaxis = {"range": z_range} - current_layout.update({"xaxis": xaxis, "yaxis": yaxis, "zaxis": zaxis}) - - -def _scale_camera_to_bounds( - coordinate: float, axis_bounds: Tuple[float, float], is_position: bool -) -> float: # pragma: no cover - """ - We set our plotly plot's axes' bounding box to [-1,1]x[-1,1]x[-1,1]. As such, - the plotly camera location has to be scaled accordingly to have its world coordinates - correspond to its relative plotted coordinates for viewing the plotly plot. - This function does the scaling and offset to transform the coordinates. - - Args: - coordinate: the float value to be transformed - axis_bounds: the bounds of the plotly plot for the axis which - the coordinate argument refers to - is_position: If true, the float value is the coordinate of a position, and so must - be moved in to [-1,1]. Otherwise it is a component of a direction, and so needs only - to be scaled. - """ - scale = (axis_bounds[1] - axis_bounds[0]) / 2 - if not is_position: - return coordinate / scale - offset = (axis_bounds[1] / scale) - 1 - return coordinate / scale - offset diff --git a/pytorch3d/pytorch3d/vis/texture_vis.py b/pytorch3d/pytorch3d/vis/texture_vis.py deleted file mode 100644 index 0d36abb6f154a7e383fa2ab887aade5a2ca6c9da..0000000000000000000000000000000000000000 --- a/pytorch3d/pytorch3d/vis/texture_vis.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
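[Editor's note] A quick numeric check of the _scale_camera_to_bounds logic above: with axis bounds (-3, 5) the scale is 4 and the offset is 5/4 - 1 = 0.25, so the bounds themselves map to -1 and +1 as intended. The sketch below mirrors the described scaling for illustration only.

    def scale_to_bounds(coordinate, axis_bounds, is_position):
        # Mirrors the scaling described above; illustrative re-statement, not the library function.
        scale = (axis_bounds[1] - axis_bounds[0]) / 2
        if not is_position:
            return coordinate / scale
        offset = (axis_bounds[1] / scale) - 1
        return coordinate / scale - offset

    assert scale_to_bounds(-3.0, (-3.0, 5.0), True) == -1.0
    assert scale_to_bounds(5.0, (-3.0, 5.0), True) == 1.0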
- -from typing import Any, Optional - -import numpy as np -from PIL import Image, ImageDraw -from pytorch3d.renderer.mesh import TexturesUV - - -def texturesuv_image_matplotlib( - texture: TexturesUV, - *, - texture_index: int = 0, - radius: float = 1, - color=(1.0, 0.0, 0.0), - subsample: Optional[int] = 10000, - origin: str = "upper", -) -> None: # pragma: no cover - """ - Plot the texture image for one element of a TexturesUV with - matplotlib together with verts_uvs positions circled. - In particular a value in verts_uvs which is never referenced - in faces_uvs will still be plotted. - This is for debugging purposes, e.g. to align the map with - the uv coordinates. In particular, matplotlib - is used which is not an official dependency of PyTorch3D. - - Args: - texture: a TexturesUV object with one mesh - texture_index: index in the batch to plot - radius: plotted circle radius in pixels - color: any matplotlib-understood color for the circles. - subsample: if not None, number of points to plot. - Otherwise all points are plotted. - origin: "upper" or "lower" like matplotlib.imshow . - upper (the default) matches texturesuv_image_PIL. - """ - - import matplotlib.pyplot as plt - from matplotlib.patches import Circle - - texture_image = texture.maps_padded() - centers = texture.centers_for_image(index=texture_index).numpy() - - ax = plt.gca() - ax.imshow(texture_image[texture_index].detach().cpu().numpy(), origin=origin) - - n_points = centers.shape[0] - if subsample is None or n_points <= subsample: - indices = range(n_points) - else: - indices = np.random.choice(n_points, subsample, replace=False) - for i in indices: - # setting clip_on=False makes it obvious when - # we have UV coordinates outside the correct range - ax.add_patch(Circle(centers[i], radius, color=color, clip_on=False)) - - -def texturesuv_image_PIL( - texture: TexturesUV, - *, - texture_index: int = 0, - radius: float = 1, - color: Any = "red", - subsample: Optional[int] = 10000, -): # pragma: no cover - """ - Return a PIL image of the texture image of one element of the batch - from a TexturesUV, together with the verts_uvs positions circled. - In particular a value in verts_uvs which is never referenced - in faces_uvs will still be plotted. - This is for debugging purposes, e.g. to align the map with - the uv coordinates. In particular, matplotlib - is used which is not an official dependency of PyTorch3D. - - Args: - texture: a TexturesUV object with one mesh - texture_index: index in the batch to plot - radius: plotted circle radius in pixels - color: any PIL-understood color for the circles. - subsample: if not None, number of points to plot. - Otherwise all points are plotted. - - Returns: - PIL Image object. 
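[Editor's note] A small sketch exercising the UV-debugging helpers above on a synthetic one-face TexturesUV; the texture map, UV values, and output filename are arbitrary placeholders.

    import torch
    from pytorch3d.renderer.mesh import TexturesUV
    from pytorch3d.vis import texturesuv_image_PIL

    maps = torch.rand(1, 64, 64, 3)                                   # (N, H, W, 3) texture image
    verts_uvs = torch.tensor([[[0.1, 0.1], [0.9, 0.1], [0.5, 0.9]]])  # (N, V, 2) UV coordinates
    faces_uvs = torch.tensor([[[0, 1, 2]]])                           # (N, F, 3) UV indices per face
    texture = TexturesUV(maps=maps, faces_uvs=faces_uvs, verts_uvs=verts_uvs)

    image = texturesuv_image_PIL(texture, radius=2, color="blue")  # map with UV centers circled
    image.save("uv_debug.png")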
- """ - - centers = texture.centers_for_image(index=texture_index).numpy() - texture_image = texture.maps_padded() - texture_array = (texture_image[texture_index] * 255).cpu().numpy().astype(np.uint8) - - image = Image.fromarray(texture_array) - draw = ImageDraw.Draw(image) - - n_points = centers.shape[0] - if subsample is None or n_points <= subsample: - indices = range(n_points) - else: - indices = np.random.choice(n_points, subsample, replace=False) - - for i in indices: - x = centers[i][0] - y = centers[i][1] - draw.ellipse([(x - radius, y - radius), (x + radius, y + radius)], fill=color) - - return image diff --git a/pytorch3d/scripts/build_website.sh b/pytorch3d/scripts/build_website.sh deleted file mode 100644 index 2fc2db9ad112ebc0f68d6db7528042e1e0978d52..0000000000000000000000000000000000000000 --- a/pytorch3d/scripts/build_website.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# run this script from the project root using `./scripts/build_docs.sh` - -set -e - -usage() { - echo "Usage: $0 [-b]" - echo "" - echo "Build PyTorch3D documentation." - echo "" - echo " -b Build static version of documentation (otherwise start server)" - echo "" - exit 1 -} - -BUILD_STATIC=false - -while getopts 'hb' flag; do - case "${flag}" in - h) - usage - ;; - b) - BUILD_STATIC=true - ;; - *) - usage - ;; - esac -done - - -echo "-----------------------------------" -echo "Building PyTorch3D Docusaurus site" -echo "-----------------------------------" -cd website -yarn -cd .. - -echo "-----------------------------------" -echo "Generating tutorials" -echo "-----------------------------------" -cwd=$(pwd) -mkdir -p "website/_tutorials" -mkdir -p "website/static/files" -python scripts/parse_tutorials.py --repo_dir "${cwd}" - -cd website - -if [[ $BUILD_STATIC == true ]]; then - echo "-----------------------------------" - echo "Building static site" - echo "-----------------------------------" - yarn build -else - echo "-----------------------------------" - echo "Starting local server" - echo "-----------------------------------" - yarn start -fi diff --git a/pytorch3d/scripts/parse_tutorials.py b/pytorch3d/scripts/parse_tutorials.py deleted file mode 100644 index a7c7cd267b00c4267db9d719eda1c07b8c3f6694..0000000000000000000000000000000000000000 --- a/pytorch3d/scripts/parse_tutorials.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import argparse -import json -import os - -import nbformat -from bs4 import BeautifulSoup -from nbconvert import HTMLExporter, ScriptExporter - - -TEMPLATE = """const CWD = process.cwd(); - -const React = require('react'); -const Tutorial = require(`${{CWD}}/core/Tutorial.js`); - -class TutorialPage extends React.Component {{ - render() {{ - const {{config: siteConfig}} = this.props; - const {{baseUrl}} = siteConfig; - return ; - }} -}} - -module.exports = TutorialPage; - -""" - -JS_SCRIPTS = """ - - -""" # noqa: E501 - - -def gen_tutorials(repo_dir: str) -> None: - """Generate HTML tutorials for PyTorch3D Docusaurus site from Jupyter notebooks. - - Also create ipynb and py versions of tutorial in Docusaurus site for - download. 
- """ - with open(os.path.join(repo_dir, "website", "tutorials.json"), "r") as infile: - tutorial_config = json.loads(infile.read()) - - tutorial_ids = {x["id"] for v in tutorial_config.values() for x in v} - - for tid in tutorial_ids: - print("Generating {} tutorial".format(tid)) - - # convert notebook to HTML - ipynb_in_path = os.path.join( - repo_dir, "docs", "tutorials", "{}.ipynb".format(tid) - ) - with open(ipynb_in_path, "r") as infile: - nb_str = infile.read() - nb = nbformat.reads(nb_str, nbformat.NO_CONVERT) - - # displayname is absent from notebook metadata - nb["metadata"]["kernelspec"]["display_name"] = "python3" - - exporter = HTMLExporter() - html, meta = exporter.from_notebook_node(nb) - - # pull out html div for notebook - soup = BeautifulSoup(html, "html.parser") - nb_meat = soup.find("div", {"id": "notebook-container"}) - del nb_meat.attrs["id"] - nb_meat.attrs["class"] = ["notebook"] - html_out = JS_SCRIPTS + str(nb_meat) - - # generate html file - html_out_path = os.path.join( - repo_dir, "website", "_tutorials", "{}.html".format(tid) - ) - with open(html_out_path, "w") as html_outfile: - html_outfile.write(html_out) - - # generate JS file - script = TEMPLATE.format(tid) - js_out_path = os.path.join( - repo_dir, "website", "pages", "tutorials", "{}.js".format(tid) - ) - with open(js_out_path, "w") as js_outfile: - js_outfile.write(script) - - # output tutorial in both ipynb & py form - ipynb_out_path = os.path.join( - repo_dir, "website", "static", "files", "{}.ipynb".format(tid) - ) - with open(ipynb_out_path, "w") as ipynb_outfile: - ipynb_outfile.write(nb_str) - exporter = ScriptExporter() - script, meta = exporter.from_notebook_node(nb) - py_out_path = os.path.join( - repo_dir, "website", "static", "files", "{}.py".format(tid) - ) - with open(py_out_path, "w") as py_outfile: - py_outfile.write(script) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate JS, HTML, ipynb, and py files for tutorials." - ) - parser.add_argument( - "--repo_dir", metavar="path", required=True, help="PyTorch3D repo directory." - ) - args = parser.parse_args() - gen_tutorials(args.repo_dir) diff --git a/pytorch3d/scripts/publish_website.sh b/pytorch3d/scripts/publish_website.sh deleted file mode 100644 index 604875245965948db603e27dae4bbe95d629e0a1..0000000000000000000000000000000000000000 --- a/pytorch3d/scripts/publish_website.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# Instructions, assuming you are on a fresh pytorch3d checkout on a local -# drive. - -# (1) Have a separate checkout of pytorch3d at the head of the gh-pages branch -# on a local drive. Set the variable GHP to its full path. -# Any uncommitted changes there will be obliterated. -# For example -# GHP=/path/to/pytorch3d-gh-pages -# git clone -b gh-pages https://github.com/facebookresearch/pytorch3d $GHP - -# (2) Run this script in this directory with -# sudo docker run -it --rm -v $PWD/..:/loc -v $GHP:/ghp continuumio/miniconda3 bash --login /loc/scripts/publish_website.sh - -# (3) Choose a commit message, commit and push: -# cd $GHP && git add . 
-# git commit -m 'Update latest version of site' -# git push - -set -e - -conda create -y -n myenv python=3.7 nodejs - -# Note: Using bash --login together with the continuumio/miniconda3 image -# is what lets conda activate work so smoothly. - -conda activate myenv -pip install nbformat==4.4.0 nbconvert==5.3.1 ipywidgets==7.5.1 tornado==4.2 bs4 notebook==5.7.12 'mistune<2' -npm install --global yarn - -cd /loc -bash scripts/build_website.sh -b - -rm -rf /ghp/* -echo "pytorch3d.org" > /ghp/CNAME -mv /loc/website/build/pytorch3d/* /ghp/ diff --git a/pytorch3d/setup.cfg b/pytorch3d/setup.cfg deleted file mode 100644 index 8e48cc2dd614b4cb7370e8312de9329cdb128abc..0000000000000000000000000000000000000000 --- a/pytorch3d/setup.cfg +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -[isort] -line_length = 88 -multi_line_output = 3 -include_trailing_comma = True -force_grid_warp = 0 -default_section = THIRDPARTY -lines_after_imports = 2 -combine_as_imports = True diff --git a/pytorch3d/setup.py b/pytorch3d/setup.py deleted file mode 100644 index a5aecf71d3b5ec51163355540db1c056b68fa5aa..0000000000000000000000000000000000000000 --- a/pytorch3d/setup.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import glob -import os -import runpy -import sys -import warnings -from typing import List, Optional - -import torch -from setuptools import find_packages, setup -from torch.utils.cpp_extension import CppExtension, CUDA_HOME, CUDAExtension - - -def get_existing_ccbin(nvcc_args: List[str]) -> Optional[str]: - """ - Given a list of nvcc arguments, return the compiler if specified. - - Note from CUDA doc: Single value options and list options must have - arguments, which must follow the name of the option itself by either - one of more spaces or an equals character. - """ - last_arg = None - for arg in reversed(nvcc_args): - if arg == "-ccbin": - return last_arg - if arg.startswith("-ccbin="): - return arg[7:] - last_arg = arg - return None - - -def get_extensions(): - no_extension = os.getenv("PYTORCH3D_NO_EXTENSION", "0") == "1" - if no_extension: - msg = "SKIPPING EXTENSION BUILD. PYTORCH3D WILL NOT WORK!" - print(msg, file=sys.stderr) - warnings.warn(msg) - return [] - - this_dir = os.path.dirname(os.path.abspath(__file__)) - extensions_dir = os.path.join(this_dir, "pytorch3d", "csrc") - sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp"), recursive=True) - source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu"), recursive=True) - extension = CppExtension - - extra_compile_args = {"cxx": ["-std=c++17"]} - define_macros = [] - include_dirs = [extensions_dir] - - force_cuda = os.getenv("FORCE_CUDA", "0") == "1" - force_no_cuda = os.getenv("PYTORCH3D_FORCE_NO_CUDA", "0") == "1" - if ( - not force_no_cuda and torch.cuda.is_available() and CUDA_HOME is not None - ) or force_cuda: - extension = CUDAExtension - sources += source_cuda - define_macros += [("WITH_CUDA", None)] - # Thrust is only used for its tuple objects. - # With CUDA 11.0 we can't use the cudatoolkit's version of cub. 
- # We take the risk that CUB and Thrust are incompatible, because - # we aren't using parts of Thrust which actually use CUB. - define_macros += [("THRUST_IGNORE_CUB_VERSION_CHECK", None)] - cub_home = os.environ.get("CUB_HOME", None) - nvcc_args = [ - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - if os.name != "nt": - nvcc_args.append("-std=c++17") - if cub_home is None: - prefix = os.environ.get("CONDA_PREFIX", None) - if prefix is not None and os.path.isdir(prefix + "/include/cub"): - cub_home = prefix + "/include" - - if cub_home is None: - warnings.warn( - "The environment variable `CUB_HOME` was not found. " - "NVIDIA CUB is required for compilation and can be downloaded " - "from `https://github.com/NVIDIA/cub/releases`. You can unpack " - "it to a location of your choice and set the environment variable " - "`CUB_HOME` to the folder containing the `CMakeListst.txt` file." - ) - else: - include_dirs.append(os.path.realpath(cub_home).replace("\\ ", " ")) - nvcc_flags_env = os.getenv("NVCC_FLAGS", "") - if nvcc_flags_env != "": - nvcc_args.extend(nvcc_flags_env.split(" ")) - - # This is needed for pytorch 1.6 and earlier. See e.g. - # https://github.com/facebookresearch/pytorch3d/issues/436 - # It is harmless after https://github.com/pytorch/pytorch/pull/47404 . - # But it can be problematic in torch 1.7.0 and 1.7.1 - if torch.__version__[:4] != "1.7.": - CC = os.environ.get("CC", None) - if CC is not None: - existing_CC = get_existing_ccbin(nvcc_args) - if existing_CC is None: - CC_arg = "-ccbin={}".format(CC) - nvcc_args.append(CC_arg) - elif existing_CC != CC: - msg = f"Inconsistent ccbins: {CC} and {existing_CC}" - raise ValueError(msg) - - extra_compile_args["nvcc"] = nvcc_args - - sources = [os.path.join(extensions_dir, s) for s in sources] - - ext_modules = [ - extension( - "pytorch3d._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -# Retrieve __version__ from the package. 
-__version__ = runpy.run_path("pytorch3d/__init__.py")["__version__"] - - -if os.getenv("PYTORCH3D_NO_NINJA", "0") == "1": - - class BuildExtension(torch.utils.cpp_extension.BuildExtension): - def __init__(self, *args, **kwargs): - super().__init__(use_ninja=False, *args, **kwargs) - -else: - BuildExtension = torch.utils.cpp_extension.BuildExtension - -trainer = "pytorch3d.implicitron_trainer" - -setup( - name="pytorch3d", - version=__version__, - author="FAIR", - url="https://github.com/facebookresearch/pytorch3d", - description="PyTorch3D is FAIR's library of reusable components " - "for deep Learning with 3D data.", - packages=find_packages( - exclude=("configs", "tests", "tests.*", "docs.*", "projects.*") - ) - + [trainer], - package_dir={trainer: "projects/implicitron_trainer"}, - install_requires=["fvcore", "iopath"], - extras_require={ - "all": ["matplotlib", "tqdm>4.29.0", "imageio", "ipywidgets"], - "dev": ["flake8", "usort"], - "implicitron": [ - "hydra-core>=1.1", - "visdom", - "lpips", - "tqdm>4.29.0", - "matplotlib", - "accelerate", - "sqlalchemy>=2.0", - ], - }, - entry_points={ - "console_scripts": [ - f"pytorch3d_implicitron_runner={trainer}.experiment:experiment", - f"pytorch3d_implicitron_visualizer={trainer}.visualize_reconstruction:main", - ] - }, - ext_modules=get_extensions(), - cmdclass={"build_ext": BuildExtension}, - package_data={ - "": ["*.json"], - }, -) diff --git a/pytorch3d/website/.dockerignore b/pytorch3d/website/.dockerignore deleted file mode 100644 index 27d2dae2b493488b48bdb18b95af471821ece9bf..0000000000000000000000000000000000000000 --- a/pytorch3d/website/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -*/node_modules -*.log diff --git a/pytorch3d/website/.gitignore b/pytorch3d/website/.gitignore deleted file mode 100644 index 64150c13c1bfcfc78adc4750ab3d9ef97f2454e4..0000000000000000000000000000000000000000 --- a/pytorch3d/website/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -.DS_Store - -node_modules - -lib/core/metadata.js -lib/core/MetadataBlog.js - -website/translated_docs -website/build/ -website/yarn.lock -website/node_modules -website/i18n/* -website/_tutorials/* diff --git a/pytorch3d/website/README.md b/pytorch3d/website/README.md deleted file mode 100644 index bfd420afe376fcee68a8214f4995b3a5ceeab8cd..0000000000000000000000000000000000000000 --- a/pytorch3d/website/README.md +++ /dev/null @@ -1,265 +0,0 @@ -This website was created with [Docusaurus](https://docusaurus.io/). - -# Building the PyTorch3D website - -## Install - -1. Make sure all the dependencies for the website are installed: - -```sh -# Install dependencies -$ yarn - -or - -$ npm install docusaurus-init -``` - -2. Run your dev server: - -```sh -# Start the site -$ yarn start - -or -$ ./node_modules/docusaurus/lib/start-server.js -``` - -## Build the tutorials - -We convert the ipython notebooks to html using `parse_tutorials.py` which is found in the scripts folder at the root of the PyTorch3D directory. 
- -Before running this script install the following dependencies: - -``` -pip install nbformat==4.4.0 nbconvert==5.3.1 ipywidgets==7.5.1 tornado==4.2 bs4 -``` - -Install yarn: - -``` -brew install yarn - -# or - -curl -o- -L https://yarnpkg.com/install.sh | bash -``` - -Then run the build script: - -``` -bash scripts/build_website.sh -``` - -This will build the docusaurus website and run a script to parse the tutorials and generate: -- `.html` files in the `website/_tutorials` folder -- `.js` files in the `website/pages/tutorials` folder -- `.py`/`.ipynb` files in the `website/static/files` folder - - -TODO: Add support for latex in markdown in jupyter notebooks and embedded images. - -## Build and publish the website - -To update for a new version, you need to build the tutorials and the website and push to the gh-pages -branch of `github.com/facebookresearch/pytorch3d`. The instructions in `scripts/publish_website.sh` -bring it all together. - -## Add a new tutorial - -The tutorials to include in the website are listed in `website/tutorials.json`. If you create a new tutorial add an entry to the list in this file. This is needed in order to generate the sidebar for the tutorials page. - - -## Edit the landing page - -To change the content of the landing page modify: `website/pages/en/index.js`. - - -## Edit the tutorials page - -To change the content of the tutorials home page modify: `website/pages/tutorials/index.js`. - - ---------------------------------------------------------- - -## Docusaurus docs - -- [Get Started in 5 Minutes](#get-started-in-5-minutes) -- [Directory Structure](#directory-structure) -- [Editing Content](#editing-content) -- [Adding Content](#adding-content) -- [Full Documentation](#full-documentation) - - -## Directory Structure - -Your project file structure should look something like this - -``` -my-docusaurus/ - docs/ - doc-1.md - doc-2.md - doc-3.md - website/ - blog/ - 2016-3-11-oldest-post.md - 2017-10-24-newest-post.md - core/ - node_modules/ - pages/ - static/ - css/ - img/ - package.json - sidebars.json - siteConfig.js -``` - -# Editing Content - -## Editing an existing docs page - -Edit docs by navigating to `docs/` and editing the corresponding document: - -`docs/doc-to-be-edited.md` - -```markdown ---- -id: page-needs-edit -title: This Doc Needs To Be Edited ---- - -Edit me... -``` - -For more information about docs, click [here](https://docusaurus.io/docs/en/navigation) - -## Editing an existing blog post - -Edit blog posts by navigating to `website/blog` and editing the corresponding post: - -`website/blog/post-to-be-edited.md` - -```markdown ---- -id: post-needs-edit -title: This Blog Post Needs To Be Edited ---- - -Edit me... -``` - -For more information about blog posts, click [here](https://docusaurus.io/docs/en/adding-blog) - -# Adding Content - -## Adding a new docs page to an existing sidebar - -1. Create the doc as a new markdown file in `/docs`, example `docs/newly-created-doc.md`: - -```md ---- -id: newly-created-doc -title: This Doc Needs To Be Edited ---- - -My new content here.. -``` - -1. Refer to that doc's ID in an existing sidebar in `website/sidebars.json`: - -```javascript -// Add newly-created-doc to the Getting Started category of docs -{ - "docs": { - "Getting Started": [ - "quick-start", - "newly-created-doc" // new doc here - ], - ... - }, - ... -} -``` - -For more information about adding new docs, click [here](https://docusaurus.io/docs/en/navigation) - -## Adding a new blog post - -1. 
Make sure there is a header link to your blog in `website/siteConfig.js`: - -`website/siteConfig.js` - -```javascript -headerLinks: [ - ... - { blog: true, label: 'Blog' }, - ... -] -``` - -2. Create the blog post with the format `YYYY-MM-DD-My-Blog-Post-Title.md` in `website/blog`: - -`website/blog/2018-05-21-New-Blog-Post.md` - -```markdown ---- -author: Frank Li -authorURL: https://twitter.com/foobarbaz -authorFBID: 503283835 -title: New Blog Post ---- - -Lorem Ipsum... -``` - -For more information about blog posts, click [here](https://docusaurus.io/docs/en/adding-blog) - -## Adding items to your site's top navigation bar - -1. Add links to docs, custom pages or external links by editing the headerLinks field of `website/siteConfig.js`: - -`website/siteConfig.js` - -```javascript -{ - headerLinks: [ - ... - /* you can add docs */ - { doc: 'my-examples', label: 'Examples' }, - /* you can add custom pages */ - { page: 'help', label: 'Help' }, - /* you can add external links */ - { href: 'https://github.com/facebook/docusaurus', label: 'GitHub' }, - ... - ], - ... -} -``` - -For more information about the navigation bar, click [here](https://docusaurus.io/docs/en/navigation) - -## Adding custom pages - -1. Docusaurus uses React components to build pages. The components are saved as .js files in `website/pages/en`: -1. If you want your page to show up in your navigation header, you will need to update `website/siteConfig.js` to add to the `headerLinks` element: - -`website/siteConfig.js` - -```javascript -{ - headerLinks: [ - ... - { page: 'my-new-custom-page', label: 'My New Custom Page' }, - ... - ], - ... -} -``` - -For more information about custom pages, click [here](https://docusaurus.io/docs/en/custom-pages). - -# Full Documentation - -Full documentation can be found on the [website](https://docusaurus.io/). diff --git a/pytorch3d/website/core/Footer.js b/pytorch3d/website/core/Footer.js deleted file mode 100644 index 9e8c4a79b390e342a86675309b50265ca7258d28..0000000000000000000000000000000000000000 --- a/pytorch3d/website/core/Footer.js +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -const PropTypes = require("prop-types"); -const React = require('react'); - -function SocialFooter(props) { - const repoUrl = `https://github.com/${props.config.organizationName}/${props.config.projectName}`; - return ( - - ); -} - -SocialFooter.propTypes = { - config: PropTypes.object -}; - -class Footer extends React.Component { - docUrl(doc, language) { - const baseUrl = this.props.config.baseUrl; - const docsUrl = this.props.config.docsUrl; - const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`; - const langPart = `${language ? `${language}/` : ''}`; - return `${baseUrl}${docsPart}${langPart}${doc}`; - } - - pageUrl(doc, language) { - const baseUrl = this.props.config.baseUrl; - return baseUrl + (language ? 
`${language}/` : '') + doc; - } - - render() { - const repoUrl = `https://github.com/${this.props.config.organizationName}/${this.props.config.projectName}`; - return ( - - ); - } -} - -module.exports = Footer; diff --git a/pytorch3d/website/core/Tutorial.js b/pytorch3d/website/core/Tutorial.js deleted file mode 100644 index 866fa6e34b938ae7bd7e2df16a2845887c5bc3fd..0000000000000000000000000000000000000000 --- a/pytorch3d/website/core/Tutorial.js +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - * - * @format - */ - -const React = require('react'); - -const fs = require('fs-extra'); -const path = require('path'); -const CWD = process.cwd(); - -const CompLibrary = require(`${CWD}/node_modules/docusaurus/lib/core/CompLibrary.js`); -const Container = CompLibrary.Container; - -const TutorialSidebar = require(`${CWD}/core/TutorialSidebar.js`); - -function renderDownloadIcon() { - return ( - - ); -} - -class Tutorial extends React.Component { - render() { - const {baseUrl, tutorialID} = this.props; - - const htmlFile = `${CWD}/_tutorials/${tutorialID}.html`; - const normalizedHtmlFile = path.normalize(htmlFile); - - return ( -
- - - -
- -
- ); - } -} - -module.exports = Tutorial; diff --git a/pytorch3d/website/core/TutorialSidebar.js b/pytorch3d/website/core/TutorialSidebar.js deleted file mode 100644 index b53c683de9368bcbdfa4bf84a98698fcd65c311d..0000000000000000000000000000000000000000 --- a/pytorch3d/website/core/TutorialSidebar.js +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - * - * @format - */ - -const React = require('react'); -const fs = require('fs-extra'); -const path = require('path'); -const join = path.join; -const CWD = process.cwd(); - -const CompLibrary = require(join( - CWD, - '/node_modules/docusaurus/lib/core/CompLibrary.js', -)); -const SideNav = require(join( - CWD, - '/node_modules/docusaurus/lib/core/nav/SideNav.js', -)); - -const Container = CompLibrary.Container; - -const OVERVIEW_ID = 'tutorial_overview'; - -class TutorialSidebar extends React.Component { - render() { - const {currentTutorialID} = this.props; - const current = { - id: currentTutorialID || OVERVIEW_ID, - }; - - const toc = [ - { - type: 'CATEGORY', - title: 'Tutorials', - children: [ - { - type: 'LINK', - item: { - permalink: 'tutorials/', - id: OVERVIEW_ID, - title: 'Overview', - }, - }, - ], - }, - ]; - - const jsonFile = join(CWD, 'tutorials.json'); - const normJsonFile = path.normalize(jsonFile); - const json = JSON.parse(fs.readFileSync(normJsonFile, {encoding: 'utf8'})); - - Object.keys(json).forEach(category => { - const categoryItems = json[category]; - const items = []; - categoryItems.map(item => { - items.push({ - type: 'LINK', - item: { - permalink: `tutorials/${item.id}`, - id: item.id, - title: item.title, - }, - }); - }); - - toc.push({ - type: 'CATEGORY', - title: category, - children: items, - }); - }); - - return ( - - - - ); - } -} - -module.exports = TutorialSidebar; diff --git a/pytorch3d/website/package.json b/pytorch3d/website/package.json deleted file mode 100644 index 1f87392a9d8236e74a6de6a90608d201cf8a44db..0000000000000000000000000000000000000000 --- a/pytorch3d/website/package.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "scripts": { - "examples": "docusaurus-examples", - "start": "docusaurus-start", - "build": "docusaurus-build", - "publish-gh-pages": "docusaurus-publish", - "write-translations": "docusaurus-write-translations", - "version": "docusaurus-version", - "rename-version": "docusaurus-rename-version" - }, - "devDependencies": { - "docusaurus": "^1.14.4" - } -} diff --git a/pytorch3d/website/pages/en/help.js b/pytorch3d/website/pages/en/help.js deleted file mode 100644 index 323ba7123f725bf4358b7a3a18879bf7a7052d1a..0000000000000000000000000000000000000000 --- a/pytorch3d/website/pages/en/help.js +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -const React = require('react'); - -const CompLibrary = require('../../core/CompLibrary.js'); - -const Container = CompLibrary.Container; -const GridBlock = CompLibrary.GridBlock; - -function Help(props) { - const {config: siteConfig, language = ''} = props; - const {baseUrl, docsUrl} = siteConfig; - const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`; - const langPart = `${language ? 
`${language}/` : ''}`; - const docUrl = doc => `${baseUrl}${docsPart}${langPart}${doc}`; - - const supportLinks = [ - { - content: `Learn more using the [documentation on this site.](${docUrl( - 'doc1.html', - )})`, - title: 'Browse Docs', - }, - { - content: 'Ask questions about the documentation and project', - title: 'Join the community', - }, - { - content: "Find out what's new with this project", - title: 'Stay up to date', - }, - ]; - - return ( -
-    <div className="docMainWrapper wrapper">
-      <Container className="mainContainer documentContainer postContainer">
-        <div className="post">
-          <header className="postHeader">
-            <h1>Need help?</h1>
-          </header>
-          <p>This project is maintained by a dedicated group of people.</p>
-          <GridBlock contents={supportLinks} layout="threeColumn" />
-        </div>
-      </Container>
-    </div>
- ); -} - -module.exports = Help; diff --git a/pytorch3d/website/pages/en/index.js b/pytorch3d/website/pages/en/index.js deleted file mode 100644 index 59afb0f6d9bd07c7abe93a8e9ca0bf35e4f70cf8..0000000000000000000000000000000000000000 --- a/pytorch3d/website/pages/en/index.js +++ /dev/null @@ -1,240 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -const React = require('react'); - -const CompLibrary = require('../../core/CompLibrary.js'); - -const MarkdownBlock = CompLibrary.MarkdownBlock; /* Used to read markdown */ -const Container = CompLibrary.Container; -const GridBlock = CompLibrary.GridBlock; -const bash = (...args) => `~~~bash\n${String.raw(...args)}\n~~~`; -class HomeSplash extends React.Component { - render() { - const {siteConfig, language = ''} = this.props; - const {baseUrl, docsUrl} = siteConfig; - const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`; - const langPart = `${language ? `${language}/` : ''}`; - const docUrl = doc => `${baseUrl}${docsPart}${langPart}${doc}`; - - const SplashContainer = props => ( -
-      <div className="homeContainer">
-        <div className="homeSplashFade">
-          <div className="wrapper homeWrapper">{props.children}</div>
-        </div>
-      </div>
- ); - - const Logo = props => ( -
-      <div className="projectLogo">
-        <img src={props.img_src} alt="Project Logo" />
-      </div>
- ); - - const ProjectTitle = props => ( -

-      <h2 className="projectTitle">{props.tagline}</h2>

- ); - - const PromoSection = props => ( -
-      <div className="section promoSection">
-        <div className="promoRow">
-          <div className="pluginRowBlock">{props.children}</div>
-        </div>
-      </div>
- ); - - const Button = props => ( - - ); - - return ( - - -
- - - - - - -
-
- ); - } -} - -function SocialBanner() { - return ( -
-
- Support Ukraine 🇺🇦{' '} - - Help Provide Humanitarian Aid to Ukraine - - .
-
- ); -} - -class Index extends React.Component { - render() { - const {config: siteConfig, language = ''} = this.props; - const {baseUrl} = siteConfig; - - const Block = props => ( - - - - ); - - const Description = () => ( - - {[ - { - content: - 'This is another description of how this project is useful', - image: `${baseUrl}img/docusaurus.svg`, - imageAlign: 'right', - title: 'Description', - }, - ]} - - ); - - const pre = '```'; - - const codeExample = `${pre}python -from pytorch3d.utils import ico_sphere -from pytorch3d.io import load_obj -from pytorch3d.structures import Meshes -from pytorch3d.ops import sample_points_from_meshes -from pytorch3d.loss import chamfer_distance - -# Use an ico_sphere mesh and load a mesh from an .obj e.g. model.obj -sphere_mesh = ico_sphere(level=3) -verts, faces, _ = load_obj("model.obj") -test_mesh = Meshes(verts=[verts], faces=[faces.verts_idx]) - -# Differentiably sample 5k points from the surface of each mesh and then compute the loss. -sample_sphere = sample_points_from_meshes(sphere_mesh, 5000) -sample_test = sample_points_from_meshes(test_mesh, 5000) -loss_chamfer, _ = chamfer_distance(sample_sphere, sample_test) - `; - - const QuickStart = () => ( -
-

Get Started

- -
    -
  1. - Install PyTorch3D (following the instructions here) -
  2. -
  3. - Try a few 3D operators - e.g. compute the chamfer loss between two meshes: - {codeExample} -
  4. -
-
-
- ); - - const Features = () => ( -
- - {[ - { - content: - 'Supports batching of 3D inputs of different sizes ' + - 'such as meshes' , - image: `${baseUrl}img/batching.svg`, - imageAlign: 'top', - title: 'Heterogeneous Batching', - }, - { - content: - 'Supports optimized implementations of ' + - 'several common functions for 3D data', - image: `${baseUrl}img/ops.png`, - imageAlign: 'top', - title: 'Fast 3D Operators', - }, - { - content: - 'Modular differentiable rendering API ' + - 'with parallel implementations in ' + - 'PyTorch, C++ and CUDA' , - image: `${baseUrl}img/rendering.svg`, - imageAlign: 'top', - title: 'Differentiable Rendering', - }, - ]} - -
- ); - - const Showcase = () => { - if ((siteConfig.users || []).length === 0) { - return null; - } - - const showcase = siteConfig.users - .filter(user => user.pinned) - .map(user => ( - - {user.caption} - - )); - - const pageUrl = page => baseUrl + (language ? `${language}/` : '') + page; - - return ( -
-

Who is Using This?

-

This project is used by all these people

-
{showcase}
- -
- ); - }; - - return ( -
- - -
- - -
-
- ); - } -} - -module.exports = Index; diff --git a/pytorch3d/website/pages/en/users.js b/pytorch3d/website/pages/en/users.js deleted file mode 100644 index 2439c3eefb24bd80a554d547ea0629c741b992ed..0000000000000000000000000000000000000000 --- a/pytorch3d/website/pages/en/users.js +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -const React = require('react'); - -const CompLibrary = require('../../core/CompLibrary.js'); - -const Container = CompLibrary.Container; - -class Users extends React.Component { - render() { - const {config: siteConfig} = this.props; - if ((siteConfig.users || []).length === 0) { - return null; - } - - const editUrl = `${siteConfig.repoUrl}/edit/main/website/siteConfig.js`; - const showcase = siteConfig.users.map(user => ( - - {user.caption} - - )); - - return ( -
-      <div className="mainContainer">
-        <Container padding={['bottom', 'top']}>
-          <div className="showcaseSection">
-            <div className="prose">
-              <h1>Who is Using This?</h1>
-              <p>This project is used by many folks</p>
-            </div>
-            <div className="logos">{showcase}</div>
-            <p>Are you using this project?</p>
-            <a href={editUrl} className="button">
-              Add your company
-            </a>
-          </div>
-        </Container>
-      </div>
- ); - } -} - -module.exports = Users; diff --git a/pytorch3d/website/pages/tutorials/index.js b/pytorch3d/website/pages/tutorials/index.js deleted file mode 100644 index 1a97c2b343763133d23d7a2eeb59d5d5040b3142..0000000000000000000000000000000000000000 --- a/pytorch3d/website/pages/tutorials/index.js +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - * - * @format - */ - -const React = require('react'); - -const CWD = process.cwd(); - -const CompLibrary = require(`${CWD}/node_modules/docusaurus/lib/core/CompLibrary.js`); -const Container = CompLibrary.Container; -const MarkdownBlock = CompLibrary.MarkdownBlock; - -const TutorialSidebar = require(`${CWD}/core/TutorialSidebar.js`); -const bash = (...args) => `~~~bash\n${String.raw(...args)}\n~~~`; - -class TutorialHome extends React.Component { - render() { - return ( -
- - -
-
-

- Welcome to the PyTorch3D Tutorials -

-
-

- Here you can learn about the structure and applications of - PyTorch3D from examples which are in the form of ipython - notebooks. -

-

Run interactively

-

- At the top of each example you can find a button named{' '} - "Run in Google Colab" which will open the - notebook in{' '} - - {' '} - Google Colaboratory{' '} - {' '} - where you can run the code directly in the browser with access to - GPU support - it looks like this: -

- -

- {' '} - You can modify the code and experiment with varying different - settings. Remember to install the latest stable version of - PyTorch3D and its dependencies. Code to do this with pip is - provided in each notebook.{' '} -

-

Run locally

-

- {' '} - There is also a button to download the notebook and source code to - run it locally.{' '} -

-
-
-
- ); - } -} - -module.exports = TutorialHome; diff --git a/pytorch3d/website/sidebars.json b/pytorch3d/website/sidebars.json deleted file mode 100644 index 92932fbac354d801883a1ade884bed9792f799b5..0000000000000000000000000000000000000000 --- a/pytorch3d/website/sidebars.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "docs": { - "Introduction": ["why_pytorch3d"], - "Data": ["io", "meshes_io", "datasets", "batching"], - "Ops": ["cubify", "iou3d"], - "Visualization": ["visualization"], - "Renderer": ["renderer", "renderer_getting_started", "cameras"] - } -} diff --git a/pytorch3d/website/siteConfig.js b/pytorch3d/website/siteConfig.js deleted file mode 100644 index 98e6225d6d92ee49e3a37e936c4481f2571a9092..0000000000000000000000000000000000000000 --- a/pytorch3d/website/siteConfig.js +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// @licenselint-loose-mode - -// See https://docusaurus.io/docs/site-config for all the possible -// site configuration options. - -// List of projects/orgs using your project for the users page. -const users = [ - { - caption: 'User1', - // You will need to prepend the image path with your baseUrl - // if it is not '/', like: '/test-site/img/image.jpg'. - image: '/img/undraw_open_source.svg', - infoLink: 'https://www.facebook.com', - pinned: true, - }, -]; - -const baseUrl = '/' - -const siteConfig = { - title: 'PyTorch3D', // Title for your website. - tagline: 'A library for deep learning with 3D data', - url: 'https://pytorch3d.org', // Your website URL - baseUrl: baseUrl, // Base URL for your project */ - projectName: 'pytorch3d', - organizationName: 'facebookresearch', - customDocsPath: 'docs/notes', - headerLinks: [ - {doc: 'why_pytorch3d', label: 'Docs'}, - {page: 'tutorials', label: 'Tutorials'}, - {href: "https://pytorch3d.readthedocs.io/", label: 'API'}, - {href: "https://github.com/facebookresearch/pytorch3d", label: 'GitHub'}, - ], - - // If you have users set above, you add it here: - users, - - /* path to images for header/footer */ - headerIcon: 'img/pytorch3dfavicon.png', - footerIcon: 'img/pytorch3dfavicon.png', - favicon: 'img/pytorch3dfavicon.png', - - /* Colors for website */ - colors: { - primaryColor: '#812CE5', - secondaryColor: '#FFAF00', - }, - - // This copyright info is used in /core/Footer.js and blog RSS/Atom feeds. - copyright: `Copyright \u{00A9} ${new Date().getFullYear()} Meta Platforms, Inc`, - - highlight: { - // Highlight.js theme to use for syntax highlighting in code blocks. - theme: 'default', - }, - - // Add custom scripts here that would be placed in